François
9 months ago
13 changed files with 3096 additions and 3 deletions
File diff suppressed because it is too large
@ -0,0 +1,14 @@ |
|||
# Build, run and document the autoText demo.
# None of these targets produces a file of the same name, so they are
# declared phony: a stray file called "doc" or "clean" must not disable them.
.PHONY: all clean wipe doc

# Build everything, then run the generator on the sample corpus.
all: testAutoText doc
	./testAutoText < data/LesForcatsDuMariage-lower.txt

# Single-step compile and link of the test program.
testAutoText: cpp/TestAutoText.cpp cpp/TextProdChar.cpp include/TextProdChar.hpp cpp/LexFreq.cpp include/LexFreq.hpp cpp/Context.cpp include/Context.hpp
	g++ -g -I ./include/ cpp/TestAutoText.cpp cpp/TextProdChar.cpp cpp/LexFreq.cpp cpp/Context.cpp -o testAutoText

# Remove editor backup files.
clean:
	rm -fr *~ cpp/*~ include/*~

# Remove backup files plus the binary and the generated documentation.
wipe: clean
	rm -fr testAutoText autoTextDoc

# Generate the Doxygen documentation (normal output is discarded).
doc:
	doxygen Doxyfile >/dev/null
@ -1,5 +1,5 @@ |
|||
# autoText |
|||
|
|||
Exemple de production automatique de texte. |
|||
|
|||
Ce que certains appellent IA n'est que l'application statistique dénuée de sémantique. |
|||
Exemple de production automatique de texte. |
|||
|
|||
Ce que certains appellent IA n'est que l'application statistique dénuée de sémantique. |
|||
|
After Width: | Height: | Size: 34 KiB |
@ -0,0 +1,57 @@ |
|||
#include <iostream> |
|||
#include <string> |
|||
#include <cstring> |
|||
#include <map> |
|||
#include <ctype.h> |
|||
|
|||
#include "Context.hpp" |
|||
|
|||
using namespace std; |
|||
|
|||
// ========================================
|
|||
const int Context::prevSize (10); |
|||
const string Context::end (".?!"); |
|||
const string Context::sep (" ,;’-"); |
|||
|
|||
bool |
|||
Context::validChar (const char &c) { |
|||
if (isalpha (c)) |
|||
return true; |
|||
return |
|||
end.find (c) != string::npos || |
|||
sep.find (c) != string::npos; |
|||
} |
|||
|
|||
bool |
|||
Context::isEnd (const string &c) { |
|||
return end.find (c) != string::npos; |
|||
} |
|||
|
|||
bool |
|||
Context::isSep (const string &c) { |
|||
return sep.find (c) != string::npos; |
|||
} |
|||
|
|||
void |
|||
Context::forward (string current) { |
|||
if (!prevSize) |
|||
return; |
|||
if (current == " " && prev.empty ()) |
|||
return; |
|||
if (end.find (current) != string::npos) { |
|||
prev.clear (); |
|||
return; |
|||
} |
|||
prev += current; |
|||
if (prev.size () > prevSize) { |
|||
int count (prevSize); |
|||
for (auto rit (prev.crbegin ()); rit != prev.crend (); ++rit) |
|||
// (ASCII || start UTF) && countdown
|
|||
if ((! (*rit & 0b10000000) || (*rit & 0b01000000)) && ! --count) { |
|||
prev = prev.substr (prev.crend () - rit -1); |
|||
break; |
|||
} |
|||
} |
|||
} |
|||
|
|||
// ========================================
|
@ -0,0 +1,44 @@ |
|||
#include <iostream> |
|||
#include <string> |
|||
#include <cstring> |
|||
#include <map> |
|||
#include <ctype.h> |
|||
|
|||
#include "LexFreq.hpp" |
|||
|
|||
using namespace std; |
|||
|
|||
// ========================================
|
|||
/*!
 * @brief Dumps a frequency table: its size counter, then one
 *        "key = count" line per histogram entry.
 * @param os destination stream
 * @param freqLex table to print
 * @return os
 */
ostream&
operator << (ostream& os, const LexFreq &freqLex) {
  os << " size = " << freqLex.size << endl;
  for (auto entry (freqLex.hist.cbegin ()); entry != freqLex.hist.cend (); ++entry)
    os << entry->first << " = " << entry->second << endl;
  return os;
}
|||
|
|||
LexFreq::LexFreq () : |
|||
size (0) { |
|||
} |
|||
|
|||
void |
|||
LexFreq::addChar (const string &c) { |
|||
++hist [c]; |
|||
++size; |
|||
} |
|||
|
|||
/*!
 * @brief Draws a character at random, weighted by its recorded count.
 *
 * A value r is drawn in [0, size - 1] with rand () and the histogram is
 * walked in key order, subtracting each count until r falls inside the
 * current entry.
 *
 * @return the drawn character, or "*" when nothing has been recorded
 */
string
LexFreq::getChar () const {
  if (!size)
    return "*";
  long r (rand () % size);
  for (const auto& [key, value]: hist) {
    // Strict comparison: an entry of count n owns exactly n values of r.
    // With "<=" the earlier keys were over-weighted and, for instance,
    // the second of two keys counted once each could never be drawn.
    if (r < value)
      return key;
    r -= value;
  }

  // Only reachable if size and the histogram counts disagree.
  return "*";
}
|||
|
|||
// ========================================
|
@ -0,0 +1,55 @@ |
|||
/*
|
|||
|
|||
La Part du feu. Les Terreurs du bourgeois Prudence et de son ami Furibus, 1873 par M.-L. Gagneur (1832-1902). |
|||
|
|||
https://gallica.bnf.fr/services/engine/search/sru?operation=searchRetrieve&exactSearch=false&collapsing=true&version=1.2&query=(dc.creator%20all%20%22Marie-Louise%20Gagneur%22%20or%20dc.contributor%20all%20%22Marie-Louise%20Gagneur%22%20)%20&suggest=10&keywords=Marie-Louise%20Gagneur
|
|||
|
|||
https://fr.wikisource.org/wiki/La_Part_du_feu?rk=42918;4
|
|||
*/ |
|||
|
|||
#include <iostream> |
|||
#include <fstream> |
|||
#include <cstdlib> |
|||
|
|||
#include "TextProdChar.hpp" |
|||
|
|||
using namespace std; |
|||
|
|||
/*!
 * @brief Debug helper: echoes standard input with character boundaries marked.
 *
 * Each ASCII byte is written followed by "/". Each byte with the high bit
 * set is written together with the continuation bytes (10xxxxxx) that
 * follow it, then "\". Line ends are not echoed.
 *
 * The continuation bytes are consumed only while they exist, so a
 * truncated sequence at the end of a line no longer moves the iterator
 * past the end, and 3- or 4-byte sequences are kept in one piece.
 */
void
test () {
  for (std::string line; std::getline (std::cin, line); ) {
    for (std::string::const_iterator it (line.cbegin ());
	 it != line.cend ();
	 ++it) {
      const unsigned char c = *it;
      if (! (c & 0b10000000)) {
	std::cout << c << "/";
	continue;
      }
      std::cout << c;
      // absorb every continuation byte that belongs to this character
      while (it + 1 != line.cend () &&
	     (static_cast<unsigned char> (*(it + 1)) & 0b11000000) == 0b10000000) {
	++it;
	std::cout << *it;
      }
      std::cout << "\\";
    }
  }
}
|||
|
|||
// ========================================
|
|||
int |
|||
main (int argc, char** argv) { |
|||
cerr << endl << " ** Test TextProdChar" << endl; |
|||
|
|||
// ifstream in ("in.txt");
|
|||
// ofstream out ("out.txt");
|
|||
|
|||
TextProdChar textProdChar; |
|||
srand (0); //(time (0));
|
|||
|
|||
//test ();
|
|||
textProdChar.learn (cin); |
|||
// cerr << textProdChar << endl;
|
|||
|
|||
textProdChar.prod (cout, 1000); |
|||
cerr << endl; |
|||
return 0; |
|||
} |
|||
|
|||
// ========================================
|
@ -0,0 +1,91 @@ |
|||
#include <iostream> |
|||
#include <string> |
|||
#include <cstring> |
|||
#include <map> |
|||
#include <ctype.h> |
|||
#include <boost/algorithm/string.hpp> |
|||
|
|||
#include "Context.hpp" |
|||
#include "LexFreq.hpp" |
|||
#include "TextProdChar.hpp" |
|||
|
|||
using namespace std; |
|||
|
|||
// ========================================
|
|||
/*!
 * @brief Dumps a model: the table stored under the empty state (when
 *        there is one), the number of states, then one
 *        "state = getSize ()" line per state.
 * @param os destination stream
 * @param ia model to print
 * @return os
 */
ostream&
operator << (ostream& os, const TextProdChar &ia) {
  // map::at throws std::out_of_range when the key is missing, which
  // happens for a model that has not learned anything yet; look it up
  // instead and skip that part of the dump.
  const auto start (ia.prevFreq.find (string ()));
  if (start != ia.prevFreq.end ())
    os << start->second;
  os << " size = " << ia.prevFreq.size () << endl;
  for (const auto& [key, value]: ia.prevFreq)
    os << key << " = " << value.getSize () << endl;
  return os;
}
|||
|
|||
|
|||
void |
|||
TextProdChar::learn (istream &in) { |
|||
string comp; |
|||
for (string line; getline (in, line); ) { |
|||
for (string::iterator it (line.begin ()); |
|||
it != line.end (); |
|||
++it) { |
|||
char c = *it; |
|||
comp.clear (); |
|||
comp.push_back (c); |
|||
|
|||
if (c & 0b10000000) { |
|||
if ((c & 0b11100000) == 0b11000000) { |
|||
++it; comp.push_back (*it); |
|||
} else if ((c & 0b11110000) == 0b11100000) { |
|||
++it; comp.push_back (*it); |
|||
++it; comp.push_back (*it); |
|||
} else if ((c & 0b11111000) == 0b11110000) { |
|||
++it; comp.push_back (*it); |
|||
++it; comp.push_back (*it); |
|||
++it; comp.push_back (*it); |
|||
} else { |
|||
cout << "*"; |
|||
} |
|||
} else { |
|||
// ASCII
|
|||
if (! Context::validChar (c)) |
|||
continue; |
|||
} |
|||
|
|||
if (context.empty () && (context.isSep (comp) || context.isEnd (comp))) |
|||
continue; |
|||
prevFreq[context.getState ()].addChar (comp); |
|||
context.forward (comp); |
|||
} |
|||
} |
|||
} |
|||
|
|||
void |
|||
TextProdChar::prod (ostream &out, const long &size) { |
|||
context.reset (); |
|||
bool nl (false), end (false); |
|||
for (long i (0), col (0); ; ++i, ++col) { |
|||
if (i > size) |
|||
end = true; |
|||
if (col > 80) |
|||
nl = true; |
|||
bool maj (context.empty ()); |
|||
string next (prevFreq[context.getState ()].getChar ()); |
|||
context.forward (next); |
|||
if (maj) |
|||
boost::to_upper (next); |
|||
out << next; |
|||
if (context.isEnd (next) || |
|||
(nl && context.isSep (next))) { |
|||
out << endl; |
|||
nl = false; |
|||
col = 0; |
|||
if (end && context.isEnd (next)) |
|||
break; |
|||
} |
|||
} |
|||
out << endl; |
|||
} |
|||
|
|||
// ========================================
|
File diff suppressed because one or more lines are too long
@ -0,0 +1,33 @@ |
|||
/*!
|
|||
* @file Context.hpp |
|||
* @brief calcul de fréquence et prédiction |
|||
* @author F. Merciol |
|||
* @version 0.1 |
|||
* @date 18 / août 2023 |
|||
*/ |
|||
#ifndef _Context_hpp_ |
|||
#define _Context_hpp_ |
|||
|
|||
#include <iostream> |
|||
#include <map> |
|||
|
|||
using namespace std; |
|||
|
|||
// ========================================
|
|||
/*!
 * @brief History of the most recent characters, used as the state of
 *        the text model.
 *
 * Names from the standard library are qualified so that the class does
 * not depend on a using-directive, and the observers are const so that
 * they can be called through a const Context.
 */
class Context {
  std::string prev;               //!< current history
  static const int prevSize;      //!< history length limit
  static const std::string end;   //!< sentence terminators
  static const std::string sep;   //!< separators
public:
  /*! @return true when the byte c is accepted by the model */
  static bool validChar (const char &c);
  /*! @return true when c is found in the terminator set */
  static bool isEnd (const std::string &c);
  /*! @return true when c is found in the separator set */
  static bool isSep (const std::string &c);

  /*! @return true when the history holds no character */
  bool empty () const { return prev.empty (); }
  /*! @brief Clears the history. */
  void reset () { prev.clear (); }
  /*! @return a copy of the history */
  std::string getState () const { return prev; }
  /*! @brief Advances the history with the character current. */
  void forward (std::string current);
};
|||
|
|||
// ========================================
|
|||
#endif // _Context_hpp_
|
@ -0,0 +1,30 @@ |
|||
/*!
|
|||
* @file LexFreq.hpp |
|||
* @brief calcul de fréquence et prédiction |
|||
* @author F. Merciol |
|||
* @version 0.1 |
|||
* @date 18 / août 2023 |
|||
*/ |
|||
#ifndef _LexFreq_hpp_ |
|||
#define _LexFreq_hpp_ |
|||
|
|||
#include <iostream> |
|||
#include <map> |
|||
|
|||
using namespace std; |
|||
|
|||
// ========================================
|
|||
/*!
 * @brief Table of character counts with a running size counter.
 */
class LexFreq {
public:
  /*! @brief Dumps the size counter and every table entry. */
  friend std::ostream& operator << (std::ostream& os, const LexFreq &freqLex);

  LexFreq ();

  /*! @return the number of entries in the table (not the size counter) */
  long getSize () const { return hist.size (); }
  /*! @brief Records the character c. */
  void addChar (const std::string &c);
  /*! @return a character drawn from the table */
  std::string getChar () const;

private:
  long size;                          //!< running counter
  std::map<std::string, long> hist;   //!< count per character
};
|||
|
|||
// ========================================
|
|||
#endif // _LexFreq_hpp_
|
@ -0,0 +1,32 @@ |
|||
/*!
|
|||
* @file TextProdChar.hpp |
|||
* @brief calcul de fréquence et prédiction |
|||
* @author F. Merciol |
|||
* @version 0.1 |
|||
* @date 18 / août 2023 |
|||
*/ |
|||
#ifndef _TextProdChar_hpp_ |
|||
#define _TextProdChar_hpp_ |
|||
|
|||
#include <iostream> |
|||
#include <map> |
|||
|
|||
#include "Context.hpp" |
|||
#include "LexFreq.hpp" |
|||
|
|||
using namespace std; |
|||
|
|||
// ========================================
|
|||
class TextProdChar { |
|||
Context context; |
|||
map<string, LexFreq> prevFreq; |
|||
public: |
|||
friend ostream& operator << (ostream& os, const TextProdChar &ia); |
|||
|
|||
void learn (istream &in); |
|||
void prod (ostream &out, const long &size); |
|||
}; |
|||
|
|||
|
|||
// ========================================
|
|||
#endif // _TextProdChar_hpp_
|
@ -0,0 +1,3 @@ |
|||
Exemple de production automatique de texte. |
|||
|
|||
|
Loading…
Reference in new issue