première version

This commit is contained in:
François 2023-08-21 08:40:23 +02:00
parent c6d71faca7
commit 3e44eb5340
13 changed files with 3096 additions and 3 deletions

2733
Doxyfile Normal file

File diff suppressed because it is too large Load Diff

14
Makefile Normal file
View File

@ -0,0 +1,14 @@
# Default target: depends on the test binary and the documentation, then runs
# the binary with data/LesForcatsDuMariage-lower.txt fed on standard input.
all: testAutoText doc
./testAutoText < data/LesForcatsDuMariage-lower.txt
# Compiles and links the four sources under cpp/ (headers under include/) with
# debug information (-g) into the testAutoText executable.
testAutoText: cpp/TestAutoText.cpp cpp/TextProdChar.cpp include/TextProdChar.hpp cpp/LexFreq.cpp include/LexFreq.hpp cpp/Context.cpp include/Context.hpp
g++ -g -I ./include/ cpp/TestAutoText.cpp cpp/TextProdChar.cpp cpp/LexFreq.cpp cpp/Context.cpp -o testAutoText
# Removes files ending in '~' from the top level, cpp/ and include/.
clean:
rm -fr *~ cpp/*~ include/*~
# Runs clean, then also removes testAutoText and autoTextDoc
# (presumably the doxygen output -- confirm against the Doxyfile).
wipe: clean
rm -fr testAutoText autoTextDoc
# Runs doxygen on the Doxyfile, discarding its standard output.
doc:
doxygen Doxyfile >/dev/null

View File

@ -1,5 +1,5 @@
# autoText
Exemple de production automatique de texte.
Ce que certains appellent IA n'est qu'une application statistique dénuée de sémantique.
Exemple de production automatique de texte.
Ce que certains appellent IA n'est qu'une application statistique dénuée de sémantique.

BIN
autoText.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

57
cpp/Context.cpp Normal file
View File

@ -0,0 +1,57 @@
#include <iostream>
#include <string>
#include <cstring>
#include <map>
#include <ctype.h>
#include "Context.hpp"
using namespace std;
// ========================================
// Threshold compared against prev.size () in Context::forward; it is also the
// number of characters kept when the context is trimmed there.
const int Context::prevSize (10);
// Characters looked up by isEnd (); forward () clears the context on a match.
const string Context::end (".?!");
// Characters looked up by isSep ().
const string Context::sep (" ,;-");
/*!
 * @brief Tells whether a single-byte character is accepted.
 * @param c the character to classify
 * @return true when c is alphabetic (per isalpha) or belongs to the
 *         end (".?!") or separator (" ,;-") sets
 */
bool
Context::validChar (const char &c) {
  // The <ctype.h> classifiers require a value representable as unsigned char
  // (or EOF): passing a negative plain char is undefined behaviour.
  if (isalpha (static_cast<unsigned char> (c)))
    return true;
  return
    end.find (c) != string::npos ||
    sep.find (c) != string::npos;
}
bool
Context::isEnd (const string &c) {
return end.find (c) != string::npos;
}
bool
Context::isSep (const string &c) {
return sep.find (c) != string::npos;
}
/*!
 * @brief Appends one character to the context and trims the context to its
 *        last prevSize characters.
 *
 * Nothing happens when prevSize is zero, or when a single space arrives while
 * the context is empty. A character found in the end set clears the context.
 *
 * @param current the character to append, as a string of one or more bytes
 */
void
Context::forward (string current) {
  if (prevSize == 0)
    return;
  if (prev.empty () && current == " ")
    return;
  if (end.find (current) != string::npos) {
    prev.clear ();
    return;
  }
  prev += current;
  if (! (prev.size () > prevSize))
    return;
  // Walk backwards over the bytes, counting only those that begin a character
  // (ASCII bytes and UTF-8 lead bytes); continuation bytes (10xxxxxx) are skipped.
  int remaining (prevSize);
  for (string::size_type idx (prev.size ()); idx-- > 0; ) {
    const char byte (prev [idx]);
    const bool continuation ((byte & 0b10000000) && ! (byte & 0b01000000));
    if (continuation)
      continue;
    if (--remaining == 0) {
      // idx is the first byte of the prevSize-th character from the end.
      prev = prev.substr (idx);
      break;
    }
  }
}
// ========================================

44
cpp/LexFreq.cpp Normal file
View File

@ -0,0 +1,44 @@
#include <iostream>
#include <string>
#include <cstring>
#include <map>
#include <ctype.h>
#include "LexFreq.hpp"
using namespace std;
// ========================================
/*!
 * @brief Writes the total count, then one "symbol = count" line per entry.
 * @param os the destination stream
 * @param freqLex the table to print
 * @return os
 */
ostream&
operator << (ostream& os, const LexFreq &freqLex) {
  os << " size = " << freqLex.size << endl;
  for (auto entry (freqLex.hist.cbegin ()); entry != freqLex.hist.cend (); ++entry)
    os << entry->first << " = " << entry->second << endl;
  return os;
}
LexFreq::LexFreq () :
size (0) {
}
/*!
 * @brief Records one more occurrence of a symbol.
 * @param c the symbol whose count is incremented; the total is incremented too
 */
void
LexFreq::addChar (const string &c) {
  long &occurrences (hist [c]);
  occurrences += 1;
  size += 1;
}
/*!
 * @brief Draws a symbol at random, weighted by its recorded count.
 *
 * A value r is drawn in [0, size) with rand (), then the table is walked while
 * subtracting each count from r: the symbol whose range contains r is returned.
 *
 * @return the drawn symbol, or "*" when nothing has been recorded
 */
string
LexFreq::getChar () const {
  if (!size)
    return "*";
  long r (rand () % size);
  for (const auto& [key, value]: hist) {
    // Strict comparison: a symbol seen `value` times owns exactly `value`
    // slots. With `<=` the first symbols owned one slot too many and, for
    // instance, the second of two symbols seen once each was never drawn.
    if (r < value)
      return key;
    r -= value;
  }
  return "*";
}
// ========================================

55
cpp/TestAutoText.cpp Normal file
View File

@ -0,0 +1,55 @@
/*
La Part du feu. Les Terreurs du bourgeois Prudence et de son ami Furibus, 1873 par M.-L. Gagneur (1832-1902).
https://gallica.bnf.fr/services/engine/search/sru?operation=searchRetrieve&exactSearch=false&collapsing=true&version=1.2&query=(dc.creator%20all%20%22Marie-Louise%20Gagneur%22%20or%20dc.contributor%20all%20%22Marie-Louise%20Gagneur%22%20)%20&suggest=10&keywords=Marie-Louise%20Gagneur
https://fr.wikisource.org/wiki/La_Part_du_feu?rk=42918;4
*/
#include <iostream>
#include <fstream>
#include <cstdlib>
#include "TextProdChar.hpp"
using namespace std;
/*!
 * @brief Debug helper: echoes standard input character by character.
 *
 * Each ASCII byte is written followed by "/". Each byte with the high bit set
 * is written together with the UTF-8 continuation bytes (10xxxxxx) that follow
 * it, then "\". Line ends read by getline are not echoed.
 */
void
test () {
  for (std::string line; std::getline (std::cin, line); ) {
    for (std::string::const_iterator it (line.cbegin ()); it != line.cend (); ) {
      const unsigned char c = *it;
      ++it;
      std::cout << c;
      if (! (c & 0b10000000)) {
        std::cout << "/";
        continue;
      }
      // Consume however many continuation bytes really follow (2-, 3- or
      // 4-byte characters) and never step past the end of the line, even
      // when the input stops in the middle of a sequence.
      while (it != line.cend () &&
             (static_cast<unsigned char> (*it) & 0b11000000) == 0b10000000) {
        std::cout << *it;
        ++it;
      }
      std::cout << "\\";
    }
  }
}
// ========================================
int
main (int argc, char** argv) {
cerr << endl << " ** Test TextProdChar" << endl;
// ifstream in ("in.txt");
// ofstream out ("out.txt");
TextProdChar textProdChar;
srand (0); //(time (0));
//test ();
textProdChar.learn (cin);
// cerr << textProdChar << endl;
textProdChar.prod (cout, 1000);
cerr << endl;
return 0;
}
// ========================================

91
cpp/TextProdChar.cpp Normal file
View File

@ -0,0 +1,91 @@
#include <iostream>
#include <string>
#include <cstring>
#include <map>
#include <ctype.h>
#include <boost/algorithm/string.hpp>
#include "Context.hpp"
#include "LexFreq.hpp"
#include "TextProdChar.hpp"
using namespace std;
// ========================================
/*!
 * @brief Prints a summary of the learned model.
 *
 * Writes the frequency table of the empty context when there is one, then the
 * number of contexts, then one "context = getSize ()" line per context.
 *
 * @param os the destination stream
 * @param ia the model to print
 * @return os
 */
ostream&
operator << (ostream& os, const TextProdChar &ia) {
  // map::at throws std::out_of_range when the empty context was never
  // learned (e.g. empty input), so look it up without throwing.
  const auto start (ia.prevFreq.find (string ()));
  if (start != ia.prevFreq.end ())
    os << start->second;
  os << " size = " << ia.prevFreq.size () << endl;
  for (const auto& [key, value]: ia.prevFreq)
    os << key << " = " << value.getSize () << endl;
  return os;
}
// Returns the byte length (1 to 4) of the UTF-8 sequence announced by its
// first byte, or 0 when that byte cannot start a sequence.
static int
utf8SequenceLength (const unsigned char lead) {
  if (! (lead & 0b10000000))
    return 1;
  if ((lead & 0b11100000) == 0b11000000)
    return 2;
  if ((lead & 0b11110000) == 0b11100000)
    return 3;
  if ((lead & 0b11111000) == 0b11110000)
    return 4;
  return 0;
}

/*!
 * @brief Reads text line by line and records, for every context, which
 *        character followed it.
 *
 * ASCII characters rejected by Context::validChar are skipped. Separators and
 * end marks are skipped while the context is empty. Every other character is
 * counted under the current context, then pushed into the context.
 *
 * @param in the stream to read until getline fails
 */
void
TextProdChar::learn (istream &in) {
  string comp;
  for (string line; getline (in, line); ) {
    for (string::const_iterator it (line.cbegin ()); it != line.cend (); ++it) {
      int length (utf8SequenceLength (static_cast<unsigned char> (*it)));
      if (length == 1) {
        // ASCII
        if (! Context::validChar (*it))
          continue;
      } else if (length == 0) {
        // Not a valid first byte: flagged on standard output and, as before,
        // still recorded as a one-byte symbol.
        cout << "*";
        length = 1;
      } else if (line.cend () - it < length) {
        // The line stops in the middle of a multi-byte character: reading the
        // missing bytes would run past the end of the line, so the incomplete
        // tail is flagged and dropped.
        cout << "*";
        break;
      }
      comp.assign (it, it + length);
      // Leave `it` on the last byte of the character; the loop steps past it.
      it += length - 1;
      if (context.empty () && (context.isSep (comp) || context.isEnd (comp)))
        continue;
      prevFreq[context.getState ()].addChar (comp);
      context.forward (comp);
    }
  }
}
/*!
 * @brief Writes generated text, one drawn character at a time.
 *
 * The context is reset first. The first character drawn on an empty context is
 * upper-cased with boost::to_upper. A line break is written after every end
 * mark, and after the first separator once more than 80 characters have been
 * written on the line. Production stops at the first end mark drawn after more
 * than `size` characters, or when nothing can be drawn any more.
 *
 * @param out the destination stream
 * @param size the number of characters after which the next end mark stops
 *        the production
 */
void
TextProdChar::prod (ostream &out, const long &size) {
  context.reset ();
  bool nl (false), end (false);
  for (long i (0), col (0); ; ++i, ++col) {
    if (i > size)
      end = true;
    if (col > 80)
      nl = true;
    const bool maj (context.empty ());
    const auto known (prevFreq.find (context.getState ()));
    if (known == prevFreq.end () || known->second.getSize () == 0) {
      // Dead end: nothing was learned after this context (empty model, or the
      // context only occurred at the very end of the learned text). Drawing
      // from it used to yield "*" for ever and the loop never terminated.
      if (maj || end)
        break;
      // Otherwise start over from an empty context on a new line.
      context.reset ();
      out << endl;
      nl = false;
      col = 0;
      continue;
    }
    string next (known->second.getChar ());
    context.forward (next);
    if (maj)
      boost::to_upper (next);
    out << next;
    if (context.isEnd (next) ||
        (nl && context.isSep (next))) {
      out << endl;
      nl = false;
      col = 0;
      if (end && context.isEnd (next))
        break;
    }
  }
  out << endl;
}
// ========================================

File diff suppressed because one or more lines are too long

33
include/Context.hpp Normal file
View File

@ -0,0 +1,33 @@
/*!
* @file Context.hpp
* @brief calcul de fréquence et prédiction
* @author F. Merciol
* @version 0.1
* @date 18 / août 2023
*/
#ifndef _Context_hpp_
#define _Context_hpp_
#include <iostream>
#include <map>
using namespace std;
// ========================================
/*!
 * @brief Sliding window over the most recently seen characters.
 *
 * The window is stored as raw bytes in prev. forward () appends a character
 * and trims the window; an end mark clears it.
 */
class Context {
  //! Bytes of the characters currently in the window.
  std::string prev;
  //! Threshold on prev.size () above which forward () trims the window.
  static const int prevSize;
  //! Characters treated as end marks.
  static const std::string end;
  //! Characters treated as separators.
  static const std::string sep;

public:
  //! True when c is alphabetic, an end mark or a separator.
  static bool validChar (const char &c);
  //! True when c is found in the end-mark set.
  static bool isEnd (const std::string &c);
  //! True when c is found in the separator set.
  static bool isSep (const std::string &c);

  //! True when the window holds no character. Usable on a const Context.
  bool empty () const { return prev.empty (); }
  //! Empties the window.
  void reset () { prev.clear (); }
  //! Copy of the window content. Usable on a const Context.
  std::string getState () const { return prev; }
  //! Appends current to the window (see the definition for the skip/trim rules).
  void forward (std::string current);
};
// ========================================
#endif // _Context_hpp_

30
include/LexFreq.hpp Normal file
View File

@ -0,0 +1,30 @@
/*!
* @file LexFreq.hpp
* @brief calcul de fréquence et prédiction
* @author F. Merciol
* @version 0.1
* @date 18 / août 2023
*/
#ifndef _LexFreq_hpp_
#define _LexFreq_hpp_
#include <iostream>
#include <map>
using namespace std;
// ========================================
// Table counting how many times each symbol (held as a string) was recorded.
class LexFreq {
// Total number of recorded occurrences: set to 0 by the constructor and
// incremented by every addChar () call.
long size;
// Occurrence count of each symbol.
map<string, long> hist;
public:
// Prints the total, then one "symbol = count" line per entry of hist.
friend ostream& operator << (ostream& os, const LexFreq &freqLex);
// Builds an empty table.
LexFreq ();
// Number of distinct symbols (hist.size ()), not the total held in `size`.
long getSize () const { return hist.size (); }
// Records one more occurrence of c.
void addChar (const string &c);
// Returns a symbol picked with rand (), or "*" when the table is empty.
string getChar () const;
};
// ========================================
#endif // _LexFreq_hpp_

32
include/TextProdChar.hpp Normal file
View File

@ -0,0 +1,32 @@
/*!
* @file TextProdChar.hpp
* @brief calcul de fréquence et prédiction
* @author F. Merciol
* @version 0.1
* @date 18 / août 2023
*/
#ifndef _TextProdChar_hpp_
#define _TextProdChar_hpp_
#include <iostream>
#include <map>
#include "Context.hpp"
#include "LexFreq.hpp"
using namespace std;
// ========================================
// Character-level text producer: learns which character follows each context,
// then draws characters from those tables.
class TextProdChar {
// Window over the most recent characters, shared by learn () and prod ().
Context context;
// For each context state (Context::getState ()), the table of the characters
// that followed it.
map<string, LexFreq> prevFreq;
public:
// Prints a summary of prevFreq.
friend ostream& operator << (ostream& os, const TextProdChar &ia);
// Reads `in` line by line and fills prevFreq.
void learn (istream &in);
// Writes generated text to `out`; `size` is the character count after which
// the next end mark stops the production.
void prod (ostream &out, const long &size);
};
// ========================================
#endif // _TextProdChar_hpp_

3
mainpage.md Normal file
View File

@ -0,0 +1,3 @@
Exemple de production automatique de texte.