première version

This commit is contained in:
François 2023-08-21 08:40:23 +02:00
parent c6d71faca7
commit 3e44eb5340
13 changed files with 3096 additions and 3 deletions

2733
Doxyfile Normal file

File diff suppressed because it is too large Load Diff

14
Makefile Normal file
View File

@ -0,0 +1,14 @@
# Default target: build the program and the documentation, then run the
# generator with the sample corpus on standard input.
all: testAutoText doc
./testAutoText < data/LesForcatsDuMariage-lower.txt
# Compile and link the test program from all sources; it is rebuilt whenever
# one of the listed sources or headers changes.
testAutoText: cpp/TestAutoText.cpp cpp/TextProdChar.cpp include/TextProdChar.hpp cpp/LexFreq.cpp include/LexFreq.hpp cpp/Context.cpp include/Context.hpp
g++ -g -I ./include/ cpp/TestAutoText.cpp cpp/TextProdChar.cpp cpp/LexFreq.cpp cpp/Context.cpp -o testAutoText
# Remove editor backup files (names ending in '~').
clean:
rm -fr *~ cpp/*~ include/*~
# Like clean, and also remove the binary and autoTextDoc
# (presumably the Doxygen output directory -- confirm in Doxyfile).
wipe: clean
rm -fr testAutoText autoTextDoc
# Generate the documentation with Doxygen, discarding its standard output.
doc:
doxygen Doxyfile >/dev/null

BIN
autoText.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

57
cpp/Context.cpp Normal file
View File

@ -0,0 +1,57 @@
#include <iostream>
#include <string>
#include <cstring>
#include <map>
#include <ctype.h>
#include "Context.hpp"
using namespace std;
// ========================================
// Maximum number of characters (UTF-8 code points) kept as context.
const int Context::prevSize = 10;
// Punctuation marks that end a sentence and clear the context.
const string Context::end = ".?!";
// Characters treated as separators inside a sentence.
const string Context::sep = " ,;-";
/*!
 * @brief Tells whether a single-byte character is accepted by the model.
 * @param c byte to test
 * @return true when c is a letter, an end mark (Context::end) or a
 *         separator (Context::sep)
 */
bool
Context::validChar (const char &c) {
  // The <cctype> classifiers require a value representable as unsigned char:
  // passing a negative char (any byte >= 0x80 where char is signed) is
  // undefined behaviour, hence the cast.
  if (isalpha (static_cast<unsigned char> (c)))
    return true;
  return
    end.find (c) != string::npos ||
    sep.find (c) != string::npos;
}
bool
Context::isEnd (const string &c) {
return end.find (c) != string::npos;
}
bool
Context::isSep (const string &c) {
return sep.find (c) != string::npos;
}
/*!
 * @brief Advances the context with the character just read or produced.
 *
 * An end mark clears the context; a space on an empty context is ignored;
 * any other character is appended and the context is then trimmed to its
 * last prevSize UTF-8 code points.
 *
 * @param current the character (one UTF-8 sequence) to append
 */
void
Context::forward (string current) {
  if (prevSize == 0)
    return;
  const bool leadingSpace (prev.empty () && current == " ");
  if (leadingSpace)
    return;
  const bool sentenceOver (end.find (current) != string::npos);
  if (sentenceOver) {
    prev.clear ();
    return;
  }
  prev.append (current);
  if (! (prev.size () > prevSize))
    return;
  // Walk backwards counting code-point starts (every byte that is not a
  // 10xxxxxx continuation byte); cut in front of the prevSize-th one.
  int remaining (prevSize);
  for (string::size_type idx (prev.size ()); idx-- > 0; ) {
    const bool continuation ((prev [idx] & 0b11000000) == 0b10000000);
    if (continuation)
      continue;
    if (--remaining == 0) {
      prev = prev.substr (idx);
      break;
    }
  }
}
// ========================================

44
cpp/LexFreq.cpp Normal file
View File

@ -0,0 +1,44 @@
#include <iostream>
#include <string>
#include <cstring>
#include <map>
#include <ctype.h>
#include "LexFreq.hpp"
using namespace std;
// ========================================
/*!
 * @brief Dumps a frequency table: the total count, then one
 *        "key = count" line per recorded character.
 * @param os destination stream
 * @param freqLex table to print
 * @return os, to allow chaining
 */
ostream&
operator << (ostream& os, const LexFreq &freqLex) {
  os << " size = " << freqLex.size << endl;
  for (auto entry (freqLex.hist.cbegin ()); entry != freqLex.hist.cend (); ++entry)
    os << entry->first << " = " << entry->second << endl;
  return os;
}
LexFreq::LexFreq () :
size (0) {
}
/*!
 * @brief Records one more occurrence of a character.
 * @param c the character (one UTF-8 sequence) that was observed
 */
void
LexFreq::addChar (const string &c) {
  // operator[] creates the entry with a zero count on first sight.
  hist [c] += 1;
  size += 1;
}
/*!
 * @brief Draws a character at random, weighted by its recorded count.
 * @return one of the recorded characters, or "*" when the table is empty
 */
string
LexFreq::getChar () const {
  if (!size)
    return "*";
  // r is uniform in [0, size - 1]; each entry owns a half-open interval
  // whose width is its count.
  long r (rand () % size);
  for (const auto& [key, value]: hist) {
    // Strict comparison: with "<=" the first entry would own count + 1
    // values of r and be over-represented at the expense of the last one.
    if (r < value)
      return key;
    r -= value;
  }
  // Only reachable if size and the counts in hist disagree.
  return "*";
}
// ========================================

55
cpp/TestAutoText.cpp Normal file
View File

@ -0,0 +1,55 @@
/*
La Part du feu. Les Terreurs du bourgeois Prudence et de son ami Furibus, 1873 par M.-L. Gagneur (1832-1902).
https://gallica.bnf.fr/services/engine/search/sru?operation=searchRetrieve&exactSearch=false&collapsing=true&version=1.2&query=(dc.creator%20all%20%22Marie-Louise%20Gagneur%22%20or%20dc.contributor%20all%20%22Marie-Louise%20Gagneur%22%20)%20&suggest=10&keywords=Marie-Louise%20Gagneur
https://fr.wikisource.org/wiki/La_Part_du_feu?rk=42918;4
*/
#include <iostream>
#include <fstream>
#include <cstdlib>
#include "TextProdChar.hpp"
using namespace std;
/*!
 * @brief Debug helper: echoes standard input, marking each character.
 *
 * An ASCII byte is written followed by "/". A byte with the high bit set is
 * written together with the byte that follows it (treated as one two-byte
 * sequence) and then "\".
 */
void
test () {
  for (std::string line; std::getline (std::cin, line); ) {
    for (std::string::iterator it (line.begin ());
         it != line.end ();
         ++it) {
      const unsigned char c = *it;
      if (c & 0b10000000) {
        std::cout << c;
        // A high-bit byte may be the last one of the line (truncated
        // sequence): only consume a second byte when there is one, otherwise
        // the iterator would be dereferenced at end() and then moved past it.
        if (it + 1 != line.end ()) {
          ++it;
          std::cout << *it;
        }
        std::cout << "\\";
        continue;
      }
      std::cout << c << "/";
    }
  }
}
// ========================================
// Entry point: learns a character model from standard input, then writes
// generated text to standard output. Banner and final newline go to
// standard error. The command-line arguments are not used.
int
main (int argc, char** argv) {
cerr << endl << " ** Test TextProdChar" << endl;
// Disabled alternative: file streams instead of the standard streams.
// ifstream in ("in.txt");
// ofstream out ("out.txt");
TextProdChar textProdChar;
// Fixed seed, so every run draws the same rand() sequence; the time-based
// seed is kept as a disabled alternative.
srand (0); //(time (0));
// Disabled call to the debug helper.
//test ();
textProdChar.learn (cin);
// Disabled dump of the learned model.
// cerr << textProdChar << endl;
// Ask for about 1000 characters; prod decides when it actually stops.
textProdChar.prod (cout, 1000);
cerr << endl;
return 0;
}
// ========================================

91
cpp/TextProdChar.cpp Normal file
View File

@ -0,0 +1,91 @@
#include <iostream>
#include <string>
#include <cstring>
#include <map>
#include <ctype.h>
#include <boost/algorithm/string.hpp>
#include "Context.hpp"
#include "LexFreq.hpp"
#include "TextProdChar.hpp"
using namespace std;
// ========================================
/*!
 * @brief Dumps the model: the frequency table of the empty context (when
 *        there is one), the number of known contexts, then one
 *        "context = number of distinct followers" line per context.
 * @param os destination stream
 * @param ia model to print
 * @return os, to allow chaining
 */
ostream&
operator << (ostream& os, const TextProdChar &ia) {
  // map::at throws std::out_of_range when the key is missing, which happens
  // for a model that has not learned anything yet; look the entry up instead.
  const auto start (ia.prevFreq.find (string ()));
  if (start != ia.prevFreq.end ())
    os << start->second;
  os << " size = " << ia.prevFreq.size () << endl;
  for (const auto& [key, value]: ia.prevFreq)
    os << key << " = " << value.getSize () << endl;
  return os;
}
// Length in bytes of the UTF-8 sequence introduced by `lead` (1 for ASCII),
// or 0 when `lead` cannot start a sequence (continuation byte or 11111xxx).
static size_t
utf8SequenceLength (const unsigned char lead) {
  if (! (lead & 0b10000000))
    return 1;
  if ((lead & 0b11100000) == 0b11000000)
    return 2;
  if ((lead & 0b11110000) == 0b11100000)
    return 3;
  if ((lead & 0b11111000) == 0b11110000)
    return 4;
  return 0;
}

/*!
 * @brief Learns, for every context, how often each character follows it.
 *
 * The input is read line by line and split into UTF-8 characters. ASCII
 * characters rejected by Context::validChar are skipped, as are separators
 * and end marks met while the context is empty. Every other character is
 * counted for the current context, which is then advanced.
 *
 * A byte that cannot start a sequence, or a sequence cut short by the end of
 * the line, is reported with "*" on standard output and skipped. Reading the
 * missing bytes would run past the end of the line, and learning a lone byte
 * would put invalid UTF-8 into the model.
 *
 * @param in stream providing the training text
 */
void
TextProdChar::learn (istream &in) {
  string comp;
  for (string line; getline (in, line); ) {
    for (size_t pos (0); pos < line.size (); ) {
      const size_t length (utf8SequenceLength (static_cast<unsigned char> (line [pos])));
      if (!length || length > line.size () - pos) {
        // Malformed or truncated sequence: flag it and resynchronise on the
        // next byte.
        cout << "*";
        ++pos;
        continue;
      }
      comp.assign (line, pos, length);
      pos += length;
      // Only single-byte (ASCII) characters are filtered.
      if (length == 1 && ! Context::validChar (comp [0]))
        continue;
      if (context.empty () && (context.isSep (comp) || context.isEnd (comp)))
        continue;
      prevFreq[context.getState ()].addChar (comp);
      context.forward (comp);
    }
  }
}
/*!
 * @brief Generates text from the learned model.
 *
 * Characters are drawn one at a time from the frequency table of the current
 * context. The first character after an empty context is upper-cased. A line
 * break is written after every end mark, and after the first separator once
 * a line exceeds 80 characters. Generation stops at the first end mark
 * produced after more than `size` characters.
 *
 * When the current context was never seen during learning, generation
 * restarts from the empty context. It stops if the requested size has been
 * reached or if the model knows nothing at all. Looking contexts up with
 * operator[] would insert empty tables into the model and produce "*"
 * forever without ever reaching an end mark.
 *
 * NOTE(review): a model that contains no end mark at all can still generate
 * without stopping; confirm whether a hard upper bound is wanted.
 *
 * @param out destination stream
 * @param size minimum number of characters to produce before stopping
 */
void
TextProdChar::prod (ostream &out, const long &size) {
  context.reset ();
  bool nl (false), end (false);
  for (long i (0), col (0); ; ++i, ++col) {
    if (i > size)
      end = true;
    if (col > 80)
      nl = true;
    auto freq (prevFreq.find (context.getState ()));
    if (freq == prevFreq.end () || !freq->second.getSize ()) {
      // Dead end: nothing is known to follow this context.
      if (end || context.empty ())
        break;
      context.reset ();
      freq = prevFreq.find (context.getState ());
      if (freq == prevFreq.end () || !freq->second.getSize ())
        break;
    }
    const bool maj (context.empty ());
    string next (freq->second.getChar ());
    context.forward (next);
    if (maj)
      boost::to_upper (next);
    out << next;
    if (context.isEnd (next) ||
        (nl && context.isSep (next))) {
      out << endl;
      nl = false;
      col = 0;
      if (end && context.isEnd (next))
        break;
    }
  }
  out << endl;
}
// ========================================

File diff suppressed because one or more lines are too long

33
include/Context.hpp Normal file
View File

@ -0,0 +1,33 @@
/*!
* @file Context.hpp
* @brief sliding window of the most recent characters, used as the prediction context
* @author F. Merciol
* @version 0.1
* @date 18 / août 2023
*/
#ifndef _Context_hpp_
#define _Context_hpp_
#include <iostream>
#include <map>
using namespace std;
// ========================================
/*!
 * @brief Sliding window over the most recent characters of the current
 *        sentence, used as the key for prediction.
 */
class Context {
  /// Characters currently held, oldest first (UTF-8 bytes).
  std::string prev;
  /// Maximum number of characters kept in prev.
  static const int prevSize;
  /// End-of-sentence marks.
  static const std::string end;
  /// Separators.
  static const std::string sep;
public:
  /// @return true when the single byte c is a letter, an end mark or a separator.
  static bool validChar (const char &c);
  /// @return true when c is found in the end-mark set.
  static bool isEnd (const std::string &c);
  /// @return true when c is found in the separator set.
  static bool isSep (const std::string &c);
  // The two observers below are const so that they can be used through a
  // const Context (or a const reference to one).
  /// @return true when no character is held.
  bool empty () const { return prev.empty (); }
  /// Forgets every character held.
  void reset () { prev.clear (); }
  /// @return a copy of the characters currently held.
  std::string getState () const { return prev; }
  /// Appends current to the window (see the definition for the exact rules).
  void forward (std::string current);
};
// ========================================
#endif // _Context_hpp_

30
include/LexFreq.hpp Normal file
View File

@ -0,0 +1,30 @@
/*!
* @file LexFreq.hpp
* @brief calcul de fréquence et prédiction
* @author F. Merciol
* @version 0.1
* @date 18 / août 2023
*/
#ifndef _LexFreq_hpp_
#define _LexFreq_hpp_
#include <iostream>
#include <map>
using namespace std;
// ========================================
/*!
 * @brief Occurrence counts of characters, with weighted random draw.
 */
class LexFreq {
private:
  /// Total number of occurrences recorded through addChar.
  long size;
  /// Number of occurrences recorded for each character.
  std::map<std::string, long> hist;

public:
  /// Builds an empty table.
  LexFreq ();

  /// Prints the total count followed by one line per character.
  friend std::ostream& operator << (std::ostream& os, const LexFreq &freqLex);

  /// @return the number of distinct characters recorded (not the total count).
  long getSize () const { return hist.size (); }

  /// Records one occurrence of c.
  void addChar (const std::string &c);

  /// @return a recorded character drawn at random, or "*" when the table is empty.
  std::string getChar () const;
};
// ========================================
#endif // _LexFreq_hpp_

32
include/TextProdChar.hpp Normal file
View File

@ -0,0 +1,32 @@
/*!
* @file TextProdChar.hpp
* @brief calcul de fréquence et prédiction
* @author F. Merciol
* @version 0.1
* @date 18 / août 2023
*/
#ifndef _TextProdChar_hpp_
#define _TextProdChar_hpp_
#include <iostream>
#include <map>
#include "Context.hpp"
#include "LexFreq.hpp"
using namespace std;
// ========================================
class TextProdChar {
Context context;
map<string, LexFreq> prevFreq;
public:
friend ostream& operator << (ostream& os, const TextProdChar &ia);
void learn (istream &in);
void prod (ostream &out, const long &size);
};
// ========================================
#endif // _TextProdChar_hpp_

3
mainpage.md Normal file
View File

@ -0,0 +1,3 @@
Exemple de production automatique de texte.