Browse Source

première version

master
François 9 months ago
parent
commit
3e44eb5340
  1. 2733
      Doxyfile
  2. 14
      Makefile
  3. 6
      README.md
  4. BIN
      autoText.png
  5. 57
      cpp/Context.cpp
  6. 44
      cpp/LexFreq.cpp
  7. 55
      cpp/TestAutoText.cpp
  8. 91
      cpp/TextProdChar.cpp
  9. 1
      data/LesForcatsDuMariage-lower.txt
  10. 33
      include/Context.hpp
  11. 30
      include/LexFreq.hpp
  12. 32
      include/TextProdChar.hpp
  13. 3
      mainpage.md

2733
Doxyfile

File diff suppressed because it is too large

14
Makefile

@ -0,0 +1,14 @@
# Build, run and documentation rules for the autoText demo.

# These targets name actions, not files: always run them, even if a file
# or directory with the same name happens to exist.
.PHONY: all clean wipe doc

# Default goal: build the binary and the documentation, then run the
# generator on the bundled corpus.
all: testAutoText doc
	./testAutoText < data/LesForcatsDuMariage-lower.txt

# The sources use structured bindings, so C++17 is requested explicitly
# instead of relying on the compiler's default dialect.
testAutoText: cpp/TestAutoText.cpp cpp/TextProdChar.cpp include/TextProdChar.hpp cpp/LexFreq.cpp include/LexFreq.hpp cpp/Context.cpp include/Context.hpp
	g++ -std=c++17 -g -I ./include/ cpp/TestAutoText.cpp cpp/TextProdChar.cpp cpp/LexFreq.cpp cpp/Context.cpp -o testAutoText

# Remove editor backup files.
clean:
	rm -fr *~ cpp/*~ include/*~

# Remove backup files, the binary and the autoTextDoc directory.
wipe: clean
	rm -fr testAutoText autoTextDoc

# Run doxygen quietly (standard output is discarded).
doc:
	doxygen Doxyfile >/dev/null

6
README.md

@ -1,5 +1,5 @@
# autoText
Exemple de production automatique de texte.
Ce que certains appellent IA n'est que l'application statistique dénuée de sémantique.
Exemple de production automatique de texte.
Ce que certains appellent IA n'est que l'application statistique dénuée de sémantique.

BIN
autoText.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

57
cpp/Context.cpp

@ -0,0 +1,57 @@
#include <iostream>
#include <string>
#include <cstring>
#include <map>
#include <ctype.h>
#include "Context.hpp"
using namespace std;
// ========================================
// Number of characters of history that forward () keeps in the state.
const int Context::prevSize = 10;
// Sentence-ending characters: meeting one of them clears the state.
const string Context::end = ".?!";
// Word separators (the typographic apostrophe is a multi-byte sequence).
const string Context::sep = " ,;’-";
/*!
 * @brief Tells whether a single byte is accepted by the model.
 * @param c byte to classify
 * @return true when c is a letter according to isalpha (), or when it is
 *         one of the bytes stored in `end` or `sep`
 */
bool
Context::validChar (const char &c) {
  // The <ctype.h> classifiers are undefined for negative arguments, and
  // plain char may be signed: go through unsigned char first.
  if (isalpha (static_cast<unsigned char> (c)))
    return true;
  return
    end.find (c) != string::npos ||
    sep.find (c) != string::npos;
}
/*!
 * @brief Tells whether a symbol terminates a sentence.
 * @param c symbol to classify (one character, possibly several bytes)
 * @return true when c is not empty and occurs in `end`
 */
bool
Context::isEnd (const string &c) {
  // find () reports an empty needle as found at position 0, which would
  // classify "" as a sentence end: reject it explicitly.
  return !c.empty () && end.find (c) != string::npos;
}
/*!
 * @brief Tells whether a symbol is a word separator.
 * @param c symbol to classify (one character, possibly several bytes)
 * @return true when c is not empty and occurs in `sep`
 */
bool
Context::isSep (const string &c) {
  // find () reports an empty needle as found at position 0, which would
  // classify "" as a separator: reject it explicitly.
  return !c.empty () && sep.find (c) != string::npos;
}
void
Context::forward (string current) {
if (!prevSize)
return;
if (current == " " && prev.empty ())
return;
if (end.find (current) != string::npos) {
prev.clear ();
return;
}
prev += current;
if (prev.size () > prevSize) {
int count (prevSize);
for (auto rit (prev.crbegin ()); rit != prev.crend (); ++rit)
// (ASCII || start UTF) && countdown
if ((! (*rit & 0b10000000) || (*rit & 0b01000000)) && ! --count) {
prev = prev.substr (prev.crend () - rit -1);
break;
}
}
}
// ========================================

44
cpp/LexFreq.cpp

@ -0,0 +1,44 @@
#include <iostream>
#include <string>
#include <cstring>
#include <map>
#include <ctype.h>
#include "LexFreq.hpp"
using namespace std;
// ========================================
/*!
 * @brief Dumps a frequency table: the total count first, then one
 *        "symbol = count" line per entry of the histogram.
 * @param os output stream
 * @param freqLex table to print
 * @return os, to allow chaining
 */
ostream&
operator << (ostream& os, const LexFreq &freqLex) {
  os << " size = " << freqLex.size << endl;
  for (auto entry (freqLex.hist.cbegin ());
       entry != freqLex.hist.cend ();
       ++entry)
    os << entry->first << " = " << entry->second << endl;
  return os;
}
LexFreq::LexFreq () :
size (0) {
}
void
LexFreq::addChar (const string &c) {
++hist [c];
++size;
}
/*!
 * @brief Draws a symbol at random, weighted by its recorded count.
 *
 * A number r is drawn in [0, size) with rand (); the symbols are then
 * walked in map order and the one whose cumulated count first exceeds r
 * is returned, so each symbol is chosen with probability count / size.
 *
 * @return the drawn symbol, or "*" when nothing has been recorded
 */
string
LexFreq::getChar () const {
  if (!size)
    return "*";
  long r (rand () % size);
  for (const auto& [key, value]: hist) {
    // Strict comparison: a symbol seen `value` times owns exactly `value`
    // slots. With `<=` the first symbols steal one slot each and the last
    // one may never be drawn.
    if (r < value)
      return key;
    r -= value;
  }
  // Only reachable if size and hist disagree.
  return "*";
}
// ========================================

55
cpp/TestAutoText.cpp

@ -0,0 +1,55 @@
/*
La Part du feu. Les Terreurs du bourgeois Prudence et de son ami Furibus, 1873 par M.-L. Gagneur (1832-1902).
https://gallica.bnf.fr/services/engine/search/sru?operation=searchRetrieve&exactSearch=false&collapsing=true&version=1.2&query=(dc.creator%20all%20%22Marie-Louise%20Gagneur%22%20or%20dc.contributor%20all%20%22Marie-Louise%20Gagneur%22%20)%20&suggest=10&keywords=Marie-Louise%20Gagneur
https://fr.wikisource.org/wiki/La_Part_du_feu?rk=42918;4
*/
#include <iostream>
#include <fstream>
#include <cstdlib>
#include "TextProdChar.hpp"
using namespace std;
/*!
 * @brief Debug helper that shows how the standard input is split.
 *
 * Every ASCII byte is echoed followed by "/". A byte with its high bit set
 * is echoed together with the byte that follows it (if any), followed by a
 * backslash.
 */
void
test () {
  for (string line; getline (cin, line); ) {
    for (string::iterator it (line.begin ());
	 it != line.end ();
	 ++it) {
      unsigned char c = *it;
      if (c & 0b10000000) {
	cout << c;
	// A line may end right after the lead byte: only consume the
	// next byte when there is one, never dereference end ().
	if (it + 1 != line.end ()) {
	  ++it;
	  cout << *it;
	}
	cout << "\\";
	continue;
      }
      cout << c << "/";
    }
  }
}
// ========================================
int
main (int argc, char** argv) {
cerr << endl << " ** Test TextProdChar" << endl;
// ifstream in ("in.txt");
// ofstream out ("out.txt");
TextProdChar textProdChar;
srand (0); //(time (0));
//test ();
textProdChar.learn (cin);
// cerr << textProdChar << endl;
textProdChar.prod (cout, 1000);
cerr << endl;
return 0;
}
// ========================================

91
cpp/TextProdChar.cpp

@ -0,0 +1,91 @@
#include <iostream>
#include <string>
#include <cstring>
#include <map>
#include <ctype.h>
#include <boost/algorithm/string.hpp>
#include "Context.hpp"
#include "LexFreq.hpp"
#include "TextProdChar.hpp"
using namespace std;
// ========================================
/*!
 * @brief Dumps a model: the frequency table of the empty state (when it
 *        exists), the number of known states, then one
 *        "state = table size" line per state.
 * @param os output stream
 * @param ia model to print
 * @return os, to allow chaining
 */
ostream&
operator << (ostream& os, const TextProdChar &ia) {
  // at () throws std::out_of_range on a model that has learned nothing;
  // look the empty state up and simply skip it when it is absent.
  const auto start (ia.prevFreq.find (string ()));
  if (start != ia.prevFreq.end ())
    os << start->second;
  os << " size = " << ia.prevFreq.size () << endl;
  for (const auto& [key, value]: ia.prevFreq)
    os << key << " = " << value.getSize () << endl;
  return os;
}
/*!
 * @brief Learns symbol frequencies from a text stream.
 *
 * The stream is read line by line and split into symbols: an ASCII byte
 * accepted by Context::validChar (), or a UTF-8 sequence of 2 to 4 bytes.
 * Each symbol is recorded in the table of the current context state, then
 * the context is advanced. Separators and sentence ends met on an empty
 * context are skipped.
 *
 * @param in stream to read until getline () fails
 */
void
TextProdChar::learn (istream &in) {
  string comp;
  for (string line; getline (in, line); ) {
    for (string::iterator it (line.begin ());
	 it != line.end ();
	 ++it) {
      char c = *it;
      comp.clear ();
      comp.push_back (c);
      if (c & 0b10000000) {
	// Number of continuation bytes announced by the lead byte.
	int extra (0);
	if ((c & 0b11100000) == 0b11000000)
	  extra = 1;
	else if ((c & 0b11110000) == 0b11100000)
	  extra = 2;
	else if ((c & 0b11111000) == 0b11110000)
	  extra = 3;
	else
	  // Not a valid lead byte: flagged on cout, kept as a symbol.
	  cout << "*";
	// Truncated sequence at the end of the line: reading the missing
	// bytes would run past end (), so the incomplete tail is dropped.
	if (line.end () - it <= extra)
	  break;
	for (; extra > 0; --extra) {
	  ++it;
	  comp.push_back (*it);
	}
      } else {
	// ASCII
	if (! Context::validChar (c))
	  continue;
      }
      if (context.empty () && (context.isSep (comp) || context.isEnd (comp)))
	continue;
      prevFreq[context.getState ()].addChar (comp);
      context.forward (comp);
    }
  }
}
/*!
 * @brief Writes randomly generated text using the learned frequencies.
 *
 * Starting from an empty context, a symbol is drawn from the table of the
 * current state, written, and used to advance the context. The first
 * symbol after an empty context is upper-cased. A line break is written
 * after each sentence end, and at the first separator once more than 80
 * symbols were written on the line. Generation stops at the first sentence
 * end after more than `size` symbols.
 *
 * A state that was never learned restarts a sentence instead of being
 * inserted in the model. Generation stops if that happens on an empty
 * context or once the quota is reached.
 *
 * @param out stream receiving the text
 * @param size minimal number of symbols to produce
 */
void
TextProdChar::prod (ostream &out, const long &size) {
  context.reset ();
  bool nl (false), end (false);
  for (long i (0), col (0); ; ++i, ++col) {
    if (i > size)
      end = true;
    if (col > 80)
      nl = true;
    bool maj (context.empty ());
    // find () rather than operator[]: an unknown state must not be added
    // to the model, and it can only yield the "*" placeholder forever.
    const auto known (prevFreq.find (context.getState ()));
    if (known == prevFreq.end ()) {
      if (maj || end)
	break;
      context.reset ();
      continue;
    }
    string next (known->second.getChar ());
    context.forward (next);
    if (maj)
      boost::to_upper (next);
    out << next;
    if (context.isEnd (next) ||
	(nl && context.isSep (next))) {
      out << endl;
      nl = false;
      col = 0;
      if (end && context.isEnd (next))
	break;
    }
  }
  out << endl;
}
// ========================================

1
data/LesForcatsDuMariage-lower.txt

File diff suppressed because one or more lines are too long

33
include/Context.hpp

@ -0,0 +1,33 @@
/*!
* @file Context.hpp
* @brief calcul de fréquence et prédiction
* @author F. Merciol
* @version 0.1
* @date 18 / août 2023
*/
#ifndef _Context_hpp_
#define _Context_hpp_
#include <iostream>
#include <map>
using namespace std;
// ========================================
/*!
 * @brief Sliding window over the last symbols, used as prediction state.
 */
class Context {
  /*! Symbols currently kept, as raw bytes. */
  string prev;

  /*! Maximal number of characters kept in the state. */
  static const int prevSize;
  /*! Sentence-ending characters. */
  static const string end;
  /*! Word-separating characters. */
  static const string sep;

public:
  /*! @return true when byte c is accepted by the model. */
  static bool validChar (const char &c);
  /*! @return true when symbol c ends a sentence. */
  static bool isEnd (const string &c);
  /*! @return true when symbol c separates words. */
  static bool isSep (const string &c);

  // The observers below do not modify the state, so they are declared
  // const and remain callable through a const Context.

  /*! @return true when no symbol is kept. */
  bool empty () const { return prev.empty (); }
  /*! Forgets every symbol kept so far. */
  void reset () { prev.clear (); }
  /*! @return a copy of the symbols currently kept. */
  string getState () const { return prev; }
  /*! Advances the state with symbol current. */
  void forward (string current);
};
// ========================================
#endif // _Context_hpp_

30
include/LexFreq.hpp

@ -0,0 +1,30 @@
/*!
* @file LexFreq.hpp
* @brief calcul de fréquence et prédiction
* @author F. Merciol
* @version 0.1
* @date 18 / août 2023
*/
#ifndef _LexFreq_hpp_
#define _LexFreq_hpp_
#include <iostream>
#include <map>
using namespace std;
// ========================================
/*!
 * @brief Histogram of symbols with a weighted random draw.
 */
class LexFreq {
private:
  /*! Total number of occurrences recorded by addChar (). */
  long size;
  /*! Number of occurrences of each symbol. */
  map<string, long> hist;

public:
  /*! Builds an empty histogram. */
  LexFreq ();

  /*! Records one more occurrence of symbol c. */
  void addChar (const string &c);
  /*! @return a symbol drawn at random from the histogram. */
  string getChar () const;
  /*! @return the number of distinct symbols (not the total count). */
  long getSize () const { return static_cast<long> (hist.size ()); }

  friend ostream& operator << (ostream& os, const LexFreq &freqLex);
};
// ========================================
#endif // _LexFreq_hpp_

32
include/TextProdChar.hpp

@ -0,0 +1,32 @@
/*!
* @file TextProdChar.hpp
* @brief calcul de fréquence et prédiction
* @author F. Merciol
* @version 0.1
* @date 18 / août 2023
*/
#ifndef _TextProdChar_hpp_
#define _TextProdChar_hpp_
#include <iostream>
#include <map>
#include "Context.hpp"
#include "LexFreq.hpp"
using namespace std;
// ========================================
/*!
 * @brief Character-level text generator: learns, for each context state,
 *        the frequency of the symbol that follows it.
 */
class TextProdChar {
private:
  /*! Current state, shared by learn () and prod (). */
  Context context;
  /*! Frequency table of the following symbol, for each state. */
  map<string, LexFreq> prevFreq;

public:
  /*! Learns frequencies from stream in. */
  void learn (istream &in);
  /*! Writes generated text to out; size is the requested symbol count. */
  void prod (ostream &out, const long &size);

  friend ostream& operator << (ostream& os, const TextProdChar &ia);
};
// ========================================
#endif // _TextProdChar_hpp_

3
mainpage.md

@ -0,0 +1,3 @@
Exemple de production automatique de texte.
Loading…
Cancel
Save