92 lines
2.0 KiB
C++
92 lines
2.0 KiB
C++
|
#include <iostream>
|
||
|
#include <string>
|
||
|
#include <cstring>
|
||
|
#include <map>
|
||
|
#include <ctype.h>
|
||
|
#include <boost/algorithm/string.hpp>
|
||
|
|
||
|
#include "Context.hpp"
|
||
|
#include "LexFreq.hpp"
|
||
|
#include "TextProdChar.hpp"
|
||
|
|
||
|
using namespace std;
|
||
|
|
||
|
// ========================================
|
||
|
/// Dump a TextProdChar frequency model to a stream.
///
/// Output layout:
///   1. the table stored under the empty-string key, when such an entry
///      exists (streamed through that table's own operator <<),
///   2. " size = N" where N is the number of entries in prevFreq,
///   3. one "key = <value.getSize ()>" line per entry.
///
/// @param os  destination stream
/// @param ia  model to dump
/// @return    os, to allow chaining
ostream&
operator << (ostream& os, const TextProdChar &ia) {
  const string empty;
  // Use find () rather than at (): at () throws std::out_of_range when no
  // entry exists for the empty key, which would make printing a model
  // without that entry abort the caller.
  const auto root = ia.prevFreq.find (empty);
  if (root != ia.prevFreq.end ())
    os << root->second;
  os << " size = " << ia.prevFreq.size () << '\n';
  for (const auto& [key, value]: ia.prevFreq)
    os << key << " = " << value.getSize () << '\n';
  // Flush once at the end instead of once per line.
  os.flush ();
  return os;
}
|
||
|
|
||
|
|
||
|
void
|
||
|
TextProdChar::learn (istream &in) {
|
||
|
string comp;
|
||
|
for (string line; getline (in, line); ) {
|
||
|
for (string::iterator it (line.begin ());
|
||
|
it != line.end ();
|
||
|
++it) {
|
||
|
char c = *it;
|
||
|
comp.clear ();
|
||
|
comp.push_back (c);
|
||
|
|
||
|
if (c & 0b10000000) {
|
||
|
if ((c & 0b11100000) == 0b11000000) {
|
||
|
++it; comp.push_back (*it);
|
||
|
} else if ((c & 0b11110000) == 0b11100000) {
|
||
|
++it; comp.push_back (*it);
|
||
|
++it; comp.push_back (*it);
|
||
|
} else if ((c & 0b11111000) == 0b11110000) {
|
||
|
++it; comp.push_back (*it);
|
||
|
++it; comp.push_back (*it);
|
||
|
++it; comp.push_back (*it);
|
||
|
} else {
|
||
|
cout << "*";
|
||
|
}
|
||
|
} else {
|
||
|
// ASCII
|
||
|
if (! Context::validChar (c))
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
if (context.empty () && (context.isSep (comp) || context.isEnd (comp)))
|
||
|
continue;
|
||
|
prevFreq[context.getState ()].addChar (comp);
|
||
|
context.forward (comp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
TextProdChar::prod (ostream &out, const long &size) {
|
||
|
context.reset ();
|
||
|
bool nl (false), end (false);
|
||
|
for (long i (0), col (0); ; ++i, ++col) {
|
||
|
if (i > size)
|
||
|
end = true;
|
||
|
if (col > 80)
|
||
|
nl = true;
|
||
|
bool maj (context.empty ());
|
||
|
string next (prevFreq[context.getState ()].getChar ());
|
||
|
context.forward (next);
|
||
|
if (maj)
|
||
|
boost::to_upper (next);
|
||
|
out << next;
|
||
|
if (context.isEnd (next) ||
|
||
|
(nl && context.isSep (next))) {
|
||
|
out << endl;
|
||
|
nl = false;
|
||
|
col = 0;
|
||
|
if (end && context.isEnd (next))
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
out << endl;
|
||
|
}
|
||
|
|
||
|
// ========================================
|