// autoText/cpp/TextProdChar.cpp
// 94 lines, 2.1 KiB, C++
// Revision timestamp (from repository history view): 2023-08-21 08:40:23 +02:00
#include <iostream>
#include <string>
#include <cstring>
#include <map>
#include <ctype.h>
#include <boost/algorithm/string.hpp>
#include "Context.hpp"
#include "LexFreq.hpp"
#include "TextProdChar.hpp"
using namespace std;
// ========================================
// Dumps the frequency tables of a TextProdChar to `os`.
//
// Output, in order:
//   1. the table stored under the empty-string key, if there is one,
//   2. " size = <number of tables>",
//   3. one "<key> = <value.getSize ()>" line per table.
//
// Returns `os` so calls can be chained.
ostream&
operator << (ostream& os, const TextProdChar &ia) {
  // Look the empty-key table up with find() rather than at(): at() throws
  // std::out_of_range when the key is missing, which made dumping an object
  // that has no such entry impossible.
  const string empty;
  const auto root = ia.prevFreq.find (empty);
  if (root != ia.prevFreq.end ())
    os << root->second;
  os << " size = " << ia.prevFreq.size () << '\n';
  for (const auto& [key, value]: ia.prevFreq)
    os << key << " = " << value.getSize () << '\n';
  return os;
}
// Feeds the text read from `in` into the model.
//
// The stream is consumed line by line (getline drops the line terminator).
// Each line is cut into symbols: a single ASCII byte, or a 2-, 3- or 4-byte
// UTF-8 sequence whose length is taken from its lead byte. For every kept
// symbol the table of the current context state is updated with addChar()
// and the context is advanced with forward().
//
// Symbols that are skipped:
//   - ASCII bytes rejected by Context::validChar(),
//   - separator / end symbols seen while the context is empty.
//
// Malformed input:
//   - a high-bit byte that is not a valid lead byte prints "*" on cout and is
//     then processed as a one-byte symbol;
//   - a multi-byte sequence cut short by the end of the line prints "*" on
//     cout and the rest of that line is dropped. Reading the missing bytes
//     would run past the end of the string.
void
TextProdChar::learn (istream &in) {
  string comp;
  for (string line; getline (in, line); ) {
    const string::size_type len (line.size ());
    for (string::size_type pos (0); pos < len; ) {
      const char c = line[pos];
      // Number of bytes making up the symbol that starts at `pos`.
      string::size_type width (1);
      if (c & 0b10000000) {
        if ((c & 0b11100000) == 0b11000000) {
          width = 2;
        } else if ((c & 0b11110000) == 0b11100000) {
          width = 3;
        } else if ((c & 0b11111000) == 0b11110000) {
          width = 4;
        } else {
          // Not a valid lead byte: flag it, keep it as a one-byte symbol.
          cout << "*";
        }
      } else {
        // ASCII
        if (! Context::validChar (c)) {
          ++pos;
          continue;
        }
      }
      if (width > len - pos) {
        // Truncated sequence: the line ends before the announced length.
        cout << "*";
        break;
      }
      comp.assign (line, pos, width);
      pos += width;
      // Do not start a context with a separator or an end symbol.
      if (context.empty () && (context.isSep (comp) || context.isEnd (comp)))
        continue;
      prevFreq[context.getState ()].addChar (comp);
      context.forward (comp);
    }
  }
}
// Writes generated text to `out`.
//
// The context is reset, then symbols are drawn one at a time with getChar()
// from the table of the current context state and the context is advanced
// with each one. A symbol drawn while the context is empty is upper-cased
// before being written.
//
// Line breaks: a newline is written after every end symbol, and after the
// first separator once more than 80 symbols have been written on the current
// line.
//
// Termination: `size` is a soft limit. Once more than `size` symbols have
// been produced, generation stops as soon as Context::getPrevSize () is zero
// or an end symbol is produced. A final newline is always written.
//
// NOTE(review): prevFreq[...] looks like a map-style operator[], which would
// create an empty table for a state never seen during learning. What
// getChar () returns in that case is not visible here; confirm.
void
TextProdChar::prod (ostream &out, const long &size) {
  context.reset ();
  bool nl (false), end (false);
  for (long i (0), col (0); ; ++i, ++col) {
    if (i > size)
      end = true;   // soft limit reached: stop at the next suitable point
    if (col > 80)
      nl = true;    // line is long enough: break at the next separator
    // Must be sampled before forward () changes the context.
    bool maj (context.empty ());
    string next (prevFreq[context.getState ()].getChar ());
    context.forward (next);
    if (maj)
      boost::to_upper (next);
    out << next;
    if (end && ! Context::getPrevSize ())
      break;
    if (context.isEnd (next) ||
        (nl && context.isSep (next))) {
      out << endl;
      nl = false;
      col = 0;
      if (end && context.isEnd (next))
        break;
    }
  }
  out << endl;
}
// ========================================