mirror of https://github.com/procxx/kepka.git
Alpha 1.0.11: emoji parsing fixed.
Now we ignore all 0xFE0F characters, because some other apps, unlike iOS/macOS, send or omit them inconsistently.
This commit is contained in:
parent
0411f05c39
commit
d259656e89
|
@ -1760,38 +1760,59 @@ void appendCategory(Data &result, const InputCategory &category, const VariatedI
|
||||||
append(bareId, code);
|
append(bareId, code);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (emoji.id.isEmpty()) {
|
if (bareId.isEmpty()) {
|
||||||
logDataError() << "empty emoji id found.";
|
logDataError() << "empty emoji id found.";
|
||||||
result = Data();
|
result = Data();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
auto it = result.map.find(emoji.id);
|
auto it = result.map.find(bareId);
|
||||||
if (it == result.map.cend()) {
|
if (it == result.map.cend()) {
|
||||||
it = result.map.insert(make_pair(emoji.id, result.list.size())).first;
|
it = result.map.insert(make_pair(bareId, result.list.size())).first;
|
||||||
result.list.push_back(move(emoji));
|
result.list.push_back(move(emoji));
|
||||||
} else if (result.list[it->second].postfixed != emoji.postfixed) {
|
} else if (result.list[it->second].postfixed != emoji.postfixed) {
|
||||||
logDataError() << "same emoji found with different postfixed property.";
|
logDataError() << "same emoji found with different postfixed property.";
|
||||||
result = Data();
|
result = Data();
|
||||||
return;
|
return;
|
||||||
|
} else if (result.list[it->second].id != emoji.id) {
|
||||||
|
logDataError() << "same emoji found with different id.";
|
||||||
|
result = Data();
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
if (variatedIds.find(bareId) != variatedIds.cend()) {
|
if (variatedIds.find(bareId) != variatedIds.cend()) {
|
||||||
result.list[it->second].variated = true;
|
result.list[it->second].variated = true;
|
||||||
|
|
||||||
auto baseId = Id();
|
auto baseId = Id();
|
||||||
append(baseId, *from++);
|
if (*from == kPostfix) {
|
||||||
if (from != to && *from == kPostfix) {
|
logDataError() << "bad first symbol in emoji.";
|
||||||
++from;
|
result = Data();
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
append(baseId, *from++);
|
||||||
for (auto color : Colors) {
|
for (auto color : Colors) {
|
||||||
auto colored = Emoji();
|
auto colored = Emoji();
|
||||||
colored.id = baseId;
|
colored.id = baseId;
|
||||||
colored.colored = true;
|
colored.colored = true;
|
||||||
append(colored.id, color);
|
append(colored.id, color);
|
||||||
|
auto bareColoredId = colored.id;
|
||||||
for (auto i = from; i != to; ++i) {
|
for (auto i = from; i != to; ++i) {
|
||||||
append(colored.id, *i);
|
append(colored.id, *i);
|
||||||
|
if (*i != kPostfix) {
|
||||||
|
append(bareColoredId, *i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto it = result.map.find(bareColoredId);
|
||||||
|
if (it == result.map.cend()) {
|
||||||
|
it = result.map.insert(make_pair(bareColoredId, result.list.size())).first;
|
||||||
|
result.list.push_back(move(colored));
|
||||||
|
} else if (result.list[it->second].postfixed != colored.postfixed) {
|
||||||
|
logDataError() << "same emoji found with different postfixed property.";
|
||||||
|
result = Data();
|
||||||
|
return;
|
||||||
|
} else if (result.list[it->second].id != colored.id) {
|
||||||
|
logDataError() << "same emoji found with different id.";
|
||||||
|
result = Data();
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
result.map.insert(make_pair(colored.id, result.list.size()));
|
|
||||||
result.list.push_back(move(colored));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
result.categories.back().push_back(it->second);
|
result.categories.back().push_back(it->second);
|
||||||
|
|
|
@ -467,7 +467,9 @@ EmojiPack GetPack(DBIEmojiTab tab) {\n\
|
||||||
bool Generator::writeFindReplace() {
|
bool Generator::writeFindReplace() {
|
||||||
source_->stream() << "\
|
source_->stream() << "\
|
||||||
\n\
|
\n\
|
||||||
EmojiPtr FindReplace(const QChar *ch, const QChar *end, int *outLength) {\n";
|
EmojiPtr FindReplace(const QChar *start, const QChar *end, int *outLength) {\n\
|
||||||
|
auto ch = start;\n\
|
||||||
|
\n";
|
||||||
|
|
||||||
if (!writeFindFromDictionary(data_.replaces)) {
|
if (!writeFindFromDictionary(data_.replaces)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -482,9 +484,11 @@ EmojiPtr FindReplace(const QChar *ch, const QChar *end, int *outLength) {\n";
|
||||||
bool Generator::writeFind() {
|
bool Generator::writeFind() {
|
||||||
source_->stream() << "\
|
source_->stream() << "\
|
||||||
\n\
|
\n\
|
||||||
EmojiPtr Find(const QChar *ch, const QChar *end, int *outLength) {\n";
|
EmojiPtr Find(const QChar *start, const QChar *end, int *outLength) {\n\
|
||||||
|
auto ch = start;\n\
|
||||||
|
\n";
|
||||||
|
|
||||||
if (!writeFindFromDictionary(data_.map)) {
|
if (!writeFindFromDictionary(data_.map, true)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -495,70 +499,7 @@ EmojiPtr Find(const QChar *ch, const QChar *end, int *outLength) {\n";
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Generator::writeFindFromDictionary(const std::map<QString, int, std::greater<QString>> &dictionary) {
|
bool Generator::writeFindFromDictionary(const std::map<QString, int, std::greater<QString>> &dictionary, bool skipPostfixes) {
|
||||||
// That one was slower..
|
|
||||||
//
|
|
||||||
//using Map = std::map<QString, int, std::greater<QString>>;
|
|
||||||
//Map small; // 0-127
|
|
||||||
//Map medium; // 128-255
|
|
||||||
//Map large; // 256-65535
|
|
||||||
//Map other; // surrogates
|
|
||||||
//for (auto &item : dictionary) {
|
|
||||||
// auto key = item.first;
|
|
||||||
// auto first = key.isEmpty() ? QChar(0) : QChar(key[0]);
|
|
||||||
// if (!first.unicode() || first.isLowSurrogate() || (first.isHighSurrogate() && (key.size() < 2 || !QChar(key[1]).isLowSurrogate()))) {
|
|
||||||
// logDataError() << "bad key.";
|
|
||||||
// return false;
|
|
||||||
// }
|
|
||||||
// if (first.isHighSurrogate()) {
|
|
||||||
// other.insert(item);
|
|
||||||
// } else if (first.unicode() >= 256) {
|
|
||||||
// if (first.unicode() >= 0xE000) {
|
|
||||||
// // Currently if we'll have codes from both below and above the surrogates
|
|
||||||
// // we'll return nullptr without checking the surrogates, because we first
|
|
||||||
// // check those codes, applying the min-max range of codes from "large".
|
|
||||||
// logDataError() << "codes after the surrogates are not supported.";
|
|
||||||
// return false;
|
|
||||||
// }
|
|
||||||
// large.insert(item);
|
|
||||||
// } else if (first.unicode() >= 128) {
|
|
||||||
// medium.insert(item);
|
|
||||||
// } else {
|
|
||||||
// small.insert(item);
|
|
||||||
// }
|
|
||||||
//}
|
|
||||||
//auto smallMinCheck = (medium.empty() && large.empty() && other.empty()) ? -1 : 0;
|
|
||||||
//auto smallMaxCheck = (medium.empty() && large.empty() && other.empty()) ? -1 : 128;
|
|
||||||
//if (!writeFindFromOneDictionary(small, smallMinCheck, smallMaxCheck)) {
|
|
||||||
// return false;
|
|
||||||
//}
|
|
||||||
//auto mediumMinCheck = (large.empty() && other.empty()) ? -1 : 128;
|
|
||||||
//auto mediumMaxCheck = (large.empty() && other.empty()) ? -1 : 256;
|
|
||||||
//if (!writeFindFromOneDictionary(medium, mediumMinCheck, mediumMaxCheck)) {
|
|
||||||
// return false;
|
|
||||||
//}
|
|
||||||
//if (!writeFindFromOneDictionary(large, other.empty() ? -1 : 0)) {
|
|
||||||
// return false;
|
|
||||||
//}
|
|
||||||
//if (!writeFindFromOneDictionary(other)) {
|
|
||||||
// return false;
|
|
||||||
//}
|
|
||||||
|
|
||||||
if (!writeFindFromOneDictionary(dictionary)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
source_->stream() << "\
|
|
||||||
return nullptr;\n";
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// min < 0 - no outer min-max check
|
|
||||||
// max < 0 - this is last checked dictionary
|
|
||||||
bool Generator::writeFindFromOneDictionary(const std::map<QString, int, std::greater<QString>> &dictionary, int min, int max) {
|
|
||||||
if (dictionary.empty()) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto tabs = [](int size) {
|
auto tabs = [](int size) {
|
||||||
return QString(size, '\t');
|
return QString(size, '\t');
|
||||||
};
|
};
|
||||||
|
@ -572,35 +513,24 @@ bool Generator::writeFindFromOneDictionary(const std::map<QString, int, std::gre
|
||||||
uniqueFirstChars[ch] = 0;
|
uniqueFirstChars[ch] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto writeBoundsCondition = false;//(uniqueFirstChars.size() > 4);
|
|
||||||
auto haveOuterCondition = false;
|
|
||||||
if (min >= 0 && max > min) {
|
|
||||||
haveOuterCondition = true;
|
|
||||||
source_->stream() << "\
|
|
||||||
if (ch->unicode() >= " << min << " && ch->unicode() < " << max << ") {\n";
|
|
||||||
if (writeBoundsCondition) {
|
|
||||||
source_->stream() << "\
|
|
||||||
if (ch->unicode() < " << foundMin << " || ch->unicode() > " << foundMax << ") {\n\
|
|
||||||
return nullptr;\n\
|
|
||||||
}\n\n";
|
|
||||||
}
|
|
||||||
} else if (writeBoundsCondition) {
|
|
||||||
haveOuterCondition = true;
|
|
||||||
source_->stream() << "\
|
|
||||||
if (ch->unicode() >= " << foundMin << " && ch->unicode() <= " << foundMax << ") {\n";
|
|
||||||
}
|
|
||||||
enum class UsedCheckType {
|
enum class UsedCheckType {
|
||||||
Switch,
|
Switch,
|
||||||
If,
|
If,
|
||||||
UpcomingIf,
|
|
||||||
};
|
};
|
||||||
auto checkTypes = QVector<UsedCheckType>();
|
auto checkTypes = QVector<UsedCheckType>();
|
||||||
auto existsTill = QVector<int>(1, 1);
|
|
||||||
auto chars = QString();
|
auto chars = QString();
|
||||||
auto tabsUsed = haveOuterCondition ? 2 : 1;
|
auto tabsUsed = 1;
|
||||||
|
|
||||||
|
auto writeSkipPostfix = [this, &tabs, skipPostfixes](int tabsCount) {
|
||||||
|
if (skipPostfixes) {
|
||||||
|
source_->stream() << tabs(tabsCount) << "if (++ch != end && ch->unicode() == kPostfix) ++ch;\n";
|
||||||
|
} else {
|
||||||
|
source_->stream() << tabs(tabsCount) << "++ch;\n";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Returns true if at least one check was finished.
|
// Returns true if at least one check was finished.
|
||||||
auto finishChecksTillKey = [this, &chars, &checkTypes, &existsTill, &tabsUsed, tabs](const QString &key) {
|
auto finishChecksTillKey = [this, &chars, &checkTypes, &tabsUsed, tabs](const QString &key) {
|
||||||
auto result = false;
|
auto result = false;
|
||||||
while (!chars.isEmpty() && key.midRef(0, chars.size()) != chars) {
|
while (!chars.isEmpty() && key.midRef(0, chars.size()) != chars) {
|
||||||
result = true;
|
result = true;
|
||||||
|
@ -615,7 +545,6 @@ bool Generator::writeFindFromOneDictionary(const std::map<QString, int, std::gre
|
||||||
}
|
}
|
||||||
if ((!chars.isEmpty() && key.midRef(0, chars.size()) != chars) || key == chars) {
|
if ((!chars.isEmpty() && key.midRef(0, chars.size()) != chars) || key == chars) {
|
||||||
source_->stream() << tabs(tabsUsed) << "}\n";
|
source_->stream() << tabs(tabsUsed) << "}\n";
|
||||||
existsTill.pop_back();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -638,32 +567,6 @@ bool Generator::writeFindFromOneDictionary(const std::map<QString, int, std::gre
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Get minimal length of key that has first "charIndex" chars same as it
|
|
||||||
// and has at least one more char after them.
|
|
||||||
auto getMinimalLength = [](auto it, auto end, int charIndex) {
|
|
||||||
auto key = it->first;
|
|
||||||
auto result = key.size();
|
|
||||||
auto i = it;
|
|
||||||
auto keyStart = key.mid(0, charIndex);
|
|
||||||
for (++i; i != end; ++i) {
|
|
||||||
auto nextKey = i->first;
|
|
||||||
if (nextKey.mid(0, charIndex) != keyStart || nextKey.size() <= charIndex) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (result > nextKey.size()) {
|
|
||||||
result = nextKey.size();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
};
|
|
||||||
|
|
||||||
auto getUnicodePointer = [](int index) {
|
|
||||||
if (index > 0) {
|
|
||||||
return "(ch + " + QString::number(index) + ')';
|
|
||||||
}
|
|
||||||
return QString("ch");
|
|
||||||
};
|
|
||||||
|
|
||||||
for (auto i = dictionary.cbegin(), e = dictionary.cend(); i != e; ++i) {
|
for (auto i = dictionary.cbegin(), e = dictionary.cend(); i != e; ++i) {
|
||||||
auto &item = *i;
|
auto &item = *i;
|
||||||
auto key = item.first;
|
auto key = item.first;
|
||||||
|
@ -671,55 +574,24 @@ bool Generator::writeFindFromOneDictionary(const std::map<QString, int, std::gre
|
||||||
while (chars.size() != key.size()) {
|
while (chars.size() != key.size()) {
|
||||||
auto checking = chars.size();
|
auto checking = chars.size();
|
||||||
auto keyChar = key[checking];
|
auto keyChar = key[checking];
|
||||||
auto checkedAlready = (checkTypes.size() > checking);
|
auto keyCharString = "0x" + QString::number(keyChar.unicode(), 16);
|
||||||
if (!checkedAlready) {
|
auto usedIfForCheck = !weContinueOldSwitch && canUseIfForCheck(i, e, checking);
|
||||||
auto keyCharString = "0x" + QString::number(keyChar.unicode(), 16);
|
if (weContinueOldSwitch) {
|
||||||
auto usedIfForCheck = false;
|
weContinueOldSwitch = false;
|
||||||
if (weContinueOldSwitch) {
|
} else if (!usedIfForCheck) {
|
||||||
weContinueOldSwitch = false;
|
source_->stream() << tabs(tabsUsed) << "if (ch != end) switch (ch->unicode()) {\n";
|
||||||
source_->stream() << tabs(tabsUsed) << "case " << keyCharString << ":\n";
|
|
||||||
} else {
|
|
||||||
auto canCheckByIfCount = 0;
|
|
||||||
for (; checking + canCheckByIfCount != key.size(); ++canCheckByIfCount) {
|
|
||||||
if (!canUseIfForCheck(i, e, checking + canCheckByIfCount)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
auto canCheckTill = getMinimalLength(i, e, checking);
|
|
||||||
auto checkedAlready = !existsTill.isEmpty() && (existsTill.back() == canCheckTill);
|
|
||||||
if (checking + canCheckByIfCount - 1 > canCheckTill
|
|
||||||
|| checking > canCheckTill
|
|
||||||
|| (!existsTill.isEmpty() && existsTill.back() > canCheckTill)) {
|
|
||||||
logDataError() << "something wrong with the algo.";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
auto condition = checkedAlready ? QString() : ("ch + " + QString::number(canCheckTill - 1) + " " + (canCheckTill == checking + 1 ? "!=" : "<") + " end");
|
|
||||||
existsTill.push_back(canCheckTill);
|
|
||||||
if (canCheckByIfCount > 0) {
|
|
||||||
auto checkStrings = QStringList();
|
|
||||||
for (auto checkByIf = 0; checkByIf != canCheckByIfCount; ++checkByIf) {
|
|
||||||
checkStrings.push_back(getUnicodePointer(checking + checkByIf) + "->unicode() == 0x" + QString::number(key[checking + checkByIf].unicode(), 16));
|
|
||||||
}
|
|
||||||
if (!condition.isEmpty()) {
|
|
||||||
checkStrings.push_front(condition);
|
|
||||||
}
|
|
||||||
for (auto upcomingChecked = 1; upcomingChecked != canCheckByIfCount; ++upcomingChecked) {
|
|
||||||
checkTypes.push_back(UsedCheckType::UpcomingIf);
|
|
||||||
}
|
|
||||||
source_->stream() << tabs(tabsUsed) << "if (" << checkStrings.join(" && ") << ") {\n";
|
|
||||||
usedIfForCheck = true;
|
|
||||||
} else {
|
|
||||||
source_->stream() << tabs(tabsUsed) << (condition.isEmpty() ? "" : "if (" + condition + ") ") << "switch (" << getUnicodePointer(checking) << "->unicode()) {\n";
|
|
||||||
source_->stream() << tabs(tabsUsed) << "case " << keyCharString << ":\n";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
checkTypes.push_back(usedIfForCheck ? UsedCheckType::If : UsedCheckType::Switch);
|
|
||||||
++tabsUsed;
|
|
||||||
}
|
}
|
||||||
|
if (usedIfForCheck) {
|
||||||
|
source_->stream() << tabs(tabsUsed) << "if (ch != end && ch->unicode() == " << keyCharString << ") {\n";
|
||||||
|
checkTypes.push_back(UsedCheckType::If);
|
||||||
|
} else {
|
||||||
|
source_->stream() << tabs(tabsUsed) << "case " << keyCharString << ":\n";
|
||||||
|
checkTypes.push_back(UsedCheckType::Switch);
|
||||||
|
}
|
||||||
|
writeSkipPostfix(++tabsUsed);
|
||||||
chars.push_back(keyChar);
|
chars.push_back(keyChar);
|
||||||
}
|
}
|
||||||
source_->stream() << tabs(tabsUsed) << "if (outLength) *outLength = " << chars.size() << ";\n";
|
source_->stream() << tabs(tabsUsed) << "if (outLength) *outLength = (ch - start);\n";
|
||||||
|
|
||||||
// While IsReplaceEdge() currently is always true we just return the value.
|
// While IsReplaceEdge() currently is always true we just return the value.
|
||||||
//source_->stream() << tabs(1 + chars.size()) << "if (ch + " << chars.size() << " == end || IsReplaceEdge(*(ch + " << chars.size() << ")) || (ch + " << chars.size() << ")->unicode() == ' ') {\n";
|
//source_->stream() << tabs(1 + chars.size()) << "if (ch + " << chars.size() << " == end || IsReplaceEdge(*(ch + " << chars.size() << ")) || (ch + " << chars.size() << ")->unicode() == ' ') {\n";
|
||||||
|
@ -729,15 +601,9 @@ bool Generator::writeFindFromOneDictionary(const std::map<QString, int, std::gre
|
||||||
}
|
}
|
||||||
finishChecksTillKey(QString());
|
finishChecksTillKey(QString());
|
||||||
|
|
||||||
if (min >= 0) { // not the last dictionary
|
source_->stream() << "\
|
||||||
source_->stream() << tabs(tabsUsed) << "return nullptr;\n";
|
\n\
|
||||||
}
|
return nullptr;\n";
|
||||||
if (haveOuterCondition) {
|
|
||||||
source_->stream() << "\
|
|
||||||
}\n";
|
|
||||||
}
|
|
||||||
source_->stream() << "\n";
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -47,11 +47,7 @@ private:
|
||||||
bool writePacks();
|
bool writePacks();
|
||||||
bool writeFindReplace();
|
bool writeFindReplace();
|
||||||
bool writeFind();
|
bool writeFind();
|
||||||
bool writeFindFromDictionary(const std::map<QString, int, std::greater<QString>> &dictionary);
|
bool writeFindFromDictionary(const std::map<QString, int, std::greater<QString>> &dictionary, bool skipPostfixes = false);
|
||||||
|
|
||||||
// min < 0 - this is last checked dictionary
|
|
||||||
// max < 0 - no outer min-max check
|
|
||||||
bool writeFindFromOneDictionary(const std::map<QString, int, std::greater<QString>> &dictionary, int min = -1, int max = -1);
|
|
||||||
|
|
||||||
const common::ProjectInfo &project_;
|
const common::ProjectInfo &project_;
|
||||||
int colorsCount_ = 0;
|
int colorsCount_ = 0;
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -128,21 +128,8 @@ inline EmojiPtr FromUrl(const QString &url) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline EmojiPtr Find(const QChar *ch, const QChar *end, int *outLength = nullptr) {
|
inline EmojiPtr Find(const QChar *start, const QChar *end, int *outLength = nullptr) {
|
||||||
if (ch != end) {
|
return internal::Find(start, end, outLength);
|
||||||
if (auto result = internal::Find(ch, end, outLength)) {
|
|
||||||
if (outLength && result->hasPostfix()) {
|
|
||||||
// Try to consume a pending 0xFE0F postfix.
|
|
||||||
// Comment out hasPostfix() check if you want to consume it anyway.
|
|
||||||
auto resultEnd = ch + *outLength;
|
|
||||||
if (resultEnd != end && resultEnd->unicode() == kPostfix) {
|
|
||||||
++*outLength;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nullptr;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline EmojiPtr Find(const QString &text, int *outLength = nullptr) {
|
inline EmojiPtr Find(const QString &text, int *outLength = nullptr) {
|
||||||
|
|
Loading…
Reference in New Issue