Alpha 1.0.11: emoji parsing fixed.

We now ignore all 0xFE0F characters, because some other apps
include or omit them inconsistently, unlike iOS/macOS.
This commit is contained in:
John Preston 2017-02-17 20:31:46 +03:00
parent 0411f05c39
commit d259656e89
5 changed files with 6251 additions and 3531 deletions

View File

@ -1760,38 +1760,59 @@ void appendCategory(Data &result, const InputCategory &category, const VariatedI
append(bareId, code);
}
}
if (emoji.id.isEmpty()) {
if (bareId.isEmpty()) {
logDataError() << "empty emoji id found.";
result = Data();
return;
}
auto it = result.map.find(emoji.id);
auto it = result.map.find(bareId);
if (it == result.map.cend()) {
it = result.map.insert(make_pair(emoji.id, result.list.size())).first;
it = result.map.insert(make_pair(bareId, result.list.size())).first;
result.list.push_back(move(emoji));
} else if (result.list[it->second].postfixed != emoji.postfixed) {
logDataError() << "same emoji found with different postfixed property.";
result = Data();
return;
} else if (result.list[it->second].id != emoji.id) {
logDataError() << "same emoji found with different id.";
result = Data();
return;
}
if (variatedIds.find(bareId) != variatedIds.cend()) {
result.list[it->second].variated = true;
auto baseId = Id();
append(baseId, *from++);
if (from != to && *from == kPostfix) {
++from;
if (*from == kPostfix) {
logDataError() << "bad first symbol in emoji.";
result = Data();
return;
}
append(baseId, *from++);
for (auto color : Colors) {
auto colored = Emoji();
colored.id = baseId;
colored.colored = true;
append(colored.id, color);
auto bareColoredId = colored.id;
for (auto i = from; i != to; ++i) {
append(colored.id, *i);
if (*i != kPostfix) {
append(bareColoredId, *i);
}
}
auto it = result.map.find(bareColoredId);
if (it == result.map.cend()) {
it = result.map.insert(make_pair(bareColoredId, result.list.size())).first;
result.list.push_back(move(colored));
} else if (result.list[it->second].postfixed != colored.postfixed) {
logDataError() << "same emoji found with different postfixed property.";
result = Data();
return;
} else if (result.list[it->second].id != colored.id) {
logDataError() << "same emoji found with different id.";
result = Data();
return;
}
result.map.insert(make_pair(colored.id, result.list.size()));
result.list.push_back(move(colored));
}
}
result.categories.back().push_back(it->second);

View File

@ -467,7 +467,9 @@ EmojiPack GetPack(DBIEmojiTab tab) {\n\
bool Generator::writeFindReplace() {
source_->stream() << "\
\n\
EmojiPtr FindReplace(const QChar *ch, const QChar *end, int *outLength) {\n";
EmojiPtr FindReplace(const QChar *start, const QChar *end, int *outLength) {\n\
auto ch = start;\n\
\n";
if (!writeFindFromDictionary(data_.replaces)) {
return false;
@ -482,9 +484,11 @@ EmojiPtr FindReplace(const QChar *ch, const QChar *end, int *outLength) {\n";
bool Generator::writeFind() {
source_->stream() << "\
\n\
EmojiPtr Find(const QChar *ch, const QChar *end, int *outLength) {\n";
EmojiPtr Find(const QChar *start, const QChar *end, int *outLength) {\n\
auto ch = start;\n\
\n";
if (!writeFindFromDictionary(data_.map)) {
if (!writeFindFromDictionary(data_.map, true)) {
return false;
}
@ -495,70 +499,7 @@ EmojiPtr Find(const QChar *ch, const QChar *end, int *outLength) {\n";
return true;
}
bool Generator::writeFindFromDictionary(const std::map<QString, int, std::greater<QString>> &dictionary) {
// That one was slower..
//
//using Map = std::map<QString, int, std::greater<QString>>;
//Map small; // 0-127
//Map medium; // 128-255
//Map large; // 256-65535
//Map other; // surrogates
//for (auto &item : dictionary) {
// auto key = item.first;
// auto first = key.isEmpty() ? QChar(0) : QChar(key[0]);
// if (!first.unicode() || first.isLowSurrogate() || (first.isHighSurrogate() && (key.size() < 2 || !QChar(key[1]).isLowSurrogate()))) {
// logDataError() << "bad key.";
// return false;
// }
// if (first.isHighSurrogate()) {
// other.insert(item);
// } else if (first.unicode() >= 256) {
// if (first.unicode() >= 0xE000) {
// // Currently if we'll have codes from both below and above the surrogates
// // we'll return nullptr without checking the surrogates, because we first
// // check those codes, applying the min-max range of codes from "large".
// logDataError() << "codes after the surrogates are not supported.";
// return false;
// }
// large.insert(item);
// } else if (first.unicode() >= 128) {
// medium.insert(item);
// } else {
// small.insert(item);
// }
//}
//auto smallMinCheck = (medium.empty() && large.empty() && other.empty()) ? -1 : 0;
//auto smallMaxCheck = (medium.empty() && large.empty() && other.empty()) ? -1 : 128;
//if (!writeFindFromOneDictionary(small, smallMinCheck, smallMaxCheck)) {
// return false;
//}
//auto mediumMinCheck = (large.empty() && other.empty()) ? -1 : 128;
//auto mediumMaxCheck = (large.empty() && other.empty()) ? -1 : 256;
//if (!writeFindFromOneDictionary(medium, mediumMinCheck, mediumMaxCheck)) {
// return false;
//}
//if (!writeFindFromOneDictionary(large, other.empty() ? -1 : 0)) {
// return false;
//}
//if (!writeFindFromOneDictionary(other)) {
// return false;
//}
if (!writeFindFromOneDictionary(dictionary)) {
return false;
}
source_->stream() << "\
return nullptr;\n";
return true;
}
// min < 0 - no outer min-max check
// max < 0 - this is last checked dictionary
bool Generator::writeFindFromOneDictionary(const std::map<QString, int, std::greater<QString>> &dictionary, int min, int max) {
if (dictionary.empty()) {
return true;
}
bool Generator::writeFindFromDictionary(const std::map<QString, int, std::greater<QString>> &dictionary, bool skipPostfixes) {
auto tabs = [](int size) {
return QString(size, '\t');
};
@ -572,35 +513,24 @@ bool Generator::writeFindFromOneDictionary(const std::map<QString, int, std::gre
uniqueFirstChars[ch] = 0;
}
auto writeBoundsCondition = false;//(uniqueFirstChars.size() > 4);
auto haveOuterCondition = false;
if (min >= 0 && max > min) {
haveOuterCondition = true;
source_->stream() << "\
if (ch->unicode() >= " << min << " && ch->unicode() < " << max << ") {\n";
if (writeBoundsCondition) {
source_->stream() << "\
if (ch->unicode() < " << foundMin << " || ch->unicode() > " << foundMax << ") {\n\
return nullptr;\n\
}\n\n";
}
} else if (writeBoundsCondition) {
haveOuterCondition = true;
source_->stream() << "\
if (ch->unicode() >= " << foundMin << " && ch->unicode() <= " << foundMax << ") {\n";
}
enum class UsedCheckType {
Switch,
If,
UpcomingIf,
};
auto checkTypes = QVector<UsedCheckType>();
auto existsTill = QVector<int>(1, 1);
auto chars = QString();
auto tabsUsed = haveOuterCondition ? 2 : 1;
auto tabsUsed = 1;
// Writes one line of generated code, indented by `tabsCount` tabs, that
// advances `ch` past the character that was just matched.
// With `skipPostfixes` set, the emitted line increments `ch` and then, if
// `ch` has not reached `end` and points at kPostfix, increments it once more,
// so a postfix following the matched character is stepped over.
// Without it, the emitted line is a plain `++ch;`.
auto writeSkipPostfix = [this, &tabs, skipPostfixes](int tabsCount) {
if (skipPostfixes) {
source_->stream() << tabs(tabsCount) << "if (++ch != end && ch->unicode() == kPostfix) ++ch;\n";
} else {
source_->stream() << tabs(tabsCount) << "++ch;\n";
}
};
// Returns true if at least one check was finished.
auto finishChecksTillKey = [this, &chars, &checkTypes, &existsTill, &tabsUsed, tabs](const QString &key) {
auto finishChecksTillKey = [this, &chars, &checkTypes, &tabsUsed, tabs](const QString &key) {
auto result = false;
while (!chars.isEmpty() && key.midRef(0, chars.size()) != chars) {
result = true;
@ -615,7 +545,6 @@ bool Generator::writeFindFromOneDictionary(const std::map<QString, int, std::gre
}
if ((!chars.isEmpty() && key.midRef(0, chars.size()) != chars) || key == chars) {
source_->stream() << tabs(tabsUsed) << "}\n";
existsTill.pop_back();
}
}
}
@ -638,32 +567,6 @@ bool Generator::writeFindFromOneDictionary(const std::map<QString, int, std::gre
return true;
};
// Get minimal length of key that has first "charIndex" chars same as it
// and has at least one more char after them.
auto getMinimalLength = [](auto it, auto end, int charIndex) {
auto key = it->first;
auto result = key.size();
auto i = it;
auto keyStart = key.mid(0, charIndex);
for (++i; i != end; ++i) {
auto nextKey = i->first;
if (nextKey.mid(0, charIndex) != keyStart || nextKey.size() <= charIndex) {
break;
}
if (result > nextKey.size()) {
result = nextKey.size();
}
}
return result;
};
auto getUnicodePointer = [](int index) {
if (index > 0) {
return "(ch + " + QString::number(index) + ')';
}
return QString("ch");
};
for (auto i = dictionary.cbegin(), e = dictionary.cend(); i != e; ++i) {
auto &item = *i;
auto key = item.first;
@ -671,55 +574,24 @@ bool Generator::writeFindFromOneDictionary(const std::map<QString, int, std::gre
while (chars.size() != key.size()) {
auto checking = chars.size();
auto keyChar = key[checking];
auto checkedAlready = (checkTypes.size() > checking);
if (!checkedAlready) {
auto keyCharString = "0x" + QString::number(keyChar.unicode(), 16);
auto usedIfForCheck = false;
if (weContinueOldSwitch) {
weContinueOldSwitch = false;
source_->stream() << tabs(tabsUsed) << "case " << keyCharString << ":\n";
} else {
auto canCheckByIfCount = 0;
for (; checking + canCheckByIfCount != key.size(); ++canCheckByIfCount) {
if (!canUseIfForCheck(i, e, checking + canCheckByIfCount)) {
break;
}
}
auto canCheckTill = getMinimalLength(i, e, checking);
auto checkedAlready = !existsTill.isEmpty() && (existsTill.back() == canCheckTill);
if (checking + canCheckByIfCount - 1 > canCheckTill
|| checking > canCheckTill
|| (!existsTill.isEmpty() && existsTill.back() > canCheckTill)) {
logDataError() << "something wrong with the algo.";
return false;
}
auto condition = checkedAlready ? QString() : ("ch + " + QString::number(canCheckTill - 1) + " " + (canCheckTill == checking + 1 ? "!=" : "<") + " end");
existsTill.push_back(canCheckTill);
if (canCheckByIfCount > 0) {
auto checkStrings = QStringList();
for (auto checkByIf = 0; checkByIf != canCheckByIfCount; ++checkByIf) {
checkStrings.push_back(getUnicodePointer(checking + checkByIf) + "->unicode() == 0x" + QString::number(key[checking + checkByIf].unicode(), 16));
}
if (!condition.isEmpty()) {
checkStrings.push_front(condition);
}
for (auto upcomingChecked = 1; upcomingChecked != canCheckByIfCount; ++upcomingChecked) {
checkTypes.push_back(UsedCheckType::UpcomingIf);
}
source_->stream() << tabs(tabsUsed) << "if (" << checkStrings.join(" && ") << ") {\n";
usedIfForCheck = true;
} else {
source_->stream() << tabs(tabsUsed) << (condition.isEmpty() ? "" : "if (" + condition + ") ") << "switch (" << getUnicodePointer(checking) << "->unicode()) {\n";
source_->stream() << tabs(tabsUsed) << "case " << keyCharString << ":\n";
}
}
checkTypes.push_back(usedIfForCheck ? UsedCheckType::If : UsedCheckType::Switch);
++tabsUsed;
auto keyCharString = "0x" + QString::number(keyChar.unicode(), 16);
auto usedIfForCheck = !weContinueOldSwitch && canUseIfForCheck(i, e, checking);
if (weContinueOldSwitch) {
weContinueOldSwitch = false;
} else if (!usedIfForCheck) {
source_->stream() << tabs(tabsUsed) << "if (ch != end) switch (ch->unicode()) {\n";
}
if (usedIfForCheck) {
source_->stream() << tabs(tabsUsed) << "if (ch != end && ch->unicode() == " << keyCharString << ") {\n";
checkTypes.push_back(UsedCheckType::If);
} else {
source_->stream() << tabs(tabsUsed) << "case " << keyCharString << ":\n";
checkTypes.push_back(UsedCheckType::Switch);
}
writeSkipPostfix(++tabsUsed);
chars.push_back(keyChar);
}
source_->stream() << tabs(tabsUsed) << "if (outLength) *outLength = " << chars.size() << ";\n";
source_->stream() << tabs(tabsUsed) << "if (outLength) *outLength = (ch - start);\n";
// While IsReplaceEdge() currently is always true we just return the value.
//source_->stream() << tabs(1 + chars.size()) << "if (ch + " << chars.size() << " == end || IsReplaceEdge(*(ch + " << chars.size() << ")) || (ch + " << chars.size() << ")->unicode() == ' ') {\n";
@ -729,15 +601,9 @@ bool Generator::writeFindFromOneDictionary(const std::map<QString, int, std::gre
}
finishChecksTillKey(QString());
if (min >= 0) { // not the last dictionary
source_->stream() << tabs(tabsUsed) << "return nullptr;\n";
}
if (haveOuterCondition) {
source_->stream() << "\
}\n";
}
source_->stream() << "\n";
source_->stream() << "\
\n\
return nullptr;\n";
return true;
}

View File

@ -47,11 +47,7 @@ private:
bool writePacks();
bool writeFindReplace();
bool writeFind();
bool writeFindFromDictionary(const std::map<QString, int, std::greater<QString>> &dictionary);
// min < 0 - this is last checked dictionary
// max < 0 - no outer min-max check
bool writeFindFromOneDictionary(const std::map<QString, int, std::greater<QString>> &dictionary, int min = -1, int max = -1);
bool writeFindFromDictionary(const std::map<QString, int, std::greater<QString>> &dictionary, bool skipPostfixes = false);
const common::ProjectInfo &project_;
int colorsCount_ = 0;

File diff suppressed because it is too large Load Diff

View File

@ -128,21 +128,8 @@ inline EmojiPtr FromUrl(const QString &url) {
return nullptr;
}
inline EmojiPtr Find(const QChar *ch, const QChar *end, int *outLength = nullptr) {
if (ch != end) {
if (auto result = internal::Find(ch, end, outLength)) {
if (outLength && result->hasPostfix()) {
// Try to consume a pending 0xFE0F postfix.
// Comment out hasPostfix() check if you want to consume it anyway.
auto resultEnd = ch + *outLength;
if (resultEnd != end && resultEnd->unicode() == kPostfix) {
++*outLength;
}
}
return result;
}
}
return nullptr;
inline EmojiPtr Find(const QChar *start, const QChar *end, int *outLength = nullptr) {
return internal::Find(start, end, outLength);
}
inline EmojiPtr Find(const QString &text, int *outLength = nullptr) {