Alpha 1.0.11: emoji parsing fixed.

Now we ignore all 0xFE0F characters, because some other apps, unlike iOS/macOS, include or omit them inconsistently.
2017-02-17 20:31:46 +03:00 · 2017-02-17 20:31:46 +03:00 · d259656e89
parent 0411f05c39
commit d259656e89
5 changed files with 6251 additions and 3531 deletions
--- a/Telegram/SourceFiles/codegen/emoji/data.cpp
+++ b/Telegram/SourceFiles/codegen/emoji/data.cpp
@ -1760,38 +1760,59 @@ void appendCategory(Data &result, const InputCategory &category, const VariatedI
 				append(bareId, code);
 			}
 		}
-		if (emoji.id.isEmpty()) {
+		if (bareId.isEmpty()) {
 			logDataError() << "empty emoji id found.";
 			result = Data();
 			return;
 		}
-		auto it = result.map.find(emoji.id);
+		auto it = result.map.find(bareId);
 		if (it == result.map.cend()) {
-			it = result.map.insert(make_pair(emoji.id, result.list.size())).first;
+			it = result.map.insert(make_pair(bareId, result.list.size())).first;
 			result.list.push_back(move(emoji));
 		} else if (result.list[it->second].postfixed != emoji.postfixed) {
 			logDataError() << "same emoji found with different postfixed property.";
 			result = Data();
 			return;
+		} else if (result.list[it->second].id != emoji.id) {
+			logDataError() << "same emoji found with different id.";
+			result = Data();
+			return;
 		}
 		if (variatedIds.find(bareId) != variatedIds.cend()) {
 			result.list[it->second].variated = true;

 			auto baseId = Id();
-			append(baseId, *from++);
-			if (from != to && *from == kPostfix) {
-				++from;
+			if (*from == kPostfix) {
+				logDataError() << "bad first symbol in emoji.";
+				result = Data();
+				return;
 			}
+			append(baseId, *from++);
 			for (auto color : Colors) {
 				auto colored = Emoji();
 				colored.id = baseId;
 				colored.colored = true;
 				append(colored.id, color);
+				auto bareColoredId = colored.id;
 				for (auto i = from; i != to; ++i) {
 					append(colored.id, *i);
+					if (*i != kPostfix) {
+						append(bareColoredId, *i);
+					}
+				}
+				auto it = result.map.find(bareColoredId);
+				if (it == result.map.cend()) {
+					it = result.map.insert(make_pair(bareColoredId, result.list.size())).first;
+					result.list.push_back(move(colored));
+				} else if (result.list[it->second].postfixed != colored.postfixed) {
+					logDataError() << "same emoji found with different postfixed property.";
+					result = Data();
+					return;
+				} else if (result.list[it->second].id != colored.id) {
+					logDataError() << "same emoji found with different id.";
+					result = Data();
+					return;
 				}
-				result.map.insert(make_pair(colored.id, result.list.size()));
-				result.list.push_back(move(colored));
 			}
 		}
 		result.categories.back().push_back(it->second);
--- a/Telegram/SourceFiles/codegen/emoji/generator.cpp
+++ b/Telegram/SourceFiles/codegen/emoji/generator.cpp
@ -467,7 +467,9 @@ EmojiPack GetPack(DBIEmojiTab tab) {\n\
 bool Generator::writeFindReplace() {
 	source_->stream() << "\
 \n\
-EmojiPtr FindReplace(const QChar *ch, const QChar *end, int *outLength) {\n";
+EmojiPtr FindReplace(const QChar *start, const QChar *end, int *outLength) {\n\
+	auto ch = start;\n\
+\n";

 	if (!writeFindFromDictionary(data_.replaces)) {
 		return false;
@ -482,9 +484,11 @@ EmojiPtr FindReplace(const QChar *ch, const QChar *end, int *outLength) {\n";
 bool Generator::writeFind() {
 	source_->stream() << "\
 \n\
-EmojiPtr Find(const QChar *ch, const QChar *end, int *outLength) {\n";
+EmojiPtr Find(const QChar *start, const QChar *end, int *outLength) {\n\
+	auto ch = start;\n\
+\n";

-	if (!writeFindFromDictionary(data_.map)) {
+	if (!writeFindFromDictionary(data_.map, true)) {
 		return false;
 	}

@ -495,70 +499,7 @@ EmojiPtr Find(const QChar *ch, const QChar *end, int *outLength) {\n";
 	return true;
 }

-bool Generator::writeFindFromDictionary(const std::map<QString, int, std::greater<QString>> &dictionary) {
-	// That one was slower..
-	//
-	//using Map = std::map<QString, int, std::greater<QString>>;
-	//Map small; // 0-127
-	//Map medium; // 128-255
-	//Map large; // 256-65535
-	//Map other;  // surrogates
-	//for (auto &item : dictionary) {
-	//	auto key = item.first;
-	//	auto first = key.isEmpty() ? QChar(0) : QChar(key[0]);
-	//	if (!first.unicode() || first.isLowSurrogate() || (first.isHighSurrogate() && (key.size() < 2 || !QChar(key[1]).isLowSurrogate()))) {
-	//		logDataError() << "bad key.";
-	//		return false;
-	//	}
-	//	if (first.isHighSurrogate()) {
-	//		other.insert(item);
-	//	} else if (first.unicode() >= 256) {
-	//		if (first.unicode() >= 0xE000) {
-	//			// Currently if we'll have codes from both below and above the surrogates
-	//			// we'll return nullptr without checking the surrogates, because we first
-	//			// check those codes, applying the min-max range of codes from "large".
-	//			logDataError() << "codes after the surrogates are not supported.";
-	//			return false;
-	//		}
-	//		large.insert(item);
-	//	} else if (first.unicode() >= 128) {
-	//		medium.insert(item);
-	//	} else {
-	//		small.insert(item);
-	//	}
-	//}
-	//auto smallMinCheck = (medium.empty() && large.empty() && other.empty()) ? -1 : 0;
-	//auto smallMaxCheck = (medium.empty() && large.empty() && other.empty()) ? -1 : 128;
-	//if (!writeFindFromOneDictionary(small, smallMinCheck, smallMaxCheck)) {
-	//	return false;
-	//}
-	//auto mediumMinCheck = (large.empty() && other.empty()) ? -1 : 128;
-	//auto mediumMaxCheck = (large.empty() && other.empty()) ? -1 : 256;
-	//if (!writeFindFromOneDictionary(medium, mediumMinCheck, mediumMaxCheck)) {
-	//	return false;
-	//}
-	//if (!writeFindFromOneDictionary(large, other.empty() ? -1 : 0)) {
-	//	return false;
-	//}
-	//if (!writeFindFromOneDictionary(other)) {
-	//	return false;
-	//}
-
-	if (!writeFindFromOneDictionary(dictionary)) {
-		return false;
-	}
-	source_->stream() << "\
-	return nullptr;\n";
-	return true;
-}
-
-// min < 0 - no outer min-max check
-// max < 0 - this is last checked dictionary
-bool Generator::writeFindFromOneDictionary(const std::map<QString, int, std::greater<QString>> &dictionary, int min, int max) {
-	if (dictionary.empty()) {
-		return true;
-	}
-
+bool Generator::writeFindFromDictionary(const std::map<QString, int, std::greater<QString>> &dictionary, bool skipPostfixes) {
 	auto tabs = [](int size) {
 		return QString(size, '\t');
 	};
@ -572,35 +513,24 @@ bool Generator::writeFindFromOneDictionary(const std::map<QString, int, std::gre
 		uniqueFirstChars[ch] = 0;
 	}

-	auto writeBoundsCondition = false;//(uniqueFirstChars.size() > 4);
-	auto haveOuterCondition = false;
-	if (min >= 0 && max > min) {
-		haveOuterCondition = true;
-		source_->stream() << "\
-	if (ch->unicode() >= " << min << " && ch->unicode() < " << max << ") {\n";
-		if (writeBoundsCondition) {
-			source_->stream() << "\
-		if (ch->unicode() < " << foundMin << " || ch->unicode() > " << foundMax << ") {\n\
-			return nullptr;\n\
-		}\n\n";
-		}
-	} else if (writeBoundsCondition) {
-		haveOuterCondition = true;
-		source_->stream() << "\
-	if (ch->unicode() >= " << foundMin << " && ch->unicode() <= " << foundMax << ") {\n";
-	}
 	enum class UsedCheckType {
 		Switch,
 		If,
-		UpcomingIf,
 	};
 	auto checkTypes = QVector<UsedCheckType>();
-	auto existsTill = QVector<int>(1, 1);
 	auto chars = QString();
-	auto tabsUsed = haveOuterCondition ? 2 : 1;
+	auto tabsUsed = 1;
+
+	auto writeSkipPostfix = [this, &tabs, skipPostfixes](int tabsCount) {
+		if (skipPostfixes) {
+			source_->stream() << tabs(tabsCount) << "if (++ch != end && ch->unicode() == kPostfix) ++ch;\n";
+		} else {
+			source_->stream() << tabs(tabsCount) << "++ch;\n";
+		}
+	};

 	// Returns true if at least one check was finished.
-	auto finishChecksTillKey = [this, &chars, &checkTypes, &existsTill, &tabsUsed, tabs](const QString &key) {
+	auto finishChecksTillKey = [this, &chars, &checkTypes, &tabsUsed, tabs](const QString &key) {
 		auto result = false;
 		while (!chars.isEmpty() && key.midRef(0, chars.size()) != chars) {
 			result = true;
@ -615,7 +545,6 @@ bool Generator::writeFindFromOneDictionary(const std::map<QString, int, std::gre
 				}
 				if ((!chars.isEmpty() && key.midRef(0, chars.size()) != chars) || key == chars) {
 					source_->stream() << tabs(tabsUsed) << "}\n";
-					existsTill.pop_back();
 				}
 			}
 		}
@ -638,32 +567,6 @@ bool Generator::writeFindFromOneDictionary(const std::map<QString, int, std::gre
 		return true;
 	};

-	// Get minimal length of key that has first "charIndex" chars same as it
-	// and has at least one more char after them.
-	auto getMinimalLength = [](auto it, auto end, int charIndex) {
-		auto key = it->first;
-		auto result = key.size();
-		auto i = it;
-		auto keyStart = key.mid(0, charIndex);
-		for (++i; i != end; ++i) {
-			auto nextKey = i->first;
-			if (nextKey.mid(0, charIndex) != keyStart || nextKey.size() <= charIndex) {
-				break;
-			}
-			if (result > nextKey.size()) {
-				result = nextKey.size();
-			}
-		}
-		return result;
-	};
-
-	auto getUnicodePointer = [](int index) {
-		if (index > 0) {
-			return "(ch + " + QString::number(index) + ')';
-		}
-		return QString("ch");
-	};
-
 	for (auto i = dictionary.cbegin(), e = dictionary.cend(); i != e; ++i) {
 		auto &item = *i;
 		auto key = item.first;
@ -671,55 +574,24 @@ bool Generator::writeFindFromOneDictionary(const std::map<QString, int, std::gre
 		while (chars.size() != key.size()) {
 			auto checking = chars.size();
 			auto keyChar = key[checking];
-			auto checkedAlready = (checkTypes.size() > checking);
-			if (!checkedAlready) {
-				auto keyCharString = "0x" + QString::number(keyChar.unicode(), 16);
-				auto usedIfForCheck = false;
-				if (weContinueOldSwitch) {
-					weContinueOldSwitch = false;
-					source_->stream() << tabs(tabsUsed) << "case " << keyCharString << ":\n";
-				} else {
-					auto canCheckByIfCount = 0;
-					for (; checking + canCheckByIfCount != key.size(); ++canCheckByIfCount) {
-						if (!canUseIfForCheck(i, e, checking + canCheckByIfCount)) {
-							break;
-						}
-					}
-
-					auto canCheckTill = getMinimalLength(i, e, checking);
-					auto checkedAlready = !existsTill.isEmpty() && (existsTill.back() == canCheckTill);
-					if (checking + canCheckByIfCount - 1 > canCheckTill
-						|| checking > canCheckTill
-						|| (!existsTill.isEmpty() && existsTill.back() > canCheckTill)) {
-						logDataError() << "something wrong with the algo.";
-						return false;
-					}
-					auto condition = checkedAlready ? QString() : ("ch + " + QString::number(canCheckTill - 1) + " " + (canCheckTill == checking + 1 ? "!=" : "<") + " end");
-					existsTill.push_back(canCheckTill);
-					if (canCheckByIfCount > 0) {
-						auto checkStrings = QStringList();
-						for (auto checkByIf = 0; checkByIf != canCheckByIfCount; ++checkByIf) {
-							checkStrings.push_back(getUnicodePointer(checking + checkByIf) + "->unicode() == 0x" + QString::number(key[checking + checkByIf].unicode(), 16));
-						}
-						if (!condition.isEmpty()) {
-							checkStrings.push_front(condition);
-						}
-						for (auto upcomingChecked = 1; upcomingChecked != canCheckByIfCount; ++upcomingChecked) {
-							checkTypes.push_back(UsedCheckType::UpcomingIf);
-						}
-						source_->stream() << tabs(tabsUsed) << "if (" << checkStrings.join(" && ") << ") {\n";
-						usedIfForCheck = true;
-					} else {
-						source_->stream() << tabs(tabsUsed) << (condition.isEmpty() ? "" : "if (" + condition + ") ") << "switch (" << getUnicodePointer(checking) << "->unicode()) {\n";
-						source_->stream() << tabs(tabsUsed) << "case " << keyCharString << ":\n";
-					}
-				}
-				checkTypes.push_back(usedIfForCheck ? UsedCheckType::If : UsedCheckType::Switch);
-				++tabsUsed;
+			auto keyCharString = "0x" + QString::number(keyChar.unicode(), 16);
+			auto usedIfForCheck = !weContinueOldSwitch && canUseIfForCheck(i, e, checking);
+			if (weContinueOldSwitch) {
+				weContinueOldSwitch = false;
+			} else if (!usedIfForCheck) {
+				source_->stream() << tabs(tabsUsed) << "if (ch != end) switch (ch->unicode()) {\n";
 			}
+			if (usedIfForCheck) {
+				source_->stream() << tabs(tabsUsed) << "if (ch != end && ch->unicode() == " << keyCharString << ") {\n";
+				checkTypes.push_back(UsedCheckType::If);
+			} else {
+				source_->stream() << tabs(tabsUsed) << "case " << keyCharString << ":\n";
+				checkTypes.push_back(UsedCheckType::Switch);
+			}
+			writeSkipPostfix(++tabsUsed);
 			chars.push_back(keyChar);
 		}
-		source_->stream() << tabs(tabsUsed) << "if (outLength) *outLength = " << chars.size() << ";\n";
+		source_->stream() << tabs(tabsUsed) << "if (outLength) *outLength = (ch - start);\n";

 		// While IsReplaceEdge() currently is always true we just return the value.
 		//source_->stream() << tabs(1 + chars.size()) << "if (ch + " << chars.size() << " == end || IsReplaceEdge(*(ch + " << chars.size() << ")) || (ch + " << chars.size() << ")->unicode() == ' ') {\n";
@ -729,15 +601,9 @@ bool Generator::writeFindFromOneDictionary(const std::map<QString, int, std::gre
 	}
 	finishChecksTillKey(QString());

-	if (min >= 0) { // not the last dictionary
-		source_->stream() << tabs(tabsUsed) << "return nullptr;\n";
-	}
-	if (haveOuterCondition) {
-		source_->stream() << "\
-	}\n";
-	}
-	source_->stream() << "\n";
-
+	source_->stream() << "\
+\n\
+	return nullptr;\n";
 	return true;
 }

--- a/Telegram/SourceFiles/codegen/emoji/generator.h
+++ b/Telegram/SourceFiles/codegen/emoji/generator.h
@ -47,11 +47,7 @@ private:
 	bool writePacks();
 	bool writeFindReplace();
 	bool writeFind();
-	bool writeFindFromDictionary(const std::map<QString, int, std::greater<QString>> &dictionary);
-
-	// min < 0 - this is last checked dictionary
-	// max < 0 - no outer min-max check
-	bool writeFindFromOneDictionary(const std::map<QString, int, std::greater<QString>> &dictionary, int min = -1, int max = -1);
+	bool writeFindFromDictionary(const std::map<QString, int, std::greater<QString>> &dictionary, bool skipPostfixes = false);

 	const common::ProjectInfo &project_;
 	int colorsCount_ = 0;
--- a/Telegram/SourceFiles/ui/emoji_config.cpp
+++ b/Telegram/SourceFiles/ui/emoji_config.cpp
--- a/Telegram/SourceFiles/ui/emoji_config.h
+++ b/Telegram/SourceFiles/ui/emoji_config.h
@ -128,21 +128,8 @@ inline EmojiPtr FromUrl(const QString &url) {
 	return nullptr;
 }

-inline EmojiPtr Find(const QChar *ch, const QChar *end, int *outLength = nullptr) {
-	if (ch != end) {
-		if (auto result = internal::Find(ch, end, outLength)) {
-			if (outLength && result->hasPostfix()) {
-				// Try to consume a pending 0xFE0F postfix.
-				// Comment out hasPostfix() check if you want to consume it anyway.
-				auto resultEnd = ch + *outLength;
-				if (resultEnd != end && resultEnd->unicode() == kPostfix) {
-					++*outLength;
-				}
-			}
-			return result;
-		}
-	}
-	return nullptr;
+inline EmojiPtr Find(const QChar *start, const QChar *end, int *outLength = nullptr) {
+	return internal::Find(start, end, outLength);
 }

 inline EmojiPtr Find(const QString &text, int *outLength = nullptr) {