Merge emoji results from different locales more efficiently.
This new method is more verbose but doesn't require a "seen" set.

Bug: b:357772460
Change-Id: Iac00a0b8e9410de3854787eca2ba7b85e63fffd4
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/5849409
Reviewed-by: Michael Cui <mlcui@google.com>
Commit-Queue: Darren Shen <shend@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1354932}
This commit is contained in:

committed by
Chromium LUCI CQ

parent
f560fdf062
commit
b5ef432521
@ -50,6 +50,16 @@ bool operator<(std::u16string_view a, PrefixMatcher b) {
|
|||||||
return a.substr(0, b.prefix.size()) < b.prefix;
|
return a.substr(0, b.prefix.size()) < b.prefix;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Represents a score for an emoji match.
|
||||||
|
struct EmojiScore {
|
||||||
|
// Scores are compared by the language score first (the higher the better).
|
||||||
|
// The relevance score is only used when the language scores are equal.
|
||||||
|
int language_score;
|
||||||
|
double relevance_score;
|
||||||
|
|
||||||
|
auto operator<=>(const EmojiScore& other) const = default;
|
||||||
|
};
|
||||||
|
|
||||||
// Map from keyword -> sum of position weightings
|
// Map from keyword -> sum of position weightings
|
||||||
std::map<std::u16string, double, std::less<>> CombineSearchTerms(
|
std::map<std::u16string, double, std::less<>> CombineSearchTerms(
|
||||||
base::span<const std::string_view> long_search_terms) {
|
base::span<const std::string_view> long_search_terms) {
|
||||||
@ -121,12 +131,13 @@ void AddDataFromFileToMap(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::map<std::string_view, double> GetResultsFromMap(
|
std::map<std::string_view, EmojiScore> GetResultsFromMap(
|
||||||
const EmojiEntryMap& map,
|
const EmojiEntryMap& map,
|
||||||
base::span<const std::u16string_view> lowercase_words) {
|
base::span<const std::u16string_view> lowercase_words,
|
||||||
std::map<std::string_view, double> scored_emoji;
|
int language_score) {
|
||||||
|
std::map<std::string_view, EmojiScore> scored_emoji;
|
||||||
for (const std::u16string_view lowercase_word : lowercase_words) {
|
for (const std::u16string_view lowercase_word : lowercase_words) {
|
||||||
std::map<std::string_view, double> word_scored_emoji;
|
std::map<std::string_view, EmojiScore> word_scored_emoji;
|
||||||
for (auto [matches, end] = map.equal_range(PrefixMatcher{lowercase_word});
|
for (auto [matches, end] = map.equal_range(PrefixMatcher{lowercase_word});
|
||||||
matches != end; ++matches) {
|
matches != end; ++matches) {
|
||||||
for (const auto& match : matches->second) {
|
for (const auto& match : matches->second) {
|
||||||
@ -137,13 +148,15 @@ std::map<std::string_view, double> GetResultsFromMap(
|
|||||||
} else if (const auto& it = scored_emoji.find(match.emoji_string);
|
} else if (const auto& it = scored_emoji.find(match.emoji_string);
|
||||||
it != scored_emoji.end()) {
|
it != scored_emoji.end()) {
|
||||||
// Second+ word, and emoji was previously found.
|
// Second+ word, and emoji was previously found.
|
||||||
previous_score = it->second;
|
previous_score = it->second.relevance_score;
|
||||||
} else {
|
} else {
|
||||||
// Second+ word, and emoji was not previously found.
|
// Second+ word, and emoji was not previously found.
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Will zero initialize if entry missing
|
// Will zero initialize if entry missing
|
||||||
word_scored_emoji[match.emoji_string] +=
|
EmojiScore& score = word_scored_emoji[match.emoji_string];
|
||||||
|
score.language_score = language_score;
|
||||||
|
score.relevance_score +=
|
||||||
previous_score * match.weighting / matches->first.size();
|
previous_score * match.weighting / matches->first.size();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -158,29 +171,24 @@ std::map<std::string_view, double> GetResultsFromMap(
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<EmojiSearchEntry> SortEmojiResultsByScore(
|
std::vector<EmojiSearchEntry> SortEmojiResultsByScore(
|
||||||
std::map<std::string_view, double> scored_emoji) {
|
std::map<std::string_view, EmojiScore> scored_emoji) {
|
||||||
|
std::vector<std::pair<EmojiScore, std::string_view>> emojis_by_score;
|
||||||
|
emojis_by_score.reserve(scored_emoji.size());
|
||||||
|
base::ranges::transform(scored_emoji, std::back_inserter(emojis_by_score),
|
||||||
|
[](const auto& entry) {
|
||||||
|
return std::make_pair(entry.second, entry.first);
|
||||||
|
});
|
||||||
|
base::ranges::sort(emojis_by_score, std::greater<>());
|
||||||
std::vector<EmojiSearchEntry> ret;
|
std::vector<EmojiSearchEntry> ret;
|
||||||
ret.reserve(scored_emoji.size());
|
ret.reserve(scored_emoji.size());
|
||||||
for (const auto& [emoji, weighting] : scored_emoji) {
|
base::ranges::transform(emojis_by_score, std::back_inserter(ret),
|
||||||
ret.push_back({weighting, std::string(emoji)});
|
[](const auto& entry) {
|
||||||
}
|
return EmojiSearchEntry{entry.first.relevance_score,
|
||||||
base::ranges::sort(
|
std::string(entry.second)};
|
||||||
ret, base::ranges::greater(),
|
});
|
||||||
[](const EmojiSearchEntry& entry) { return entry.weighting; });
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
void MergeResults(std::vector<EmojiSearchEntry>& accumulator,
|
|
||||||
std::set<std::string>& seen,
|
|
||||||
base::span<EmojiSearchEntry> new_results) {
|
|
||||||
for (EmojiSearchEntry& new_result : new_results) {
|
|
||||||
auto [it, inserted] = seen.emplace(new_result.emoji_string);
|
|
||||||
if (inserted) {
|
|
||||||
accumulator.push_back(std::move(new_result));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::optional<EmojiLanguageCode> GetLanguageCode(std::string_view code) {
|
std::optional<EmojiLanguageCode> GetLanguageCode(std::string_view code) {
|
||||||
static constexpr auto kLangCodeStrToEnum =
|
static constexpr auto kLangCodeStrToEnum =
|
||||||
base::MakeFixedFlatMap<std::string_view, EmojiLanguageCode>({
|
base::MakeFixedFlatMap<std::string_view, EmojiLanguageCode>({
|
||||||
@ -329,12 +337,9 @@ EmojiLanguageData::EmojiLanguageData(EmojiLanguageData&& emoji_language_data) =
|
|||||||
EmojiSearchResult EmojiSearch::SearchEmoji(
|
EmojiSearchResult EmojiSearch::SearchEmoji(
|
||||||
std::u16string_view query,
|
std::u16string_view query,
|
||||||
base::span<const std::string> language_codes) {
|
base::span<const std::string> language_codes) {
|
||||||
std::vector<EmojiSearchEntry> emojis;
|
std::map<std::string_view, EmojiScore> emojis;
|
||||||
std::set<std::string> seen_emojis;
|
std::map<std::string_view, EmojiScore> symbols;
|
||||||
std::vector<EmojiSearchEntry> symbols;
|
std::map<std::string_view, EmojiScore> emoticons;
|
||||||
std::set<std::string> seen_symbols;
|
|
||||||
std::vector<EmojiSearchEntry> emoticons;
|
|
||||||
std::set<std::string> seen_emoticons;
|
|
||||||
|
|
||||||
// Make search case insensitive.
|
// Make search case insensitive.
|
||||||
std::u16string lowercase_query = base::i18n::ToLower(query);
|
std::u16string lowercase_query = base::i18n::ToLower(query);
|
||||||
@ -343,6 +348,9 @@ EmojiSearchResult EmojiSearch::SearchEmoji(
|
|||||||
lowercase_query, u" ", base::WhitespaceHandling::TRIM_WHITESPACE,
|
lowercase_query, u" ", base::WhitespaceHandling::TRIM_WHITESPACE,
|
||||||
base::SplitResult::SPLIT_WANT_NONEMPTY);
|
base::SplitResult::SPLIT_WANT_NONEMPTY);
|
||||||
|
|
||||||
|
// `language_codes` are sorted in order of preference, so start with a high
|
||||||
|
// language score then go down.
|
||||||
|
int language_score = 0;
|
||||||
for (const std::string& code_str : language_codes) {
|
for (const std::string& code_str : language_codes) {
|
||||||
std::optional<EmojiLanguageCode> code = GetLanguageCode(code_str);
|
std::optional<EmojiLanguageCode> code = GetLanguageCode(code_str);
|
||||||
if (!code.has_value()) {
|
if (!code.has_value()) {
|
||||||
@ -350,18 +358,18 @@ EmojiSearchResult EmojiSearch::SearchEmoji(
|
|||||||
}
|
}
|
||||||
if (const auto& it = language_data_.find(*code);
|
if (const auto& it = language_data_.find(*code);
|
||||||
it != language_data_.end()) {
|
it != language_data_.end()) {
|
||||||
std::vector<EmojiSearchEntry> new_emojis = SortEmojiResultsByScore(
|
emojis.merge(GetResultsFromMap(it->second.emojis, lowercase_words,
|
||||||
GetResultsFromMap(it->second.emojis, lowercase_words));
|
language_score));
|
||||||
MergeResults(emojis, seen_emojis, new_emojis);
|
symbols.merge(GetResultsFromMap(it->second.symbols, lowercase_words,
|
||||||
std::vector<EmojiSearchEntry> new_symbols = SortEmojiResultsByScore(
|
language_score));
|
||||||
GetResultsFromMap(it->second.symbols, lowercase_words));
|
emoticons.merge(GetResultsFromMap(it->second.emoticons, lowercase_words,
|
||||||
MergeResults(symbols, seen_symbols, new_symbols);
|
language_score));
|
||||||
std::vector<EmojiSearchEntry> new_emoticons = SortEmojiResultsByScore(
|
--language_score;
|
||||||
GetResultsFromMap(it->second.emoticons, lowercase_words));
|
|
||||||
MergeResults(emoticons, seen_emoticons, new_emoticons);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return EmojiSearchResult(emojis, symbols, emoticons);
|
return EmojiSearchResult(SortEmojiResultsByScore(emojis),
|
||||||
|
SortEmojiResultsByScore(symbols),
|
||||||
|
SortEmojiResultsByScore(emoticons));
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmojiSearch::LoadEmojiLanguages(
|
void EmojiSearch::LoadEmojiLanguages(
|
||||||
|
Reference in New Issue
Block a user