Merge pull request #123 from mkljczk/emoji-tweaks

Lazy load emoji data
This commit is contained in:
marcin mikołajczak 2024-10-17 00:16:06 +02:00 committed by GitHub
commit 3d940581f7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 125 additions and 128 deletions

View file

@ -132,7 +132,6 @@
"reselect": "^5.1.1", "reselect": "^5.1.1",
"resize-observer-polyfill": "^1.5.1", "resize-observer-polyfill": "^1.5.1",
"sass": "^1.79.4", "sass": "^1.79.4",
"semver": "^7.6.3",
"stringz": "^2.1.0", "stringz": "^2.1.0",
"tiny-queue": "^0.2.1", "tiny-queue": "^0.2.1",
"tslib": "^2.7.0", "tslib": "^2.7.0",

View file

@ -2,8 +2,8 @@ import split from 'graphemesplit';
import unicodeMapping from './mapping'; import unicodeMapping from './mapping';
import type { Emoji as EmojiMart, CustomEmoji as EmojiMartCustom } from './data';
import type { CustomEmoji as BaseCustomEmoji } from 'pl-api'; import type { CustomEmoji as BaseCustomEmoji } from 'pl-api';
import type { Emoji as EmojiMart, CustomEmoji as EmojiMartCustom } from 'pl-fe/features/emoji/data';
/* /*
* TODO: Consolidate emoji object types * TODO: Consolidate emoji object types

View file

@ -0,0 +1,107 @@
import { createRequire } from 'node:module';
import type { EmojiData } from './data';
import type { UnicodeMap } from './mapping';
// This file is an ES module (it reads `import.meta.url`), so a CommonJS-style
// `require` is created explicitly in order to load the JSON file synchronously.
const require = createRequire(import.meta.url);
// Emoji data set shipped with @emoji-mart/data (set 14, Twitter variant, per the path).
const data = require('@emoji-mart/data/sets/14/twitter.json');
// Matches the run of zeros at the very start of a unified codepoint string.
const stripLeadingZeros = /^0+/;
/*
* Twemoji strips their hex codes from unicode codepoints to make it look "pretty"
* - leading 0s are removed
* - fe0f is removed unless it has 200d
* - fe0f IS still removed for 1f441-fe0f-200d-1f5e8-fe0f even though it has a 200d (it is listed in the blacklist below)
*
* this is all wrong
*/
/**
 * Unified sequences that contain a zero-width joiner (200d) but must have
 * their `-fe0f` variation selectors stripped anyway. Only key membership
 * (via the `in` operator) is consulted; the value is irrelevant.
 */
const blacklist = { '1f441-fe0f-200d-1f5e8-fe0f': true };
/*
 * Extra entries layered on top of the emoji-mart data by generateMappings:
 * native character sequence -> [unified codepoints, shortcode].
 * They are applied after the data set, so an entry here wins on a key clash.
 *
 * NOTE(review): several keys render identically but are distinct strings;
 * they appear to differ only in where (or whether) the invisible U+FE0F
 * variation selector occurs, presumably so partially qualified sequences
 * still resolve. Do not "deduplicate" them, and edit with an editor that
 * shows invisible characters.
 */
const tweaks = {
'#⃣': ['23-20e3', 'hash'],
'*⃣': ['2a-20e3', 'keycap_star'],
'0⃣': ['30-20e3', 'zero'],
'1⃣': ['31-20e3', 'one'],
'2⃣': ['32-20e3', 'two'],
'3⃣': ['33-20e3', 'three'],
'4⃣': ['34-20e3', 'four'],
'5⃣': ['35-20e3', 'five'],
'6⃣': ['36-20e3', 'six'],
'7⃣': ['37-20e3', 'seven'],
'8⃣': ['38-20e3', 'eight'],
'9⃣': ['39-20e3', 'nine'],
'❤‍🔥': ['2764-fe0f-200d-1f525', 'heart_on_fire'],
'❤‍🩹': ['2764-fe0f-200d-1fa79', 'mending_heart'],
'👁‍🗨️': ['1f441-fe0f-200d-1f5e8-fe0f', 'eye-in-speech-bubble'],
'👁️‍🗨': ['1f441-fe0f-200d-1f5e8-fe0f', 'eye-in-speech-bubble'],
'👁‍🗨': ['1f441-fe0f-200d-1f5e8-fe0f', 'eye-in-speech-bubble'],
'🕵‍♂️': ['1f575-fe0f-200d-2642-fe0f', 'male-detective'],
'🕵️‍♂': ['1f575-fe0f-200d-2642-fe0f', 'male-detective'],
'🕵‍♂': ['1f575-fe0f-200d-2642-fe0f', 'male-detective'],
'🕵‍♀️': ['1f575-fe0f-200d-2640-fe0f', 'female-detective'],
'🕵️‍♀': ['1f575-fe0f-200d-2640-fe0f', 'female-detective'],
'🕵‍♀': ['1f575-fe0f-200d-2640-fe0f', 'female-detective'],
'🏌‍♂️': ['1f3cc-fe0f-200d-2642-fe0f', 'man-golfing'],
'🏌️‍♂': ['1f3cc-fe0f-200d-2642-fe0f', 'man-golfing'],
'🏌‍♂': ['1f3cc-fe0f-200d-2642-fe0f', 'man-golfing'],
'🏌‍♀️': ['1f3cc-fe0f-200d-2640-fe0f', 'woman-golfing'],
'🏌️‍♀': ['1f3cc-fe0f-200d-2640-fe0f', 'woman-golfing'],
'🏌‍♀': ['1f3cc-fe0f-200d-2640-fe0f', 'woman-golfing'],
'⛹‍♂️': ['26f9-fe0f-200d-2642-fe0f', 'man-bouncing-ball'],
'⛹️‍♂': ['26f9-fe0f-200d-2642-fe0f', 'man-bouncing-ball'],
'⛹‍♂': ['26f9-fe0f-200d-2642-fe0f', 'man-bouncing-ball'],
'⛹‍♀️': ['26f9-fe0f-200d-2640-fe0f', 'woman-bouncing-ball'],
'⛹️‍♀': ['26f9-fe0f-200d-2640-fe0f', 'woman-bouncing-ball'],
'⛹‍♀': ['26f9-fe0f-200d-2640-fe0f', 'woman-bouncing-ball'],
'🏋‍♂️': ['1f3cb-fe0f-200d-2642-fe0f', 'man-lifting-weights'],
'🏋️‍♂': ['1f3cb-fe0f-200d-2642-fe0f', 'man-lifting-weights'],
'🏋‍♂': ['1f3cb-fe0f-200d-2642-fe0f', 'man-lifting-weights'],
'🏋‍♀️': ['1f3cb-fe0f-200d-2640-fe0f', 'woman-lifting-weights'],
'🏋️‍♀': ['1f3cb-fe0f-200d-2640-fe0f', 'woman-lifting-weights'],
'🏋‍♀': ['1f3cb-fe0f-200d-2640-fe0f', 'woman-lifting-weights'],
'🏳‍🌈': ['1f3f3-fe0f-200d-1f308', 'rainbow_flag'],
'🏳‍⚧️': ['1f3f3-fe0f-200d-26a7-fe0f', 'transgender_flag'],
'🏳️‍⚧': ['1f3f3-fe0f-200d-26a7-fe0f', 'transgender_flag'],
'🏳‍⚧': ['1f3f3-fe0f-200d-26a7-fe0f', 'transgender_flag'],
};
/**
 * Normalises a unified codepoint string.
 *
 * Leading zeros at the start of the string are always dropped. The `-fe0f`
 * segments are kept only when the sequence contains `200d` and is not listed
 * in `blacklist`; in every other case all `-fe0f` segments are removed.
 *
 * @param unified - hyphen-separated hex codepoints, e.g. `0023-fe0f-20e3`
 * @param native - the native character sequence (not used by the current logic)
 * @returns the normalised codepoint string
 */
const stripcodes = (unified: string, native: string) => {
  const withoutLeadingZeros = unified.replace(stripLeadingZeros, '');
  const keepsVariationSelectors = unified.includes('200d') && !(unified in blacklist);

  return keepsVariationSelectors
    ? withoutLeadingZeros
    : withoutLeadingZeros.replaceAll('-fe0f', '');
};
/**
 * Builds the lookup table from native emoji strings to their normalised
 * unified codepoints and shortcode.
 *
 * Every skin of every emoji in `emojiMap` is registered first, using the
 * emoji's `id` as shortcode. The hand-maintained `tweaks` entries are applied
 * afterwards, so they replace any data-set entry with the same native key.
 *
 * @param emojiMap - emoji records keyed by id; a nullish value is treated as empty
 * @returns map of native sequence -> `{ unified, shortcode }`
 */
const generateMappings = (emojiMap: EmojiData['emojis']): UnicodeMap => {
  const mapping: UnicodeMap = {};

  Object.values(emojiMap ?? {}).forEach(({ id, skins }) => {
    skins.forEach(({ unified, native }) => {
      mapping[native] = { unified: stripcodes(unified, native), shortcode: id };
    });
  });

  // Applied last on purpose: tweaks override whatever the data set provided.
  Object.entries(tweaks).forEach(([native, [unified, shortcode]]) => {
    mapping[native] = { unified: stripcodes(unified, native), shortcode };
  });

  return mapping;
};
// Computed once when this module is evaluated; every call of the default
// export below returns a wrapper around this same object.
const unicodeMapping = generateMappings(data.emojis);
// NOTE(review): the `{ data }` wrapper is presumably the result shape expected
// by the compile-time loader that evaluates this file (mapping.ts references it
// through `import.meta.compileTime`) — confirm against that plugin's docs.
export default () => ({
data: unicodeMapping,
});

View file

@ -1,6 +1,3 @@
import data, { EmojiData } from './data';
const stripLeadingZeros = /^0+/;
interface UnicodeMap { interface UnicodeMap {
[s: string]: { [s: string]: {
unified: string; unified: string;
@ -8,99 +5,6 @@ interface UnicodeMap {
}; };
} }
/* export default import.meta.compileTime<UnicodeMap>('./mapping-compiletime.ts');
* Twemoji strips their hex codes from unicode codepoints to make it look "pretty"
* - leading 0s are removed
* - fe0f is removed unless it has 200d
* - fe0f is NOT removed for 1f441-fe0f-200d-1f5e8-fe0f even though it has a 200d
*
* this is all wrong
*/
const blacklist = { export type { UnicodeMap };
'1f441-fe0f-200d-1f5e8-fe0f': true,
};
const tweaks = {
'#⃣': ['23-20e3', 'hash'],
'*⃣': ['2a-20e3', 'keycap_star'],
'0⃣': ['30-20e3', 'zero'],
'1⃣': ['31-20e3', 'one'],
'2⃣': ['32-20e3', 'two'],
'3⃣': ['33-20e3', 'three'],
'4⃣': ['34-20e3', 'four'],
'5⃣': ['35-20e3', 'five'],
'6⃣': ['36-20e3', 'six'],
'7⃣': ['37-20e3', 'seven'],
'8⃣': ['38-20e3', 'eight'],
'9⃣': ['39-20e3', 'nine'],
'❤‍🔥': ['2764-fe0f-200d-1f525', 'heart_on_fire'],
'❤‍🩹': ['2764-fe0f-200d-1fa79', 'mending_heart'],
'👁‍🗨️': ['1f441-fe0f-200d-1f5e8-fe0f', 'eye-in-speech-bubble'],
'👁️‍🗨': ['1f441-fe0f-200d-1f5e8-fe0f', 'eye-in-speech-bubble'],
'👁‍🗨': ['1f441-fe0f-200d-1f5e8-fe0f', 'eye-in-speech-bubble'],
'🕵‍♂️': ['1f575-fe0f-200d-2642-fe0f', 'male-detective'],
'🕵️‍♂': ['1f575-fe0f-200d-2642-fe0f', 'male-detective'],
'🕵‍♂': ['1f575-fe0f-200d-2642-fe0f', 'male-detective'],
'🕵‍♀️': ['1f575-fe0f-200d-2640-fe0f', 'female-detective'],
'🕵️‍♀': ['1f575-fe0f-200d-2640-fe0f', 'female-detective'],
'🕵‍♀': ['1f575-fe0f-200d-2640-fe0f', 'female-detective'],
'🏌‍♂️': ['1f3cc-fe0f-200d-2642-fe0f', 'man-golfing'],
'🏌️‍♂': ['1f3cc-fe0f-200d-2642-fe0f', 'man-golfing'],
'🏌‍♂': ['1f3cc-fe0f-200d-2642-fe0f', 'man-golfing'],
'🏌‍♀️': ['1f3cc-fe0f-200d-2640-fe0f', 'woman-golfing'],
'🏌️‍♀': ['1f3cc-fe0f-200d-2640-fe0f', 'woman-golfing'],
'🏌‍♀': ['1f3cc-fe0f-200d-2640-fe0f', 'woman-golfing'],
'⛹‍♂️': ['26f9-fe0f-200d-2642-fe0f', 'man-bouncing-ball'],
'⛹️‍♂': ['26f9-fe0f-200d-2642-fe0f', 'man-bouncing-ball'],
'⛹‍♂': ['26f9-fe0f-200d-2642-fe0f', 'man-bouncing-ball'],
'⛹‍♀️': ['26f9-fe0f-200d-2640-fe0f', 'woman-bouncing-ball'],
'⛹️‍♀': ['26f9-fe0f-200d-2640-fe0f', 'woman-bouncing-ball'],
'⛹‍♀': ['26f9-fe0f-200d-2640-fe0f', 'woman-bouncing-ball'],
'🏋‍♂️': ['1f3cb-fe0f-200d-2642-fe0f', 'man-lifting-weights'],
'🏋️‍♂': ['1f3cb-fe0f-200d-2642-fe0f', 'man-lifting-weights'],
'🏋‍♂': ['1f3cb-fe0f-200d-2642-fe0f', 'man-lifting-weights'],
'🏋‍♀️': ['1f3cb-fe0f-200d-2640-fe0f', 'woman-lifting-weights'],
'🏋️‍♀': ['1f3cb-fe0f-200d-2640-fe0f', 'woman-lifting-weights'],
'🏋‍♀': ['1f3cb-fe0f-200d-2640-fe0f', 'woman-lifting-weights'],
'🏳‍🌈': ['1f3f3-fe0f-200d-1f308', 'rainbow_flag'],
'🏳‍⚧️': ['1f3f3-fe0f-200d-26a7-fe0f', 'transgender_flag'],
'🏳️‍⚧': ['1f3f3-fe0f-200d-26a7-fe0f', 'transgender_flag'],
'🏳‍⚧': ['1f3f3-fe0f-200d-26a7-fe0f', 'transgender_flag'],
};
const stripcodes = (unified: string, native: string) => {
const stripped = unified.replace(stripLeadingZeros, '');
if (unified.includes('200d') && !(unified in blacklist)) {
return stripped;
} else {
return stripped.replaceAll('-fe0f', '');
}
};
const generateMappings = (data: EmojiData): UnicodeMap => {
const result: UnicodeMap = {};
const emojis = Object.values(data.emojis ?? {});
for (const value of emojis) {
for (const item of value.skins) {
const { unified, native } = item;
const stripped = stripcodes(unified, native);
result[native] = { unified: stripped, shortcode: value.id };
}
}
for (const [native, [unified, shortcode]] of Object.entries(tweaks)) {
const stripped = stripcodes(unified, native);
result[native] = { unified: stripped, shortcode };
}
return result;
};
const unicodeMapping = generateMappings(data);
export { generateMappings, unicodeMapping as default };

View file

@ -1,21 +1,26 @@
import FlexSearch from 'flexsearch'; import FlexSearch from 'flexsearch';
import data from './data'; import type { EmojiData } from './data';
import type { Emoji } from './index'; import type { Emoji } from './index';
import type { CustomEmoji } from 'pl-api'; import type { CustomEmoji } from 'pl-api';
// Native emoji records used by search; starts empty and is filled in once the
// lazily imported data module has loaded.
let emojis: EmojiData['emojis'] = {};
// Load the emoji data with a dynamic import and add every emoji to the search
// index. `index` is declared further down in this module; that is fine because
// this callback only runs asynchronously, after the module body has finished.
import('./data').then(data => {
emojis = data.emojis;
// Add entries in key order (sorted with localeCompare).
const sortedEmojis = Object.entries(emojis).sort((a, b) => a[0].localeCompare(b[0]));
for (const [key, emoji] of sortedEmojis) {
// Index ids are the emoji key prefixed with 'n'; search strips the first
// character again before looking the emoji up.
index.add('n' + key, `${emoji.id} ${emoji.name} ${emoji.keywords.join(' ')}`);
}
// NOTE(review): a failed import is swallowed silently here, which leaves
// `emojis` empty and the index without native emoji — consider reporting it.
}).catch(() => { });
const index = new FlexSearch.Index({ const index = new FlexSearch.Index({
tokenize: 'full', tokenize: 'full',
optimize: true, optimize: true,
context: true, context: true,
}); });
const sortedEmojis = Object.entries(data.emojis).sort((a, b) => a[0].localeCompare(b[0]));
for (const [key, emoji] of sortedEmojis) {
index.add('n' + key, `${emoji.id} ${emoji.name} ${emoji.keywords.join(' ')}`);
}
interface searchOptions { interface searchOptions {
maxResults?: number; maxResults?: number;
custom?: any; custom?: any;
@ -58,7 +63,7 @@ const search = (
} }
} }
const skins = data.emojis[id.slice(1)]?.skins; const skins = emojis[id.slice(1)]?.skins;
if (skins) { if (skins) {
return { return {

View file

@ -1,5 +1,4 @@
import { buildCustomEmojis } from 'pl-fe/features/emoji'; import { buildCustomEmojis } from 'pl-fe/features/emoji';
import emojiData from 'pl-fe/features/emoji/data';
import { addCustomToPool } from 'pl-fe/features/emoji/search'; import { addCustomToPool } from 'pl-fe/features/emoji/search';
import { CUSTOM_EMOJIS_FETCH_SUCCESS, type CustomEmojisAction } from '../actions/custom-emojis'; import { CUSTOM_EMOJIS_FETCH_SUCCESS, type CustomEmojisAction } from '../actions/custom-emojis';
@ -8,27 +7,10 @@ import type { CustomEmoji } from 'pl-api';
const initialState: Array<CustomEmoji> = []; const initialState: Array<CustomEmoji> = [];
// Populate custom emojis for composer autosuggest
const autosuggestPopulate = (emojis: Array<CustomEmoji>) => {
addCustomToPool(buildCustomEmojis(emojis));
};
const importEmojis = (customEmojis: Array<CustomEmoji>) => {
const emojis = customEmojis.filter((emoji) => {
// If a custom emoji has the shortcode of a Unicode emoji, skip it.
// Otherwise it breaks EmojiMart.
// https://gitlab.com/soapbox-pub/soapbox/-/issues/610
const shortcode = emoji.shortcode.toLowerCase();
return !emojiData.emojis[shortcode];
});
autosuggestPopulate(emojis);
return emojis;
};
const custom_emojis = (state = initialState, action: CustomEmojisAction) => { const custom_emojis = (state = initialState, action: CustomEmojisAction) => {
if (action.type === CUSTOM_EMOJIS_FETCH_SUCCESS) { if (action.type === CUSTOM_EMOJIS_FETCH_SUCCESS) {
return importEmojis(action.custom_emojis); addCustomToPool(buildCustomEmojis(action.custom_emojis));
return action.custom_emojis;
} }
return state; return state;