remove cheerio and add custom html parser
This commit is contained in:
parent
a2fab1285d
commit
9d0a3b7a69
3 changed files with 57 additions and 162 deletions
|
@ -1,10 +1,5 @@
|
|||
// import data from '@emoji-mart/data';
|
||||
import { load as cheerioLoad } from 'cheerio';
|
||||
import { parseDocument } from 'htmlparser2';
|
||||
|
||||
import unicodeMapping from './mapping';
|
||||
|
||||
import type { Node as CheerioNode } from 'cheerio';
|
||||
import type { Emoji as EmojiMart, CustomEmoji as EmojiMartCustom } from 'emoji-mart';
|
||||
|
||||
/*
|
||||
|
@ -99,24 +94,24 @@ const popStack = (stack: string, open: boolean) => {
|
|||
|
||||
// TODO: handle grouped unicode emojis
|
||||
export const emojifyText = (str: string, customEmojis = {}) => {
|
||||
let res = '';
|
||||
let buf = '';
|
||||
let stack = '';
|
||||
let open = false;
|
||||
|
||||
for (const c of Array.from(str)) { // chunk by unicode codepoint with Array.from
|
||||
if (c in unicodeMapping) {
|
||||
if (open) { // unicode emoji inside colon
|
||||
res += popStack(stack, open);
|
||||
buf += popStack(stack, open);
|
||||
}
|
||||
|
||||
res += convertUnicode(c);
|
||||
buf += convertUnicode(c);
|
||||
|
||||
} else if (c === ':') {
|
||||
stack += ':';
|
||||
|
||||
// we see another : we convert it and clear the stack buffer
|
||||
if (open) {
|
||||
res += convertEmoji(stack, customEmojis);
|
||||
buf += convertEmoji(stack, customEmojis);
|
||||
stack = '';
|
||||
}
|
||||
|
||||
|
@ -128,54 +123,75 @@ export const emojifyText = (str: string, customEmojis = {}) => {
|
|||
// if the stack is non-null and we see invalid chars it's a string not emoji
|
||||
// so we push it to the return result and clear it
|
||||
if (!validEmojiChar(c)) {
|
||||
res += popStack(stack, open);
|
||||
buf += popStack(stack, open);
|
||||
}
|
||||
} else {
|
||||
res += c;
|
||||
buf += c;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// never found a closing colon so it's just a raw string
|
||||
if (open) {
|
||||
res += stack;
|
||||
buf += stack;
|
||||
}
|
||||
|
||||
return res;
|
||||
return buf;
|
||||
};
|
||||
|
||||
// const parseHmtl = (str: string) => {
|
||||
// const ret = [];
|
||||
// let depth = 0;
|
||||
//
|
||||
// return ret;
|
||||
// }
|
||||
/**
 * Splits an HTML string into a flat list of tokens.
 *
 * This is a minimal single-pass scanner, not a real HTML parser: anything
 * between a `<` and the next `>` is a tag token (`text: false`), everything
 * else is a text token (`text: true`). Concatenating every token's `data` in
 * order always reproduces the input string exactly.
 *
 * Malformed input is treated as text:
 * - a `<` seen while a tag is already open turns the unfinished tag into text
 *   and starts a new tag at the second `<`;
 * - a `>` outside of a tag is kept as literal text;
 * - a tag that is never closed is emitted as text.
 *
 * Empty text tokens are never emitted.
 *
 * @param str - the raw HTML string to tokenize
 * @returns the tokens in source order
 */
const parseHTML = (str: string): { text: boolean, data: string }[] => {
  const tokens: { text: boolean, data: string }[] = [];
  let buf = '';     // plain text collected outside of any tag
  let stack = '';   // the tag currently being read, including its leading '<'
  let open = false; // true while we are between '<' and '>'

  for (const c of str) {
    if (c === '<') {
      // Flush whatever came before this '<': either the pending text, or an
      // unfinished tag that turned out not to be a tag after all.
      const pending = open ? stack : buf;
      if (pending !== '') {
        tokens.push({ text: true, data: pending });
      }

      // Reset buf so the flushed text cannot be emitted a second time if the
      // tag opened here is never closed.
      buf = '';
      stack = '<';
      open = true;
    } else if (c === '>') {
      if (open) {
        tokens.push({ text: false, data: stack + '>' });
        stack = '';
        open = false;
      } else {
        // stray '>' outside of a tag is ordinary text
        buf += '>';
      }
    } else if (open) {
      stack += c;
    } else {
      buf += c;
    }
  }

  // Whatever is left is text: either trailing text or an unterminated tag.
  const rest = open ? stack : buf;
  if (rest !== '') {
    tokens.push({ text: true, data: rest });
  }

  return tokens;
};
|
||||
|
||||
/**
 * Converts emoji found in the text portions of an HTML string.
 *
 * The string is tokenized with parseHTML; tag tokens are copied through
 * verbatim, while text tokens are run through emojifyText. The processed
 * pieces are concatenated back together in their original order.
 *
 * @param str - HTML string to process
 * @param customEmojis - custom emoji lookup forwarded to emojifyText
 * @returns the HTML string with text segments emojified
 */
const emojify = (str: string, customEmojis = {}) => {
  const pieces: string[] = [];

  for (const token of parseHTML(str)) {
    const { text, data } = token;

    // Tags are never touched; empty and single-space text segments are the
    // common case and are skipped without calling emojifyText.
    const passthrough = !text || data.length === 0 || data === ' ';

    pieces.push(passthrough ? data : emojifyText(data, customEmojis));
  }

  return pieces.join('');
};
|
||||
|
||||
export default emojify;
|
||||
|
|
|
@ -104,7 +104,6 @@
|
|||
"bootstrap-icons": "^1.5.0",
|
||||
"bowser": "^2.11.0",
|
||||
"browserslist": "^4.16.6",
|
||||
"cheerio": "^1.0.0-rc.12",
|
||||
"classnames": "^2.2.5",
|
||||
"copy-webpack-plugin": "^9.0.1",
|
||||
"core-js": "^3.15.2",
|
||||
|
@ -125,7 +124,6 @@
|
|||
"history": "^4.10.1",
|
||||
"html-webpack-harddisk-plugin": "^2.0.0",
|
||||
"html-webpack-plugin": "^5.5.0",
|
||||
"htmlparser2": "^8.0.1",
|
||||
"http-link-header": "^1.0.2",
|
||||
"immutable": "^4.0.0",
|
||||
"imports-loader": "^4.0.0",
|
||||
|
|
121
yarn.lock
121
yarn.lock
|
@ -1271,13 +1271,6 @@
|
|||
dependencies:
|
||||
regenerator-runtime "^0.12.0"
|
||||
|
||||
"@babel/runtime@^7.0.0":
|
||||
version "7.18.6"
|
||||
resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.18.6.tgz#6a1ef59f838debd670421f8c7f2cbb8da9751580"
|
||||
integrity sha512-t9wi7/AW6XtKahAe20Yw0/mMljKq0B1r2fPdvaAdV/KPDZewFXdaaa6K7lxmZBZ8FBNpCiAT6iHPmd6QO9bKfQ==
|
||||
dependencies:
|
||||
regenerator-runtime "^0.13.4"
|
||||
|
||||
"@babel/runtime@^7.1.2", "@babel/runtime@^7.10.2", "@babel/runtime@^7.11.2", "@babel/runtime@^7.12.1", "@babel/runtime@^7.2.0", "@babel/runtime@^7.8.4", "@babel/runtime@^7.9.2":
|
||||
version "7.15.4"
|
||||
resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.15.4.tgz#fd17d16bfdf878e6dd02d19753a39fa8a8d9c84a"
|
||||
|
@ -4079,31 +4072,6 @@ charcodes@^0.2.0:
|
|||
resolved "https://registry.yarnpkg.com/charcodes/-/charcodes-0.2.0.tgz#5208d327e6cc05f99eb80ffc814707572d1f14e4"
|
||||
integrity sha512-Y4kiDb+AM4Ecy58YkuZrrSRJBDQdQ2L+NyS1vHHFtNtUjgutcZfx3yp1dAONI/oPaPmyGfCLx5CxL+zauIMyKQ==
|
||||
|
||||
cheerio-select@^2.1.0:
|
||||
version "2.1.0"
|
||||
resolved "https://registry.yarnpkg.com/cheerio-select/-/cheerio-select-2.1.0.tgz#4d8673286b8126ca2a8e42740d5e3c4884ae21b4"
|
||||
integrity sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==
|
||||
dependencies:
|
||||
boolbase "^1.0.0"
|
||||
css-select "^5.1.0"
|
||||
css-what "^6.1.0"
|
||||
domelementtype "^2.3.0"
|
||||
domhandler "^5.0.3"
|
||||
domutils "^3.0.1"
|
||||
|
||||
cheerio@^1.0.0-rc.12:
|
||||
version "1.0.0-rc.12"
|
||||
resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0-rc.12.tgz#788bf7466506b1c6bf5fae51d24a2c4d62e47683"
|
||||
integrity sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==
|
||||
dependencies:
|
||||
cheerio-select "^2.1.0"
|
||||
dom-serializer "^2.0.0"
|
||||
domhandler "^5.0.3"
|
||||
domutils "^3.0.1"
|
||||
htmlparser2 "^8.0.1"
|
||||
parse5 "^7.0.0"
|
||||
parse5-htmlparser2-tree-adapter "^7.0.0"
|
||||
|
||||
"chokidar@>=3.0.0 <4.0.0":
|
||||
version "3.5.2"
|
||||
resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-3.5.2.tgz#dba3976fcadb016f66fd365021d91600d01c1e75"
|
||||
|
@ -4496,17 +4464,6 @@ css-select@^4.1.3:
|
|||
domutils "^2.6.0"
|
||||
nth-check "^2.0.0"
|
||||
|
||||
css-select@^5.1.0:
|
||||
version "5.1.0"
|
||||
resolved "https://registry.yarnpkg.com/css-select/-/css-select-5.1.0.tgz#b8ebd6554c3637ccc76688804ad3f6a6fdaea8a6"
|
||||
integrity sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==
|
||||
dependencies:
|
||||
boolbase "^1.0.0"
|
||||
css-what "^6.1.0"
|
||||
domhandler "^5.0.2"
|
||||
domutils "^3.0.1"
|
||||
nth-check "^2.0.1"
|
||||
|
||||
css-system-font-keywords@^1.0.0:
|
||||
version "1.0.0"
|
||||
resolved "https://registry.yarnpkg.com/css-system-font-keywords/-/css-system-font-keywords-1.0.0.tgz#85c6f086aba4eb32c571a3086affc434b84823ed"
|
||||
|
@ -4525,11 +4482,6 @@ css-what@^5.0.0:
|
|||
resolved "https://registry.yarnpkg.com/css-what/-/css-what-5.0.1.tgz#3efa820131f4669a8ac2408f9c32e7c7de9f4cad"
|
||||
integrity sha512-FYDTSHb/7KXsWICVsxdmiExPjCfRC4qRFBdVwv7Ax9hMnvMmEjP9RfxTEZ3qPZGmADDn2vAKSo9UcN1jKVYscg==
|
||||
|
||||
css-what@^6.1.0:
|
||||
version "6.1.0"
|
||||
resolved "https://registry.yarnpkg.com/css-what/-/css-what-6.1.0.tgz#fb5effcf76f1ddea2c81bdfaa4de44e79bac70f4"
|
||||
integrity sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==
|
||||
|
||||
css.escape@^1.5.1:
|
||||
version "1.5.1"
|
||||
resolved "https://registry.yarnpkg.com/css.escape/-/css.escape-1.5.1.tgz#42e27d4fa04ae32f931a4b4d4191fa9cddee97cb"
|
||||
|
@ -4952,15 +4904,6 @@ dom-serializer@^1.0.1:
|
|||
domhandler "^4.2.0"
|
||||
entities "^2.0.0"
|
||||
|
||||
dom-serializer@^2.0.0:
|
||||
version "2.0.0"
|
||||
resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-2.0.0.tgz#e41b802e1eedf9f6cae183ce5e622d789d7d8e53"
|
||||
integrity sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==
|
||||
dependencies:
|
||||
domelementtype "^2.3.0"
|
||||
domhandler "^5.0.2"
|
||||
entities "^4.2.0"
|
||||
|
||||
domelementtype@1, domelementtype@^1.3.1:
|
||||
version "1.3.1"
|
||||
resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-1.3.1.tgz#d048c44b37b0d10a7f2a3d5fee3f4333d790481f"
|
||||
|
@ -4971,11 +4914,6 @@ domelementtype@^2.0.1, domelementtype@^2.2.0:
|
|||
resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.2.0.tgz#9a0b6c2782ed6a1c7323d42267183df9bd8b1d57"
|
||||
integrity sha512-DtBMo82pv1dFtUmHyr48beiuq792Sxohr+8Hm9zoxklYPfa6n0Z3Byjj2IV7bmr2IyqClnqEQhfgHJJ5QF0R5A==
|
||||
|
||||
domelementtype@^2.3.0:
|
||||
version "2.3.0"
|
||||
resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.3.0.tgz#5c45e8e869952626331d7aab326d01daf65d589d"
|
||||
integrity sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==
|
||||
|
||||
domexception@^1.0.1:
|
||||
version "1.0.1"
|
||||
resolved "https://registry.yarnpkg.com/domexception/-/domexception-1.0.1.tgz#937442644ca6a31261ef36e3ec677fe805582c90"
|
||||
|
@ -5004,13 +4942,6 @@ domhandler@^4.0.0, domhandler@^4.2.0:
|
|||
dependencies:
|
||||
domelementtype "^2.2.0"
|
||||
|
||||
domhandler@^5.0.1, domhandler@^5.0.2, domhandler@^5.0.3:
|
||||
version "5.0.3"
|
||||
resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-5.0.3.tgz#cc385f7f751f1d1fc650c21374804254538c7d31"
|
||||
integrity sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==
|
||||
dependencies:
|
||||
domelementtype "^2.3.0"
|
||||
|
||||
domutils@^1.5.1:
|
||||
version "1.7.0"
|
||||
resolved "https://registry.yarnpkg.com/domutils/-/domutils-1.7.0.tgz#56ea341e834e06e6748af7a1cb25da67ea9f8c2a"
|
||||
|
@ -5028,15 +4959,6 @@ domutils@^2.5.2, domutils@^2.6.0:
|
|||
domelementtype "^2.2.0"
|
||||
domhandler "^4.2.0"
|
||||
|
||||
domutils@^3.0.1:
|
||||
version "3.0.1"
|
||||
resolved "https://registry.yarnpkg.com/domutils/-/domutils-3.0.1.tgz#696b3875238338cb186b6c0612bd4901c89a4f1c"
|
||||
integrity sha512-z08c1l761iKhDFtfXO04C7kTdPBLi41zwOZl00WS8b5eiaebNpY00HKbztwBq+e3vyqWNwWF3mP9YLUeqIrF+Q==
|
||||
dependencies:
|
||||
dom-serializer "^2.0.0"
|
||||
domelementtype "^2.3.0"
|
||||
domhandler "^5.0.1"
|
||||
|
||||
dot-case@^3.0.4:
|
||||
version "3.0.4"
|
||||
resolved "https://registry.yarnpkg.com/dot-case/-/dot-case-3.0.4.tgz#9b2b670d00a431667a8a75ba29cd1b98809ce751"
|
||||
|
@ -5102,15 +5024,6 @@ emoji-datasource@5.0.0:
|
|||
resolved "https://registry.yarnpkg.com/emoji-datasource/-/emoji-datasource-5.0.0.tgz#1522fdba3c52223a1cf5a1c1fc282935400eaa06"
|
||||
integrity sha512-LuvLWFnxznTH++GytEzpzOPUo1SB+6CUFqIlVETJJ3x9fpyMCKFfyqberbhMLOpT1qcNe+km+zoyBeUSC3u5Rw==
|
||||
|
||||
"emoji-mart-old@npm:emoji-mart-lazyload":
|
||||
version "3.0.1-j"
|
||||
resolved "https://registry.yarnpkg.com/emoji-mart-lazyload/-/emoji-mart-lazyload-3.0.1-j.tgz#87a90d30b79d9145ece078d53e3e683c1a10ce9c"
|
||||
integrity sha512-0wKF7MR0/iAeCIoiBLY+JjXCugycTgYRC2SL0y9/bjNSQlbeMdzILmPQJAufU/mgLFDUitOvjxLDhOZ9yxZ48g==
|
||||
dependencies:
|
||||
"@babel/runtime" "^7.0.0"
|
||||
intersection-observer "^0.12.0"
|
||||
prop-types "^15.6.0"
|
||||
|
||||
emoji-mart@^5.1.0:
|
||||
version "5.1.0"
|
||||
resolved "https://registry.yarnpkg.com/emoji-mart/-/emoji-mart-5.1.0.tgz#8a36a872e1297747342d1385bd7b7141ac2f4365"
|
||||
|
@ -5181,7 +5094,7 @@ entities@^2.0.0:
|
|||
resolved "https://registry.yarnpkg.com/entities/-/entities-2.2.0.tgz#098dc90ebb83d8dffa089d55256b351d34c4da55"
|
||||
integrity sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==
|
||||
|
||||
entities@^4.2.0, entities@^4.3.0, entities@^4.3.1:
|
||||
entities@^4.3.1:
|
||||
version "4.3.1"
|
||||
resolved "https://registry.yarnpkg.com/entities/-/entities-4.3.1.tgz#c34062a94c865c322f9d67b4384e4169bcede6a4"
|
||||
integrity sha512-o4q/dYJlmyjP2zfnaWDUC6A3BQFmVTX+tZPezK7k0GLSU9QYCauscf5Y+qcEPzKL+EixVouYDgLQK5H9GrLpkg==
|
||||
|
@ -6463,16 +6376,6 @@ htmlparser2@^6.1.0:
|
|||
domutils "^2.5.2"
|
||||
entities "^2.0.0"
|
||||
|
||||
htmlparser2@^8.0.1:
|
||||
version "8.0.1"
|
||||
resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-8.0.1.tgz#abaa985474fcefe269bc761a779b544d7196d010"
|
||||
integrity sha512-4lVbmc1diZC7GUJQtRQ5yBAeUCL1exyMwmForWkRLnwyzWBFxN633SALPMGYaWZvKe9j1pRZJpauvmxENSp/EA==
|
||||
dependencies:
|
||||
domelementtype "^2.3.0"
|
||||
domhandler "^5.0.2"
|
||||
domutils "^3.0.1"
|
||||
entities "^4.3.0"
|
||||
|
||||
http-deceiver@^1.2.7:
|
||||
version "1.2.7"
|
||||
resolved "https://registry.yarnpkg.com/http-deceiver/-/http-deceiver-1.2.7.tgz#fa7168944ab9a519d337cb0bec7284dc3e723d87"
|
||||
|
@ -8705,13 +8608,6 @@ nth-check@^2.0.0:
|
|||
dependencies:
|
||||
boolbase "^1.0.0"
|
||||
|
||||
nth-check@^2.0.1:
|
||||
version "2.1.1"
|
||||
resolved "https://registry.yarnpkg.com/nth-check/-/nth-check-2.1.1.tgz#c9eab428effce36cd6b92c924bdb000ef1f1ed1d"
|
||||
integrity sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==
|
||||
dependencies:
|
||||
boolbase "^1.0.0"
|
||||
|
||||
num2fraction@^1.2.2:
|
||||
version "1.2.2"
|
||||
resolved "https://registry.yarnpkg.com/num2fraction/-/num2fraction-1.2.2.tgz#6f682b6a027a4e9ddfa4564cd2589d1d4e669ede"
|
||||
|
@ -9016,26 +8912,11 @@ parse-passwd@^1.0.0:
|
|||
resolved "https://registry.yarnpkg.com/parse-passwd/-/parse-passwd-1.0.0.tgz#6d5b934a456993b23d37f40a382d6f1666a8e5c6"
|
||||
integrity sha512-1Y1A//QUXEZK7YKz+rD9WydcE1+EuPr6ZBgKecAB8tmoW6UFv0NREVJe1p+jRxtThkcbbKkfwIbWJe/IeE6m2Q==
|
||||
|
||||
parse5-htmlparser2-tree-adapter@^7.0.0:
|
||||
version "7.0.0"
|
||||
resolved "https://registry.yarnpkg.com/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz#23c2cc233bcf09bb7beba8b8a69d46b08c62c2f1"
|
||||
integrity sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==
|
||||
dependencies:
|
||||
domhandler "^5.0.2"
|
||||
parse5 "^7.0.0"
|
||||
|
||||
parse5@6.0.1:
|
||||
version "6.0.1"
|
||||
resolved "https://registry.yarnpkg.com/parse5/-/parse5-6.0.1.tgz#e1a1c085c569b3dc08321184f19a39cc27f7c30b"
|
||||
integrity sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==
|
||||
|
||||
parse5@^7.0.0:
|
||||
version "7.0.0"
|
||||
resolved "https://registry.yarnpkg.com/parse5/-/parse5-7.0.0.tgz#51f74a5257f5fcc536389e8c2d0b3802e1bfa91a"
|
||||
integrity sha512-y/t8IXSPWTuRZqXc0ajH/UwDj4mnqLEbSttNbThcFhGrZuOyoyvNBO85PBp2jQa55wY9d07PBNjsK8ZP3K5U6g==
|
||||
dependencies:
|
||||
entities "^4.3.0"
|
||||
|
||||
parseurl@~1.3.2, parseurl@~1.3.3:
|
||||
version "1.3.3"
|
||||
resolved "https://registry.yarnpkg.com/parseurl/-/parseurl-1.3.3.tgz#9da19e7bee8d12dff0513ed5b76957793bc2e8d4"
|
||||
|
|
Loading…
Reference in a new issue