From 9d0a3b7a694debbd3c9df4616202c1b38b711bfd Mon Sep 17 00:00:00 2001 From: ewwwwwwww Date: Mon, 4 Jul 2022 21:27:17 -0700 Subject: [PATCH] remove cheerio and add custom html parser --- app/soapbox/features/emoji/index.ts | 96 +++++++++++++--------- package.json | 2 - yarn.lock | 121 +--------------------------- 3 files changed, 57 insertions(+), 162 deletions(-) diff --git a/app/soapbox/features/emoji/index.ts b/app/soapbox/features/emoji/index.ts index b9cc3fd8fd..c85990f170 100644 --- a/app/soapbox/features/emoji/index.ts +++ b/app/soapbox/features/emoji/index.ts @@ -1,10 +1,5 @@ -// import data from '@emoji-mart/data'; -import { load as cheerioLoad } from 'cheerio'; -import { parseDocument } from 'htmlparser2'; - import unicodeMapping from './mapping'; -import type { Node as CheerioNode } from 'cheerio'; import type { Emoji as EmojiMart, CustomEmoji as EmojiMartCustom } from 'emoji-mart'; /* @@ -99,24 +94,24 @@ const popStack = (stack: string, open: boolean) => { // TODO: handle grouped unicode emojis export const emojifyText = (str: string, customEmojis = {}) => { - let res = ''; + let buf = ''; let stack = ''; let open = false; for (const c of Array.from(str)) { // chunk by unicode codepoint with Array.from if (c in unicodeMapping) { if (open) { // unicode emoji inside colon - res += popStack(stack, open); + buf += popStack(stack, open); } - res += convertUnicode(c); + buf += convertUnicode(c); } else if (c === ':') { stack += ':'; // we see another : we convert it and clear the stack buffer if (open) { - res += convertEmoji(stack, customEmojis); + buf += convertEmoji(stack, customEmojis); stack = ''; } @@ -128,54 +123,75 @@ export const emojifyText = (str: string, customEmojis = {}) => { // if the stack is non-null and we see invalid chars it's a string not emoji // so we push it to the return result and clear it if (!validEmojiChar(c)) { - res += popStack(stack, open); + buf += popStack(stack, open); } } else { - res += c; + buf += c; } } } // never found a closing colon so it's just a raw string if (open) { - res += stack; + buf += stack; } - return res; + return buf; }; -// const parseHmtl = (str: string) => { -// const ret = []; -// let depth = 0; -// -// return ret; -// } +const parseHTML = (str: string): { text: boolean, data: string }[] => { + const tokens = []; + let buf = ''; + let stack = ''; + let open = false; -const filterTextNodes = (idx: number, el: CheerioNode) => { - return el.nodeType === Node.TEXT_NODE; + for (const c of str) { + if (c === '<') { + if (open) { + tokens.push({ text: true, data: stack }); + stack = '<'; + } else { + tokens.push({ text: true, data: buf }); + stack = '<'; + open = true; + } + } else if (c === '>') { + if (open) { + open = false; + tokens.push({ text: false, data: stack + '>' }); + stack = ''; + buf = ''; + } else { + buf += '>'; + } + + } else { + if (open) { + stack += c; + } else { + buf += c; + } + } + } + + if (open) { + tokens.push({ text: true, data: buf + stack }); + } else if (buf !== '') { + tokens.push({ text: true, data: buf }); + } + + return tokens; }; const emojify = (str: string, customEmojis = {}) => { - const dom = parseDocument(str); - const $ = cheerioLoad(dom, { - xmlMode: true, - decodeEntities: false, - }); + return parseHTML(str) + .map(({ text, data }) => { + if (!text) return data; + if (data.length === 0 || data === ' ') return data; - $.root() - .contents() // iterate over flat map of all html elements - .filter(filterTextNodes) // only iterate over text nodes - .each((idx, el) => { - // skip common case - // @ts-ignore - if (el.data.length === 0 || el.data === ' ') return; - - // mutating el.data is incorrect but we do it to prevent a second dom parse - // @ts-ignore - el.data = emojifyText(el.data, customEmojis); - }); - - return $.html(); + return emojifyText(data, customEmojis); + }) + .join(''); }; export default emojify; diff --git a/package.json b/package.json index 5cf5c53b84..d849881b2a 100644 --- a/package.json +++ b/package.json @@ -104,7 +104,6 @@ "bootstrap-icons": "^1.5.0", "bowser": "^2.11.0", "browserslist": "^4.16.6", - "cheerio": "^1.0.0-rc.12", "classnames": "^2.2.5", "copy-webpack-plugin": "^9.0.1", "core-js": "^3.15.2", @@ -125,7 +124,6 @@ "history": "^4.10.1", "html-webpack-harddisk-plugin": "^2.0.0", "html-webpack-plugin": "^5.5.0", - "htmlparser2": "^8.0.1", "http-link-header": "^1.0.2", "immutable": "^4.0.0", "imports-loader": "^4.0.0", diff --git a/yarn.lock b/yarn.lock index 61cfe48636..bc422d5e23 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1271,13 +1271,6 @@ dependencies: regenerator-runtime "^0.12.0" -"@babel/runtime@^7.0.0": - version "7.18.6" - resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.18.6.tgz#6a1ef59f838debd670421f8c7f2cbb8da9751580" - integrity sha512-t9wi7/AW6XtKahAe20Yw0/mMljKq0B1r2fPdvaAdV/KPDZewFXdaaa6K7lxmZBZ8FBNpCiAT6iHPmd6QO9bKfQ== - dependencies: - regenerator-runtime "^0.13.4" - "@babel/runtime@^7.1.2", "@babel/runtime@^7.10.2", "@babel/runtime@^7.11.2", "@babel/runtime@^7.12.1", "@babel/runtime@^7.2.0", "@babel/runtime@^7.8.4", "@babel/runtime@^7.9.2": version "7.15.4" resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.15.4.tgz#fd17d16bfdf878e6dd02d19753a39fa8a8d9c84a" @@ -4079,31 +4072,6 @@ charcodes@^0.2.0: resolved "https://registry.yarnpkg.com/charcodes/-/charcodes-0.2.0.tgz#5208d327e6cc05f99eb80ffc814707572d1f14e4" integrity sha512-Y4kiDb+AM4Ecy58YkuZrrSRJBDQdQ2L+NyS1vHHFtNtUjgutcZfx3yp1dAONI/oPaPmyGfCLx5CxL+zauIMyKQ== -cheerio-select@^2.1.0: - version "2.1.0" - resolved "https://registry.yarnpkg.com/cheerio-select/-/cheerio-select-2.1.0.tgz#4d8673286b8126ca2a8e42740d5e3c4884ae21b4" - integrity sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g== - dependencies: - boolbase "^1.0.0" - css-select "^5.1.0" - css-what "^6.1.0" - domelementtype "^2.3.0" - domhandler "^5.0.3" - domutils "^3.0.1" - -cheerio@^1.0.0-rc.12: - version "1.0.0-rc.12" - resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0-rc.12.tgz#788bf7466506b1c6bf5fae51d24a2c4d62e47683" - integrity sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q== - dependencies: - cheerio-select "^2.1.0" - dom-serializer "^2.0.0" - domhandler "^5.0.3" - domutils "^3.0.1" - htmlparser2 "^8.0.1" - parse5 "^7.0.0" - parse5-htmlparser2-tree-adapter "^7.0.0" - "chokidar@>=3.0.0 <4.0.0": version "3.5.2" resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-3.5.2.tgz#dba3976fcadb016f66fd365021d91600d01c1e75" @@ -4496,17 +4464,6 @@ css-select@^4.1.3: domutils "^2.6.0" nth-check "^2.0.0" -css-select@^5.1.0: - version "5.1.0" - resolved "https://registry.yarnpkg.com/css-select/-/css-select-5.1.0.tgz#b8ebd6554c3637ccc76688804ad3f6a6fdaea8a6" - integrity sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg== - dependencies: - boolbase "^1.0.0" - css-what "^6.1.0" - domhandler "^5.0.2" - domutils "^3.0.1" - nth-check "^2.0.1" - css-system-font-keywords@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/css-system-font-keywords/-/css-system-font-keywords-1.0.0.tgz#85c6f086aba4eb32c571a3086affc434b84823ed" @@ -4525,11 +4482,6 @@ css-what@^5.0.0: resolved "https://registry.yarnpkg.com/css-what/-/css-what-5.0.1.tgz#3efa820131f4669a8ac2408f9c32e7c7de9f4cad" integrity sha512-FYDTSHb/7KXsWICVsxdmiExPjCfRC4qRFBdVwv7Ax9hMnvMmEjP9RfxTEZ3qPZGmADDn2vAKSo9UcN1jKVYscg== -css-what@^6.1.0: - version "6.1.0" - resolved "https://registry.yarnpkg.com/css-what/-/css-what-6.1.0.tgz#fb5effcf76f1ddea2c81bdfaa4de44e79bac70f4" - integrity sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw== - css.escape@^1.5.1: version "1.5.1" resolved "https://registry.yarnpkg.com/css.escape/-/css.escape-1.5.1.tgz#42e27d4fa04ae32f931a4b4d4191fa9cddee97cb" @@ -4952,15 +4904,6 @@ dom-serializer@^1.0.1: domhandler "^4.2.0" entities "^2.0.0" -dom-serializer@^2.0.0: - version "2.0.0" - resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-2.0.0.tgz#e41b802e1eedf9f6cae183ce5e622d789d7d8e53" - integrity sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg== - dependencies: - domelementtype "^2.3.0" - domhandler "^5.0.2" - entities "^4.2.0" - domelementtype@1, domelementtype@^1.3.1: version "1.3.1" resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-1.3.1.tgz#d048c44b37b0d10a7f2a3d5fee3f4333d790481f" @@ -4971,11 +4914,6 @@ domelementtype@^2.0.1, domelementtype@^2.2.0: resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.2.0.tgz#9a0b6c2782ed6a1c7323d42267183df9bd8b1d57" integrity sha512-DtBMo82pv1dFtUmHyr48beiuq792Sxohr+8Hm9zoxklYPfa6n0Z3Byjj2IV7bmr2IyqClnqEQhfgHJJ5QF0R5A== -domelementtype@^2.3.0: - version "2.3.0" - resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.3.0.tgz#5c45e8e869952626331d7aab326d01daf65d589d" - integrity sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw== - domexception@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/domexception/-/domexception-1.0.1.tgz#937442644ca6a31261ef36e3ec677fe805582c90" @@ -5004,13 +4942,6 @@ domhandler@^4.0.0, domhandler@^4.2.0: dependencies: domelementtype "^2.2.0" -domhandler@^5.0.1, domhandler@^5.0.2, domhandler@^5.0.3: - version "5.0.3" - resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-5.0.3.tgz#cc385f7f751f1d1fc650c21374804254538c7d31" - integrity sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w== - dependencies: - domelementtype "^2.3.0" - domutils@^1.5.1: version "1.7.0" resolved "https://registry.yarnpkg.com/domutils/-/domutils-1.7.0.tgz#56ea341e834e06e6748af7a1cb25da67ea9f8c2a" @@ -5028,15 +4959,6 @@ domutils@^2.5.2, domutils@^2.6.0: domelementtype "^2.2.0" domhandler "^4.2.0" -domutils@^3.0.1: - version "3.0.1" - resolved "https://registry.yarnpkg.com/domutils/-/domutils-3.0.1.tgz#696b3875238338cb186b6c0612bd4901c89a4f1c" - integrity sha512-z08c1l761iKhDFtfXO04C7kTdPBLi41zwOZl00WS8b5eiaebNpY00HKbztwBq+e3vyqWNwWF3mP9YLUeqIrF+Q== - dependencies: - dom-serializer "^2.0.0" - domelementtype "^2.3.0" - domhandler "^5.0.1" - dot-case@^3.0.4: version "3.0.4" resolved "https://registry.yarnpkg.com/dot-case/-/dot-case-3.0.4.tgz#9b2b670d00a431667a8a75ba29cd1b98809ce751" @@ -5102,15 +5024,6 @@ emoji-datasource@5.0.0: resolved "https://registry.yarnpkg.com/emoji-datasource/-/emoji-datasource-5.0.0.tgz#1522fdba3c52223a1cf5a1c1fc282935400eaa06" integrity sha512-LuvLWFnxznTH++GytEzpzOPUo1SB+6CUFqIlVETJJ3x9fpyMCKFfyqberbhMLOpT1qcNe+km+zoyBeUSC3u5Rw== -"emoji-mart-old@npm:emoji-mart-lazyload": - version "3.0.1-j" - resolved "https://registry.yarnpkg.com/emoji-mart-lazyload/-/emoji-mart-lazyload-3.0.1-j.tgz#87a90d30b79d9145ece078d53e3e683c1a10ce9c" - integrity sha512-0wKF7MR0/iAeCIoiBLY+JjXCugycTgYRC2SL0y9/bjNSQlbeMdzILmPQJAufU/mgLFDUitOvjxLDhOZ9yxZ48g== - dependencies: - "@babel/runtime" "^7.0.0" - intersection-observer "^0.12.0" - prop-types "^15.6.0" - emoji-mart@^5.1.0: version "5.1.0" resolved "https://registry.yarnpkg.com/emoji-mart/-/emoji-mart-5.1.0.tgz#8a36a872e1297747342d1385bd7b7141ac2f4365" @@ -5181,7 +5094,7 @@ entities@^2.0.0: resolved "https://registry.yarnpkg.com/entities/-/entities-2.2.0.tgz#098dc90ebb83d8dffa089d55256b351d34c4da55" integrity sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A== -entities@^4.2.0, entities@^4.3.0, entities@^4.3.1: +entities@^4.3.1: version "4.3.1" resolved "https://registry.yarnpkg.com/entities/-/entities-4.3.1.tgz#c34062a94c865c322f9d67b4384e4169bcede6a4" integrity sha512-o4q/dYJlmyjP2zfnaWDUC6A3BQFmVTX+tZPezK7k0GLSU9QYCauscf5Y+qcEPzKL+EixVouYDgLQK5H9GrLpkg== @@ -6463,16 +6376,6 @@ htmlparser2@^6.1.0: domutils "^2.5.2" entities "^2.0.0" -htmlparser2@^8.0.1: - version "8.0.1" - resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-8.0.1.tgz#abaa985474fcefe269bc761a779b544d7196d010" - integrity sha512-4lVbmc1diZC7GUJQtRQ5yBAeUCL1exyMwmForWkRLnwyzWBFxN633SALPMGYaWZvKe9j1pRZJpauvmxENSp/EA== - dependencies: - domelementtype "^2.3.0" - domhandler "^5.0.2" - domutils "^3.0.1" - entities "^4.3.0" - http-deceiver@^1.2.7: version "1.2.7" resolved "https://registry.yarnpkg.com/http-deceiver/-/http-deceiver-1.2.7.tgz#fa7168944ab9a519d337cb0bec7284dc3e723d87" @@ -8705,13 +8608,6 @@ nth-check@^2.0.0: dependencies: boolbase "^1.0.0" -nth-check@^2.0.1: - version "2.1.1" - resolved "https://registry.yarnpkg.com/nth-check/-/nth-check-2.1.1.tgz#c9eab428effce36cd6b92c924bdb000ef1f1ed1d" - integrity sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w== - dependencies: - boolbase "^1.0.0" - num2fraction@^1.2.2: version "1.2.2" resolved "https://registry.yarnpkg.com/num2fraction/-/num2fraction-1.2.2.tgz#6f682b6a027a4e9ddfa4564cd2589d1d4e669ede" @@ -9016,26 +8912,11 @@ parse-passwd@^1.0.0: resolved "https://registry.yarnpkg.com/parse-passwd/-/parse-passwd-1.0.0.tgz#6d5b934a456993b23d37f40a382d6f1666a8e5c6" integrity sha512-1Y1A//QUXEZK7YKz+rD9WydcE1+EuPr6ZBgKecAB8tmoW6UFv0NREVJe1p+jRxtThkcbbKkfwIbWJe/IeE6m2Q== -parse5-htmlparser2-tree-adapter@^7.0.0: - version "7.0.0" - resolved "https://registry.yarnpkg.com/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz#23c2cc233bcf09bb7beba8b8a69d46b08c62c2f1" - integrity sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g== - dependencies: - domhandler "^5.0.2" - parse5 "^7.0.0" - parse5@6.0.1: version "6.0.1" resolved "https://registry.yarnpkg.com/parse5/-/parse5-6.0.1.tgz#e1a1c085c569b3dc08321184f19a39cc27f7c30b" integrity sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw== -parse5@^7.0.0: - version "7.0.0" - resolved "https://registry.yarnpkg.com/parse5/-/parse5-7.0.0.tgz#51f74a5257f5fcc536389e8c2d0b3802e1bfa91a" - integrity sha512-y/t8IXSPWTuRZqXc0ajH/UwDj4mnqLEbSttNbThcFhGrZuOyoyvNBO85PBp2jQa55wY9d07PBNjsK8ZP3K5U6g== - dependencies: - entities "^4.3.0" - parseurl@~1.3.2, parseurl@~1.3.3: version "1.3.3" resolved "https://registry.yarnpkg.com/parseurl/-/parseurl-1.3.3.tgz#9da19e7bee8d12dff0513ed5b76957793bc2e8d4"