Client-side language detection

Signed-off-by: marcin mikołajczak <git@mkljczk.pl>
This commit is contained in:
marcin mikołajczak 2024-05-18 15:47:36 +02:00
parent 0d2b7f026f
commit 15ad2ea91f
9 changed files with 89 additions and 16 deletions

View file

@ -117,6 +117,7 @@
"escape-html": "^1.0.3", "escape-html": "^1.0.3",
"eslint-plugin-formatjs": "^4.12.2", "eslint-plugin-formatjs": "^4.12.2",
"exifr": "^7.1.3", "exifr": "^7.1.3",
"fasttext.wasm.js": "^1.0.0",
"fuzzysort": "^3.0.0", "fuzzysort": "^3.0.0",
"graphemesplit": "^2.4.4", "graphemesplit": "^2.4.4",
"html-react-parser": "^5.0.0", "html-react-parser": "^5.0.0",

View file

@ -90,7 +90,8 @@ const COMPOSE_EDITOR_STATE_SET = 'COMPOSE_EDITOR_STATE_SET' as const;
const COMPOSE_CHANGE_MEDIA_ORDER = 'COMPOSE_CHANGE_MEDIA_ORDER' as const; const COMPOSE_CHANGE_MEDIA_ORDER = 'COMPOSE_CHANGE_MEDIA_ORDER' as const;
const COMPOSE_ADD_SUGGESTED_QUOTE = 'COMPOSE_ADD_SUGGESTED_QUOTE' as const; const COMPOSE_ADD_SUGGESTED_QUOTE = 'COMPOSE_ADD_SUGGESTED_QUOTE' as const;
const COMPOSE_ADD_SUGGESTED_LANGUAGE = 'COMPOSE_ADD_SUGGESTED_LANGUAGE' as const;
const getAccount = makeGetAccount(); const getAccount = makeGetAccount();
@ -380,7 +381,7 @@ const submitCompose = (composeId: string, opts: SubmitComposeOpts = {}) =>
content_type: contentType, content_type: contentType,
poll: compose.poll, poll: compose.poll,
scheduled_at: compose.schedule, scheduled_at: compose.schedule,
language: compose.language, language: compose.language || compose.suggested_language,
to, to,
}; };
@ -875,7 +876,7 @@ const eventDiscussionCompose = (composeId: string, status: Status) =>
const setEditorState = (composeId: string, editorState: EditorState | string | null, text?: string) => ({ const setEditorState = (composeId: string, editorState: EditorState | string | null, text?: string) => ({
type: COMPOSE_EDITOR_STATE_SET, type: COMPOSE_EDITOR_STATE_SET,
id: composeId, id: composeId,
editorState: editorState, editorState,
text, text,
}); });
@ -889,7 +890,13 @@ const changeMediaOrder = (composeId: string, a: string, b: string) => ({
const addSuggestedQuote = (composeId: string, quoteId: string) => ({ const addSuggestedQuote = (composeId: string, quoteId: string) => ({
type: COMPOSE_ADD_SUGGESTED_QUOTE, type: COMPOSE_ADD_SUGGESTED_QUOTE,
id: composeId, id: composeId,
quoteId: quoteId, quoteId,
});
/**
 * Builds the `COMPOSE_ADD_SUGGESTED_LANGUAGE` action for a compose form.
 *
 * The suggestion is carried separately from an explicitly selected language;
 * it does not overwrite the user's own choice.
 *
 * @param composeId - ID of the compose form the suggestion applies to.
 * @param language - Suggested language code for the post being written.
 */
const addSuggestedLanguage = (composeId: string, language: string) => ({
type: COMPOSE_ADD_SUGGESTED_LANGUAGE,
id: composeId,
language,
}); });
type ComposeAction = type ComposeAction =
@ -940,6 +947,7 @@ type ComposeAction =
| ReturnType<typeof setEditorState> | ReturnType<typeof setEditorState>
| ReturnType<typeof changeMediaOrder> | ReturnType<typeof changeMediaOrder>
| ReturnType<typeof addSuggestedQuote> | ReturnType<typeof addSuggestedQuote>
| ReturnType<typeof addSuggestedLanguage>
export { export {
COMPOSE_CHANGE, COMPOSE_CHANGE,
@ -990,6 +998,7 @@ export {
COMPOSE_EDITOR_STATE_SET, COMPOSE_EDITOR_STATE_SET,
COMPOSE_CHANGE_MEDIA_ORDER, COMPOSE_CHANGE_MEDIA_ORDER,
COMPOSE_ADD_SUGGESTED_QUOTE, COMPOSE_ADD_SUGGESTED_QUOTE,
COMPOSE_ADD_SUGGESTED_LANGUAGE,
setComposeToStatus, setComposeToStatus,
changeCompose, changeCompose,
replyCompose, replyCompose,
@ -1047,5 +1056,6 @@ export {
setEditorState, setEditorState,
changeMediaOrder, changeMediaOrder,
addSuggestedQuote, addSuggestedQuote,
addSuggestedLanguage,
type ComposeAction, type ComposeAction,
}; };

View file

@ -27,6 +27,7 @@ const languages = Object.entries(languagesObject) as Array<[Language, string]>;
const messages = defineMessages({ const messages = defineMessages({
languagePrompt: { id: 'compose.language_dropdown.prompt', defaultMessage: 'Select language' }, languagePrompt: { id: 'compose.language_dropdown.prompt', defaultMessage: 'Select language' },
languageSuggestion: { id: 'compose.language_dropdown.suggestion', defaultMessage: '{language} (detected)' },
search: { id: 'compose.language_dropdown.search', defaultMessage: 'Search language…' }, search: { id: 'compose.language_dropdown.search', defaultMessage: 'Search language…' },
}); });
@ -61,7 +62,7 @@ const LanguageDropdown: React.FC<ILanguageDropdown> = ({ composeId }) => {
], ],
}); });
const language = useCompose(composeId).language; const { language, suggested_language: suggestedLanguage } = useCompose(composeId);
const handleClick: React.EventHandler< const handleClick: React.EventHandler<
React.MouseEvent<HTMLButtonElement> | React.KeyboardEvent<HTMLButtonElement> React.MouseEvent<HTMLButtonElement> | React.KeyboardEvent<HTMLButtonElement>
@ -253,12 +254,18 @@ const LanguageDropdown: React.FC<ILanguageDropdown> = ({ composeId }) => {
const isSearching = searchValue !== ''; const isSearching = searchValue !== '';
const results = search(); const results = search();
let buttonLabel = intl.formatMessage(messages.languagePrompt);
if (language) buttonLabel = languagesObject[language];
else if (suggestedLanguage) buttonLabel = intl.formatMessage(messages.languageSuggestion, {
language: languagesObject[suggestedLanguage as Language] || suggestedLanguage,
});
return ( return (
<> <>
<Button <Button
theme='muted' theme='muted'
size='xs' size='xs'
text={language ? languagesObject[language] : intl.formatMessage(messages.languagePrompt)} text={buttonLabel}
icon={require('@tabler/icons/outline/language.svg')} icon={require('@tabler/icons/outline/language.svg')}
secondaryIcon={require('@tabler/icons/outline/chevron-down.svg')} secondaryIcon={require('@tabler/icons/outline/chevron-down.svg')}
title={intl.formatMessage(messages.languagePrompt)} title={intl.formatMessage(messages.languagePrompt)}

View file

@ -1,14 +1,17 @@
import { useLexicalComposerContext } from '@lexical/react/LexicalComposerContext'; import { useLexicalComposerContext } from '@lexical/react/LexicalComposerContext';
import { $createRemarkExport } from '@mkljczk/lexical-remark'; import { $createRemarkExport } from '@mkljczk/lexical-remark';
import { type LanguageIdentificationModel } from 'fasttext.wasm.js/dist/models/language-identification/common.js';
import { $getRoot } from 'lexical'; import { $getRoot } from 'lexical';
import debounce from 'lodash/debounce'; import debounce from 'lodash/debounce';
import { useCallback, useEffect } from 'react'; import { useCallback, useEffect } from 'react';
import { addSuggestedQuote, setEditorState } from 'soapbox/actions/compose'; import { addSuggestedLanguage, addSuggestedQuote, setEditorState } from 'soapbox/actions/compose';
import { fetchStatus } from 'soapbox/actions/statuses'; import { fetchStatus } from 'soapbox/actions/statuses';
import { useAppDispatch, useFeatures } from 'soapbox/hooks'; import { useAppDispatch, useFeatures } from 'soapbox/hooks';
import { getStatusIdsFromLinksInContent } from 'soapbox/utils/status'; import { getStatusIdsFromLinksInContent } from 'soapbox/utils/status';
let lidModel: LanguageIdentificationModel;
interface IStatePlugin { interface IStatePlugin {
composeId: string; composeId: string;
isWysiwyg?: boolean; isWysiwyg?: boolean;
@ -50,9 +53,35 @@ const StatePlugin: React.FC<IStatePlugin> = ({ composeId, isWysiwyg }) => {
}); });
}, 2000), []); }, 2000), []);
// Suggests a post language from the editor's plain text. Debounced by 750 ms so
// the detector does not run on every keystroke.
const detectLanguage = useCallback(debounce(async (text: string) => {
  dispatch(async (dispatch, getState) => {
    const state = getState();
    const compose = state.compose.get(composeId);

    // Skip when post languages are unsupported, when the `languageDetection`
    // feature flag is set, or when a language is already chosen for this form.
    if (!features.postLanguages || features.languageDetection || compose?.language) return;

    // Trim before splitting so leading/trailing whitespace (or an empty editor)
    // does not add empty tokens that inflate the word count.
    const normalizedText = text.trim();
    const wordsLength = normalizedText.split(/\s+/).length;

    // Too little text to make a meaningful guess.
    if (wordsLength < 4) return;

    try {
      if (!lidModel) {
        // Import lazily so the detector is only pulled in once it is needed;
        // the instance is cached at module level for later calls.
        // eslint-disable-next-line import/extensions
        const { getLIDModel } = await import('fasttext.wasm.js/common');
        lidModel = await getLIDModel();
      }

      if (!lidModel.model) await lidModel.load();

      // Collapse EVERY whitespace run (global flag) into a single space so the
      // whole post is passed as one line. NOTE(review): fastText presumably
      // reads input up to the first newline only — confirm against the library.
      const { alpha2, possibility } = await lidModel.identify(normalizedText.replace(/\s+/g, ' '));

      // Only suggest when a language code was returned with better-than-even confidence.
      if (alpha2 && possibility > 0.5) {
        dispatch(addSuggestedLanguage(composeId, alpha2));
      }
    } catch (error) {
      // Detection is best-effort: a failed import, model load or prediction
      // must not surface as an unhandled rejection while the user is typing.
      console.error('Language detection failed', error);
    }
  });
}, 750), []);
useEffect(() => { useEffect(() => {
editor.registerUpdateListener(({ editorState }) => { editor.registerUpdateListener(({ editorState }) => {
let text; const plainText = editorState.read(() => $getRoot().getTextContent());
let text = plainText;
if (isWysiwyg) { if (isWysiwyg) {
text = editorState.read($createRemarkExport({ text = editorState.read($createRemarkExport({
handlers: { handlers: {
@ -60,13 +89,12 @@ const StatePlugin: React.FC<IStatePlugin> = ({ composeId, isWysiwyg }) => {
mention: (node) => ({ type: 'text', value: node.getTextContent() }), mention: (node) => ({ type: 'text', value: node.getTextContent() }),
}, },
})); }));
} else {
text = editorState.read(() => $getRoot().getTextContent());
} }
const isEmpty = text === ''; const isEmpty = text === '';
const data = isEmpty ? null : JSON.stringify(editorState.toJSON()); const data = isEmpty ? null : JSON.stringify(editorState.toJSON());
dispatch(setEditorState(composeId, data, text)); dispatch(setEditorState(composeId, data, text));
getQuoteSuggestions(text); getQuoteSuggestions(plainText);
detectLanguage(plainText);
}); });
}, [editor]); }, [editor]);

View file

@ -56,6 +56,7 @@ import {
COMPOSE_CHANGE_MEDIA_ORDER, COMPOSE_CHANGE_MEDIA_ORDER,
COMPOSE_ADD_SUGGESTED_QUOTE, COMPOSE_ADD_SUGGESTED_QUOTE,
ComposeAction, ComposeAction,
COMPOSE_ADD_SUGGESTED_LANGUAGE,
} from '../actions/compose'; } from '../actions/compose';
import { EVENT_COMPOSE_CANCEL, EVENT_FORM_SET, type EventsAction } from '../actions/events'; import { EVENT_COMPOSE_CANCEL, EVENT_FORM_SET, type EventsAction } from '../actions/events';
import { ME_FETCH_SUCCESS, ME_PATCH_SUCCESS, MeAction } from '../actions/me'; import { ME_FETCH_SUCCESS, ME_PATCH_SUCCESS, MeAction } from '../actions/me';
@ -114,6 +115,7 @@ const ReducerCompose = ImmutableRecord({
parent_reblogged_by: null as string | null, parent_reblogged_by: null as string | null,
dismissed_quotes: ImmutableOrderedSet<string>(), dismissed_quotes: ImmutableOrderedSet<string>(),
language: null as Language | null, language: null as Language | null,
suggested_language: null as string | null,
}); });
type State = ImmutableMap<string, Compose>; type State = ImmutableMap<string, Compose>;
@ -553,6 +555,9 @@ const compose = (state = initialState, action: ComposeAction | EventsAction | Me
case COMPOSE_ADD_SUGGESTED_QUOTE: case COMPOSE_ADD_SUGGESTED_QUOTE:
return updateCompose(state, action.id, compose => compose return updateCompose(state, action.id, compose => compose
.set('quote', action.quoteId)); .set('quote', action.quoteId));
case COMPOSE_ADD_SUGGESTED_LANGUAGE:
return updateCompose(state, action.id, compose => compose
.set('suggested_language', action.language));
case COMPOSE_QUOTE_CANCEL: case COMPOSE_QUOTE_CANCEL:
return updateCompose(state, action.id, compose => compose return updateCompose(state, action.id, compose => compose
.update('dismissed_quotes', quotes => compose.quote ? quotes.add(compose.quote) : quotes) .update('dismissed_quotes', quotes => compose.quote ? quotes.add(compose.quote) : quotes)

View file

@ -21,10 +21,6 @@ const fixVersion = (version: string) => {
version = '2.7.2 (compatible; Pleroma 2.4.50+akkoma)'; version = '2.7.2 (compatible; Pleroma 2.4.50+akkoma)';
} }
if (version.endsWith('+pl)')) {
version = version.slice(0, -3) + 'soapbox)';
}
// Set Takahē version to a Pleroma-like string // Set Takahē version to a Pleroma-like string
if (version.startsWith('takahe/')) { if (version.startsWith('takahe/')) {
version = `0.0.0 (compatible; Takahe ${version.slice(7)})`; version = `0.0.0 (compatible; Takahe ${version.slice(7)})`;

View file

@ -567,6 +567,11 @@ const getInstanceFeatures = (instance: Instance) => {
v.software === PLEROMA && v.build === REBASED && gte(v.version, '2.5.54'), v.software === PLEROMA && v.build === REBASED && gte(v.version, '2.5.54'),
]), ]),
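/**
 * When set, the composer skips its client-side language detection
 * (presumably because the backend detects the post language itself — confirm).
 * TODO: Replace with proper feature gate.
 */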
languageDetection: v.software === PLEROMA && v.build === REBASED,
/** /**
* Can create, view, and manage lists. * Can create, view, and manage lists.
* @see {@link https://docs.joinmastodon.org/methods/lists/} * @see {@link https://docs.joinmastodon.org/methods/lists/}
@ -909,8 +914,11 @@ const parseVersion = (version: string): Backend => {
}) : null; }) : null;
const compat = match ? semverParse(match[1]) || semverCoerce(match[1]) : null; const compat = match ? semverParse(match[1]) || semverCoerce(match[1]) : null;
if (match && semver && compat) { if (match && semver && compat) {
let build = semver.build[0];
if (build === 'pl') build = 'soapbox';
return { return {
build: semver.build[0], build,
compatVersion: compat.version, compatVersion: compat.version,
software: match[2] || MASTODON, software: match[2] || MASTODON,
version: semver.version.split('-')[0], version: semver.version.split('-')[0],

View file

@ -82,6 +82,12 @@ const config = defineConfig(({ command }) => ({
}, { }, {
src: './custom/instance', src: './custom/instance',
dest: '.', dest: '.',
}, {
src: './node_modules/fasttext.wasm.js/dist/models/language-identification/assets/lid.176.ftz',
dest: 'fastText/models/',
}, {
src: './node_modules/fasttext.wasm.js/dist/core/fastText.common.wasm',
dest: 'fastText/',
}], }],
}), }),
visualizer({ visualizer({

View file

@ -2585,6 +2585,11 @@
dependencies: dependencies:
"@types/trusted-types" "*" "@types/trusted-types" "*"
"@types/emscripten@^1.39.10":
version "1.39.12"
resolved "https://registry.yarnpkg.com/@types/emscripten/-/emscripten-1.39.12.tgz#e43b4fdd4b389861897d6cbb9665532f3afd5abd"
integrity sha512-AQImDBgudQfMqUBfrjZYilRxoHDzTBp+ejh+g1fY67eSMalwIKtBXofjpyI0JBgNpHGzxeGAR2QDya0wxW9zbA==
"@types/escape-html@^1.0.1": "@types/escape-html@^1.0.1":
version "1.0.1" version "1.0.1"
resolved "https://registry.yarnpkg.com/@types/escape-html/-/escape-html-1.0.1.tgz#b19b4646915f0ae2c306bf984dc0a59c5cfc97ba" resolved "https://registry.yarnpkg.com/@types/escape-html/-/escape-html-1.0.1.tgz#b19b4646915f0ae2c306bf984dc0a59c5cfc97ba"
@ -5769,6 +5774,13 @@ fastq@^1.6.0:
dependencies: dependencies:
reusify "^1.0.4" reusify "^1.0.4"
fasttext.wasm.js@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/fasttext.wasm.js/-/fasttext.wasm.js-1.0.0.tgz#3f89dfc024386af6c0a070ab5b65f0a62a0a79d2"
integrity sha512-Rv2DyM9ZaJ/r09FRIYeVXxsSRFu45CVH3Zu7nheTe/EPCH0Sew1wKf0zQ4VCBmibGRBpeHlpMuRdsp+VC/YwZw==
dependencies:
"@types/emscripten" "^1.39.10"
file-entry-cache@^6.0.1: file-entry-cache@^6.0.1:
version "6.0.1" version "6.0.1"
resolved "https://registry.yarnpkg.com/file-entry-cache/-/file-entry-cache-6.0.1.tgz#211b2dd9659cb0394b073e7323ac3c933d522027" resolved "https://registry.yarnpkg.com/file-entry-cache/-/file-entry-cache-6.0.1.tgz#211b2dd9659cb0394b073e7323ac3c933d522027"