Client-side language detection

Signed-off-by: marcin mikołajczak <git@mkljczk.pl>
This commit is contained in:
marcin mikołajczak 2024-05-18 15:47:36 +02:00
parent 0d2b7f026f
commit 15ad2ea91f
9 changed files with 89 additions and 16 deletions

View file

@ -117,6 +117,7 @@
"escape-html": "^1.0.3",
"eslint-plugin-formatjs": "^4.12.2",
"exifr": "^7.1.3",
"fasttext.wasm.js": "^1.0.0",
"fuzzysort": "^3.0.0",
"graphemesplit": "^2.4.4",
"html-react-parser": "^5.0.0",

View file

@ -91,6 +91,7 @@ const COMPOSE_EDITOR_STATE_SET = 'COMPOSE_EDITOR_STATE_SET' as const;
const COMPOSE_CHANGE_MEDIA_ORDER = 'COMPOSE_CHANGE_MEDIA_ORDER' as const;
const COMPOSE_ADD_SUGGESTED_QUOTE = 'COMPOSE_ADD_SUGGESTED_QUOTE' as const;
const COMPOSE_ADD_SUGGESTED_LANGUAGE = 'COMPOSE_ADD_SUGGESTED_LANGUAGE' as const;
const getAccount = makeGetAccount();
@ -380,7 +381,7 @@ const submitCompose = (composeId: string, opts: SubmitComposeOpts = {}) =>
content_type: contentType,
poll: compose.poll,
scheduled_at: compose.schedule,
language: compose.language,
language: compose.language || compose.suggested_language,
to,
};
@ -875,7 +876,7 @@ const eventDiscussionCompose = (composeId: string, status: Status) =>
const setEditorState = (composeId: string, editorState: EditorState | string | null, text?: string) => ({
type: COMPOSE_EDITOR_STATE_SET,
id: composeId,
editorState: editorState,
editorState,
text,
});
@ -889,7 +890,13 @@ const changeMediaOrder = (composeId: string, a: string, b: string) => ({
const addSuggestedQuote = (composeId: string, quoteId: string) => ({
type: COMPOSE_ADD_SUGGESTED_QUOTE,
id: composeId,
quoteId: quoteId,
quoteId,
});
/**
 * Builds the action that records an automatically detected language
 * suggestion for a compose form.
 *
 * @param composeId - Identifier of the compose form the suggestion belongs to.
 * @param language - Detected language code to store as the suggestion.
 * @returns A `COMPOSE_ADD_SUGGESTED_LANGUAGE` action carrying the compose id and language.
 */
const addSuggestedLanguage = (composeId: string, language: string) => {
  return {
    type: COMPOSE_ADD_SUGGESTED_LANGUAGE,
    id: composeId,
    language,
  };
};
type ComposeAction =
@ -940,6 +947,7 @@ type ComposeAction =
| ReturnType<typeof setEditorState>
| ReturnType<typeof changeMediaOrder>
| ReturnType<typeof addSuggestedQuote>
| ReturnType<typeof addSuggestedLanguage>
export {
COMPOSE_CHANGE,
@ -990,6 +998,7 @@ export {
COMPOSE_EDITOR_STATE_SET,
COMPOSE_CHANGE_MEDIA_ORDER,
COMPOSE_ADD_SUGGESTED_QUOTE,
COMPOSE_ADD_SUGGESTED_LANGUAGE,
setComposeToStatus,
changeCompose,
replyCompose,
@ -1047,5 +1056,6 @@ export {
setEditorState,
changeMediaOrder,
addSuggestedQuote,
addSuggestedLanguage,
type ComposeAction,
};

View file

@ -27,6 +27,7 @@ const languages = Object.entries(languagesObject) as Array<[Language, string]>;
// Translatable strings used by the compose language dropdown.
const messages = defineMessages({
// Default button label and button title, shown when no language is selected or suggested.
languagePrompt: { id: 'compose.language_dropdown.prompt', defaultMessage: 'Select language' },
// Button label used when no language was picked but a detected suggestion exists;
// `{language}` is filled with the language name, or the raw code when the name is unknown.
languageSuggestion: { id: 'compose.language_dropdown.suggestion', defaultMessage: '{language} (detected)' },
// Text for the language search field (presumably its placeholder — usage is not visible in this excerpt).
search: { id: 'compose.language_dropdown.search', defaultMessage: 'Search language…' },
});
@ -61,7 +62,7 @@ const LanguageDropdown: React.FC<ILanguageDropdown> = ({ composeId }) => {
],
});
const language = useCompose(composeId).language;
const { language, suggested_language: suggestedLanguage } = useCompose(composeId);
const handleClick: React.EventHandler<
React.MouseEvent<HTMLButtonElement> | React.KeyboardEvent<HTMLButtonElement>
@ -253,12 +254,18 @@ const LanguageDropdown: React.FC<ILanguageDropdown> = ({ composeId }) => {
const isSearching = searchValue !== '';
const results = search();
let buttonLabel = intl.formatMessage(messages.languagePrompt);
if (language) buttonLabel = languagesObject[language];
else if (suggestedLanguage) buttonLabel = intl.formatMessage(messages.languageSuggestion, {
language: languagesObject[suggestedLanguage as Language] || suggestedLanguage,
});
return (
<>
<Button
theme='muted'
size='xs'
text={language ? languagesObject[language] : intl.formatMessage(messages.languagePrompt)}
text={buttonLabel}
icon={require('@tabler/icons/outline/language.svg')}
secondaryIcon={require('@tabler/icons/outline/chevron-down.svg')}
title={intl.formatMessage(messages.languagePrompt)}

View file

@ -1,14 +1,17 @@
import { useLexicalComposerContext } from '@lexical/react/LexicalComposerContext';
import { $createRemarkExport } from '@mkljczk/lexical-remark';
import { type LanguageIdentificationModel } from 'fasttext.wasm.js/dist/models/language-identification/common.js';
import { $getRoot } from 'lexical';
import debounce from 'lodash/debounce';
import { useCallback, useEffect } from 'react';
import { addSuggestedQuote, setEditorState } from 'soapbox/actions/compose';
import { addSuggestedLanguage, addSuggestedQuote, setEditorState } from 'soapbox/actions/compose';
import { fetchStatus } from 'soapbox/actions/statuses';
import { useAppDispatch, useFeatures } from 'soapbox/hooks';
import { getStatusIdsFromLinksInContent } from 'soapbox/utils/status';
// Module-level cache for the fastText language identification model.
// It stays unassigned until the first detection run, which imports the library
// lazily and stores the model here so later runs can reuse it.
let lidModel: LanguageIdentificationModel;
interface IStatePlugin {
composeId: string;
isWysiwyg?: boolean;
@ -50,9 +53,35 @@ const StatePlugin: React.FC<IStatePlugin> = ({ composeId, isWysiwyg }) => {
});
}, 2000), []);
/**
 * Debounced (750 ms) client-side language detection for the composed text.
 *
 * Detection is skipped when:
 * - post languages are not supported (`features.postLanguages` is falsy),
 * - `features.languageDetection` is set (presumably the backend detects the
 *   language itself — confirm against the feature definition),
 * - the user has already picked a language for this compose form,
 * - the text contains fewer than 4 words.
 *
 * Otherwise the fastText model is loaded lazily (once per module) and, if the
 * best match has a possibility above 0.5, it is stored as the suggested
 * language of the compose form.
 *
 * @param text - Plain text content of the editor.
 */
const detectLanguage = useCallback(debounce(async (text: string) => {
  dispatch(async (dispatch, getState) => {
    const state = getState();
    const compose = state.compose.get(composeId);

    if (!features.postLanguages || features.languageDetection || compose?.language) return;

    // Collapse EVERY whitespace run (including newlines) into a single space.
    // The model works on a single line of text, so multi-line posts must be
    // flattened; without the global flag only the first run was replaced.
    const normalizedText = text.replace(/\s+/g, ' ').trim();

    // Count words on the trimmed text so leading/trailing whitespace does not
    // produce empty "words" that let too-short input pass the threshold.
    const wordsLength = normalizedText === '' ? 0 : normalizedText.split(' ').length;

    if (wordsLength < 4) return;

    if (!lidModel) {
      // Load the library on demand so it is only fetched when detection actually runs.
      // eslint-disable-next-line import/extensions
      const { getLIDModel } = await import('fasttext.wasm.js/common');
      lidModel = await getLIDModel();
    }
    if (!lidModel.model) await lidModel.load();
    const { alpha2, possibility } = await lidModel.identify(normalizedText);

    // Only suggest a language when the model is reasonably confident.
    if (alpha2 && possibility > 0.5) {
      dispatch(addSuggestedLanguage(composeId, alpha2));
    }
  });
}, 750), []);
useEffect(() => {
editor.registerUpdateListener(({ editorState }) => {
let text;
const plainText = editorState.read(() => $getRoot().getTextContent());
let text = plainText;
if (isWysiwyg) {
text = editorState.read($createRemarkExport({
handlers: {
@ -60,13 +89,12 @@ const StatePlugin: React.FC<IStatePlugin> = ({ composeId, isWysiwyg }) => {
mention: (node) => ({ type: 'text', value: node.getTextContent() }),
},
}));
} else {
text = editorState.read(() => $getRoot().getTextContent());
}
const isEmpty = text === '';
const data = isEmpty ? null : JSON.stringify(editorState.toJSON());
dispatch(setEditorState(composeId, data, text));
getQuoteSuggestions(text);
getQuoteSuggestions(plainText);
detectLanguage(plainText);
});
}, [editor]);

View file

@ -56,6 +56,7 @@ import {
COMPOSE_CHANGE_MEDIA_ORDER,
COMPOSE_ADD_SUGGESTED_QUOTE,
ComposeAction,
COMPOSE_ADD_SUGGESTED_LANGUAGE,
} from '../actions/compose';
import { EVENT_COMPOSE_CANCEL, EVENT_FORM_SET, type EventsAction } from '../actions/events';
import { ME_FETCH_SUCCESS, ME_PATCH_SUCCESS, MeAction } from '../actions/me';
@ -114,6 +115,7 @@ const ReducerCompose = ImmutableRecord({
parent_reblogged_by: null as string | null,
dismissed_quotes: ImmutableOrderedSet<string>(),
language: null as Language | null,
suggested_language: null as string | null,
});
type State = ImmutableMap<string, Compose>;
@ -553,6 +555,9 @@ const compose = (state = initialState, action: ComposeAction | EventsAction | Me
case COMPOSE_ADD_SUGGESTED_QUOTE:
return updateCompose(state, action.id, compose => compose
.set('quote', action.quoteId));
case COMPOSE_ADD_SUGGESTED_LANGUAGE:
return updateCompose(state, action.id, compose => compose
.set('suggested_language', action.language));
case COMPOSE_QUOTE_CANCEL:
return updateCompose(state, action.id, compose => compose
.update('dismissed_quotes', quotes => compose.quote ? quotes.add(compose.quote) : quotes)

View file

@ -21,10 +21,6 @@ const fixVersion = (version: string) => {
version = '2.7.2 (compatible; Pleroma 2.4.50+akkoma)';
}
if (version.endsWith('+pl)')) {
version = version.slice(0, -3) + 'soapbox)';
}
// Set Takahē version to a Pleroma-like string
if (version.startsWith('takahe/')) {
version = `0.0.0 (compatible; Takahe ${version.slice(7)})`;

View file

@ -567,6 +567,11 @@ const getInstanceFeatures = (instance: Instance) => {
v.software === PLEROMA && v.build === REBASED && gte(v.version, '2.5.54'),
]),
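/**
 * When set, the client skips its own language detection while composing — presumably because the backend detects the post language itself.
 * TODO: Replace with proper feature gate.
 */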
languageDetection: v.software === PLEROMA && v.build === REBASED,
/**
* Can create, view, and manage lists.
* @see {@link https://docs.joinmastodon.org/methods/lists/}
@ -909,8 +914,11 @@ const parseVersion = (version: string): Backend => {
}) : null;
const compat = match ? semverParse(match[1]) || semverCoerce(match[1]) : null;
if (match && semver && compat) {
let build = semver.build[0];
if (build === 'pl') build = 'soapbox';
return {
build: semver.build[0],
build,
compatVersion: compat.version,
software: match[2] || MASTODON,
version: semver.version.split('-')[0],

View file

@ -82,6 +82,12 @@ const config = defineConfig(({ command }) => ({
}, {
src: './custom/instance',
dest: '.',
}, {
src: './node_modules/fasttext.wasm.js/dist/models/language-identification/assets/lid.176.ftz',
dest: 'fastText/models/',
}, {
src: './node_modules/fasttext.wasm.js/dist/core/fastText.common.wasm',
dest: 'fastText/',
}],
}),
visualizer({

View file

@ -2585,6 +2585,11 @@
dependencies:
"@types/trusted-types" "*"
"@types/emscripten@^1.39.10":
version "1.39.12"
resolved "https://registry.yarnpkg.com/@types/emscripten/-/emscripten-1.39.12.tgz#e43b4fdd4b389861897d6cbb9665532f3afd5abd"
integrity sha512-AQImDBgudQfMqUBfrjZYilRxoHDzTBp+ejh+g1fY67eSMalwIKtBXofjpyI0JBgNpHGzxeGAR2QDya0wxW9zbA==
"@types/escape-html@^1.0.1":
version "1.0.1"
resolved "https://registry.yarnpkg.com/@types/escape-html/-/escape-html-1.0.1.tgz#b19b4646915f0ae2c306bf984dc0a59c5cfc97ba"
@ -5769,6 +5774,13 @@ fastq@^1.6.0:
dependencies:
reusify "^1.0.4"
fasttext.wasm.js@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/fasttext.wasm.js/-/fasttext.wasm.js-1.0.0.tgz#3f89dfc024386af6c0a070ab5b65f0a62a0a79d2"
integrity sha512-Rv2DyM9ZaJ/r09FRIYeVXxsSRFu45CVH3Zu7nheTe/EPCH0Sew1wKf0zQ4VCBmibGRBpeHlpMuRdsp+VC/YwZw==
dependencies:
"@types/emscripten" "^1.39.10"
file-entry-cache@^6.0.1:
version "6.0.1"
resolved "https://registry.yarnpkg.com/file-entry-cache/-/file-entry-cache-6.0.1.tgz#211b2dd9659cb0394b073e7323ac3c933d522027"