Language detection
Signed-off-by: marcin mikołajczak <git@mkljczk.pl>
This commit is contained in:
parent
d094cdf55b
commit
4c1d0dbb69
7 changed files with 147 additions and 16 deletions
|
@ -3594,5 +3594,27 @@
|
|||
suggestions: ["YOUR_API_KEY"]
|
||||
}
|
||||
]
|
||||
},
|
||||
%{
|
||||
group: :pleroma,
|
||||
key: Pleroma.Language.LanguageDetector,
|
||||
type: :group,
|
||||
description: "Language detection providers",
|
||||
children: [
|
||||
%{
|
||||
key: :provider,
|
||||
type: :module,
|
||||
suggestions: [
|
||||
Pleroma.Language.LanguageDetector.Fasttext
|
||||
]
|
||||
},
|
||||
%{
|
||||
group: {:subgroup, Pleroma.Language.LanguageDetector.Fasttext},
|
||||
key: :model,
|
||||
label: "fastText language detection model",
|
||||
type: :string,
|
||||
suggestions: ["/usr/share/fasttext/lid.176.bin"]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
|
|
|
@ -187,7 +187,27 @@ defp check_system_commands!(:ok) do
|
|||
false
|
||||
end
|
||||
|
||||
if Enum.all?([preview_proxy_commands_status | filter_commands_statuses], & &1) do
|
||||
language_detector_commands_status =
|
||||
if Pleroma.Language.LanguageDetector.missing_dependencies() == [] do
|
||||
true
|
||||
else
|
||||
Logger.error(
|
||||
"The following dependencies required by the currently enabled " <>
|
||||
"language detection provider are not installed: " <>
|
||||
inspect(Pleroma.Language.LanguageDetector.missing_dependencies())
|
||||
)
|
||||
|
||||
false
|
||||
end
|
||||
|
||||
if Enum.all?(
|
||||
[
|
||||
preview_proxy_commands_status,
|
||||
language_detector_commands_status
|
||||
| filter_commands_statuses
|
||||
],
|
||||
& &1
|
||||
) do
|
||||
:ok
|
||||
else
|
||||
{:error,
|
||||
|
|
34
lib/pleroma/language/language_detector.ex
Normal file
34
lib/pleroma/language/language_detector.ex
Normal file
|
@ -0,0 +1,34 @@
|
|||
# Pleroma: A lightweight social networking server
|
||||
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
|
||||
# SPDX-License-Identifier: AGPL-3.0-only
|
||||
|
||||
defmodule Pleroma.Language.LanguageDetector do
  @moduledoc """
  Entry point for language detection.

  Delegates to the provider module configured under
  `[Pleroma.Language.LanguageDetector, :provider]`. When no provider is
  configured, detection is disabled and every function degrades gracefully.
  """

  # Texts with fewer words than this are never handed to the provider.
  @words_threshold 4

  @doc """
  Returns the dependencies the configured provider reports as missing.

  Always returns a list: `[]` when no provider is configured, so callers can
  compare the result against `[]` without treating "detection disabled" as a
  missing dependency.
  """
  @spec missing_dependencies() :: [String.t()]
  def missing_dependencies do
    provider = get_provider()

    if provider do
      provider.missing_dependencies()
    else
      []
    end
  end

  @doc """
  Detects the language of `text` using the configured provider.

  HTML tags are stripped from `text` before it is inspected. Returns `nil`
  when no provider is configured, when the provider reports it is not
  configured, or when the stripped text has fewer than #{@words_threshold}
  words; otherwise returns whatever the provider's `detect/1` returns.
  """
  @spec detect(String.t()) :: String.t() | nil
  def detect(text) do
    provider = get_provider()

    # Check the provider first so no sanitising work is done when detection
    # is disabled.
    if provider && provider.configured?() do
      {:ok, stripped} = FastSanitize.strip_tags(text)

      if enough_words?(stripped) do
        provider.detect(stripped)
      else
        nil
      end
    else
      nil
    end
  end

  # `String.split/1` drops the empty strings produced by leading/trailing
  # whitespace, so only real words count towards the threshold.
  defp enough_words?(text) do
    length(String.split(text)) >= @words_threshold
  end

  defp get_provider do
    Pleroma.Config.get([__MODULE__, :provider])
  end
end
|
47
lib/pleroma/language/language_detector/fasttext.ex
Normal file
47
lib/pleroma/language/language_detector/fasttext.ex
Normal file
|
@ -0,0 +1,47 @@
|
|||
# Pleroma: A lightweight social networking server
|
||||
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
|
||||
# SPDX-License-Identifier: AGPL-3.0-only
|
||||
|
||||
defmodule Pleroma.Language.LanguageDetector.Fasttext do
  @moduledoc """
  Language detection provider that shells out to the `fasttext` command.

  The model file passed to `fasttext predict` is read from
  `[Pleroma.Language.LanguageDetector.Fasttext, :model]`.
  """

  import Pleroma.Web.Utils.Guards, only: [not_empty_string: 1]

  alias Pleroma.Language.LanguageDetector.Provider

  @behaviour Provider

  @doc """
  Returns `["fasttext"]` when the `fasttext` command is not available,
  `[]` otherwise.
  """
  @impl Provider
  def missing_dependencies do
    if Pleroma.Utils.command_available?("fasttext") do
      []
    else
      ["fasttext"]
    end
  end

  @doc """
  Returns `true` when a non-empty model path is configured.
  """
  @impl Provider
  def configured?, do: not_empty_string(get_model())

  @doc """
  Runs `fasttext predict` on `text` and returns the predicted language label
  (without the `__label__` prefix), or `nil` when the text could not be
  written to a temporary file or the command printed no label.

  The text is passed through a temporary file, which is removed afterwards
  even if running the command raises.
  """
  @impl Provider
  def detect(text) do
    text_path = Path.join(System.tmp_dir!(), "fasttext-#{Ecto.UUID.generate()}")

    try do
      with :ok <- File.write(text_path, to_single_line(text)),
           {"__label__" <> labels, _exit_status} <-
             System.cmd("fasttext", ["predict", get_model(), text_path]) do
        # Keep only the first label; anything after it (trailing newline or
        # further predictions) is not part of the language code.
        labels |> String.split() |> List.first()
      else
        _ -> nil
      end
    after
      # Best-effort cleanup; runs on every exit path.
      File.rm(text_path)
    end
  end

  # fastText treats every input line as a separate sample and prints one
  # prediction per line, so the text is collapsed into a single line to get a
  # single prediction for the whole text.
  defp to_single_line(text), do: String.replace(text, ~r/[\r\n]+/, " ")

  defp get_model do
    Pleroma.Config.get([__MODULE__, :model])
  end
end
|
11
lib/pleroma/language/language_detector/provider.ex
Normal file
11
lib/pleroma/language/language_detector/provider.ex
Normal file
|
@ -0,0 +1,11 @@
|
|||
# Pleroma: A lightweight social networking server
|
||||
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
|
||||
# SPDX-License-Identifier: AGPL-3.0-only
|
||||
|
||||
defmodule Pleroma.Language.LanguageDetector.Provider do
  @moduledoc """
  Behaviour implemented by language detection providers.
  """

  @doc """
  Lists the dependencies the provider needs that are currently missing.
  An empty list means nothing is missing.
  """
  @callback missing_dependencies() :: list(String.t())

  @doc """
  Tells whether the provider has the configuration it needs.
  """
  @callback configured?() :: boolean()

  @doc """
  Detects the language of `text`, returning it as a string, or `nil` when no
  language could be determined.
  """
  @callback detect(text :: String.t()) :: String.t() | nil
end
|
|
@ -6,9 +6,9 @@ defmodule Pleroma.Language.Translation do
|
|||
@cachex Pleroma.Config.get([:cachex, :provider], Cachex)
|
||||
|
||||
def configured? do
|
||||
service = get_service()
|
||||
provider = get_provider()
|
||||
|
||||
!!service and service.configured?
|
||||
!!provider and provider.configured?
|
||||
end
|
||||
|
||||
def translate(text, source_language, target_language) do
|
||||
|
@ -16,13 +16,13 @@ def translate(text, source_language, target_language) do
|
|||
|
||||
case @cachex.get(:translations_cache, cache_key) do
|
||||
{:ok, nil} ->
|
||||
service = get_service()
|
||||
provider = get_provider()
|
||||
|
||||
result =
|
||||
if !service or !service.configured? do
|
||||
if !configured?() do
|
||||
{:error, :not_found}
|
||||
else
|
||||
service.translate(text, source_language, target_language)
|
||||
provider.translate(text, source_language, target_language)
|
||||
end
|
||||
|
||||
store_result(result, cache_key)
|
||||
|
@ -37,7 +37,7 @@ def translate(text, source_language, target_language) do
|
|||
end
|
||||
end
|
||||
|
||||
defp get_service, do: Pleroma.Config.get([__MODULE__, :provider])
|
||||
defp get_provider, do: Pleroma.Config.get([__MODULE__, :provider])
|
||||
|
||||
defp get_cache_key(text, source_language, target_language) do
|
||||
"#{source_language}/#{target_language}/#{content_hash(text)}"
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
defmodule Pleroma.Web.CommonAPI.ActivityDraft do
|
||||
alias Pleroma.Activity
|
||||
alias Pleroma.Conversation.Participation
|
||||
alias Pleroma.Language.LanguageDetector
|
||||
alias Pleroma.Object
|
||||
alias Pleroma.Web.ActivityPub.Builder
|
||||
alias Pleroma.Web.ActivityPub.Visibility
|
||||
|
@ -226,18 +227,14 @@ defp sensitive(draft) do
|
|||
%__MODULE__{draft | sensitive: sensitive}
|
||||
end
|
||||
|
||||
defp language(%{params: %{language: language}} = draft) when not_empty_string(language) do
|
||||
case Utils.get_valid_language(language) do
|
||||
language when is_binary(language) ->
|
||||
%__MODULE__{draft | language: language}
|
||||
defp language(draft) do
|
||||
language =
|
||||
Utils.get_valid_language(draft.params[:language]) ||
|
||||
LanguageDetector.detect(draft.full_payload)
|
||||
|
||||
_ ->
|
||||
draft
|
||||
end
|
||||
%__MODULE__{draft | language: language}
|
||||
end
|
||||
|
||||
defp language(draft), do: draft
|
||||
|
||||
defp object(draft) do
|
||||
emoji = Map.merge(Pleroma.Emoji.Formatter.get_emoji_map(draft.full_payload), draft.emoji)
|
||||
|
||||
|
|
Loading…
Reference in a new issue