From bf2a10331ed406dbfc804ee93d7218764623d6f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?marcin=20miko=C5=82ajczak?= Date: Thu, 10 Aug 2023 23:03:19 +0200 Subject: [PATCH] Allow to specify post language MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: marcin mikołajczak --- changelog.d/post-languages.add | 1 + .../web/activity_pub/transmogrifier.ex | 15 ++-- lib/pleroma/web/common_api/activity_draft.ex | 8 +- lib/pleroma/web/common_api/utils.ex | 28 +++---- .../transmogrifier/note_handling_test.exs | 81 +++++++++++++++++++ .../web/activity_pub/transmogrifier_test.exs | 12 +++ 6 files changed, 122 insertions(+), 23 deletions(-) create mode 100644 changelog.d/post-languages.add diff --git a/changelog.d/post-languages.add b/changelog.d/post-languages.add new file mode 100644 index 0000000000..04b350f3fc --- /dev/null +++ b/changelog.d/post-languages.add @@ -0,0 +1 @@ +Allow to specify post language \ No newline at end of file diff --git a/lib/pleroma/web/activity_pub/transmogrifier.ex b/lib/pleroma/web/activity_pub/transmogrifier.ex index e16a95ea97..e4f30a3afe 100644 --- a/lib/pleroma/web/activity_pub/transmogrifier.ex +++ b/lib/pleroma/web/activity_pub/transmogrifier.ex @@ -23,7 +23,7 @@ defmodule Pleroma.Web.ActivityPub.Transmogrifier do alias Pleroma.Web.Federator import Ecto.Query - import Pleroma.Web.CommonAPI.Utils, only: [get_valid_language: 1] + import Pleroma.Web.CommonAPI.Utils, only: [is_good_locale_code?: 1] import Pleroma.Web.Utils.Guards, only: [not_empty_string: 1] require Logger @@ -1011,9 +1011,12 @@ defp maybe_add_content_map(object), do: object def maybe_add_language(object) do language = - get_language_from_context(object) |> get_valid_language() || - get_language_from_content_map(object) |> get_valid_language() || - get_language_from_content(object) |> get_valid_language() + [ + get_language_from_context(object), + get_language_from_content_map(object), + get_language_from_content(object) + ] 
+ |> Enum.find(&is_good_locale_code?(&1)) if language do Map.put(object, "language", language) @@ -1023,9 +1026,9 @@ def maybe_add_language(object) do end def maybe_add_language_from_activity(object, activity) do - language = get_language_from_context(activity) |> get_valid_language() + language = get_language_from_context(activity) - if language do + if is_good_locale_code?(language) do Map.put(object, "language", language) else object diff --git a/lib/pleroma/web/common_api/activity_draft.ex b/lib/pleroma/web/common_api/activity_draft.ex index a5f7db7790..8bf51ece6f 100644 --- a/lib/pleroma/web/common_api/activity_draft.ex +++ b/lib/pleroma/web/common_api/activity_draft.ex @@ -292,12 +292,16 @@ defp sensitive(draft) do defp language(draft) do language = - Utils.get_valid_language(draft.params[:language]) || + draft.params[:language] || LanguageDetector.detect( draft.content_html <> " " <> (draft.summary || draft.params[:name]) ) - %__MODULE__{draft | language: language} + if Utils.is_good_locale_code?(language) do + %__MODULE__{draft | language: language} + else + draft + end end defp object(draft) do diff --git a/lib/pleroma/web/common_api/utils.ex b/lib/pleroma/web/common_api/utils.ex index 51d614e952..37396ff89f 100644 --- a/lib/pleroma/web/common_api/utils.ex +++ b/lib/pleroma/web/common_api/utils.ex @@ -23,15 +23,6 @@ defmodule Pleroma.Web.CommonAPI.Utils do require Logger require Pleroma.Constants - @supported_locales ~w( - aa ab ae af ak am an ar as av ay az ba be bg bh bi bm bn bo br bs ca ce ch co cr cs cu cv cy da - de dv dz ee el en eo es et eu fa ff fi fj fo fr fy ga gd gl gn gu gv ha he hi ho hr ht hu hy hz - ia id ie ig ii ik io is it iu ja jv ka kg ki kj kk kl km kn ko kr ks ku kv kw ky la lb lg li ln - lo lt lu lv mg mh mi mk ml mn mr ms mt my na nb nd ne ng nl nn no nr nv ny oc oj om or os pa pi - pl ps pt qu rm rn ro ru rw sa sc sd se sg si sk sl sm sn so sq sr ss st su sv sw ta te tg th ti - tk tl tn to tr ts tt tw ty ug uk ur uz ve vi vo wa wo 
xh yi yo za zh zu ast ckb kab kmr zgh - ) - def attachments_from_ids(%{media_ids: ids, descriptions: desc}, user) do attachments_from_ids_descs(ids, desc, user) end @@ -522,12 +513,19 @@ def validate_attachments_count(attachments) do end end - def get_valid_language(language) when is_binary(language) do - case language |> String.split("_") |> Enum.at(0) do - locale when locale in @supported_locales -> locale - _ -> nil - end + def is_good_locale_code?(code) when is_binary(code) and code != "" do + code + |> String.codepoints() + |> Enum.all?(&valid_char?/1) end - def get_valid_language(_), do: nil + def is_good_locale_code?(_code), do: false + + # [a-zA-Z0-9-] + defp valid_char?(char) do + ("a" <= char and char <= "z") or + ("A" <= char and char <= "Z") or + ("0" <= char and char <= "9") or + char == "-" + end end diff --git a/test/pleroma/web/activity_pub/transmogrifier/note_handling_test.exs b/test/pleroma/web/activity_pub/transmogrifier/note_handling_test.exs index 85dce57db0..7aebf67e58 100644 --- a/test/pleroma/web/activity_pub/transmogrifier/note_handling_test.exs +++ b/test/pleroma/web/activity_pub/transmogrifier/note_handling_test.exs @@ -482,6 +482,87 @@ test "it detects language from content" do end end + test "it detects language from context" do + user = insert(:user) + + message = %{ + "@context" => ["https://www.w3.org/ns/activitystreams", %{"@language" => "pl"}], + "to" => ["https://www.w3.org/ns/activitystreams#Public"], + "cc" => [], + "type" => "Create", + "object" => %{ + "to" => ["https://www.w3.org/ns/activitystreams#Public"], + "cc" => [], + "id" => Utils.generate_object_id(), + "type" => "Note", + "content" => "Szczęść Boże", + "attributedTo" => user.ap_id + }, + "actor" => user.ap_id + } + + {:ok, %Activity{data: data, local: false}} = Transmogrifier.handle_incoming(message) + object = Object.normalize(data["object"], fetch: false) + + assert object.data["language"] == "pl" + end + + test "it detects language from contentMap" do + user = insert(:user) + + 
message = %{ + "@context" => "https://www.w3.org/ns/activitystreams", + "to" => ["https://www.w3.org/ns/activitystreams#Public"], + "cc" => [], + "type" => "Create", + "object" => %{ + "to" => ["https://www.w3.org/ns/activitystreams#Public"], + "cc" => [], + "id" => Utils.generate_object_id(), + "type" => "Note", + "content" => "Szczęść Boże", + "contentMap" => %{ + "de" => "Gott segne", + "pl" => "Szczęść Boże" + }, + "attributedTo" => user.ap_id + }, + "actor" => user.ap_id + } + + {:ok, %Activity{data: data, local: false}} = Transmogrifier.handle_incoming(message) + object = Object.normalize(data["object"], fetch: false) + + assert object.data["language"] == "pl" + end + + test "it detects language from content" do + clear_config([Pleroma.Language.LanguageDetector, :provider], LanguageDetectorMock) + + user = insert(:user) + + message = %{ + "@context" => ["https://www.w3.org/ns/activitystreams"], + "to" => ["https://www.w3.org/ns/activitystreams#Public"], + "cc" => [], + "type" => "Create", + "object" => %{ + "to" => ["https://www.w3.org/ns/activitystreams#Public"], + "cc" => [], + "id" => Utils.generate_object_id(), + "type" => "Note", + "content" => "Dieu vous bénisse, Fédivers.", + "attributedTo" => user.ap_id + }, + "actor" => user.ap_id + } + + {:ok, %Activity{data: data, local: false}} = Transmogrifier.handle_incoming(message) + object = Object.normalize(data["object"], fetch: false) + + assert object.data["language"] == "fr" + end + describe "`handle_incoming/2`, Mastodon format `replies` handling" do setup do: clear_config([:activitypub, :note_replies_output_limit], 5) setup do: clear_config([:instance, :federation_incoming_replies_max_depth]) diff --git a/test/pleroma/web/activity_pub/transmogrifier_test.exs b/test/pleroma/web/activity_pub/transmogrifier_test.exs index 264f0ccdbb..1244974649 100644 --- a/test/pleroma/web/activity_pub/transmogrifier_test.exs +++ b/test/pleroma/web/activity_pub/transmogrifier_test.exs @@ -376,6 +376,18 @@ test "it 
prepares a quote post" do end end + test "it adds contentMap if language is specified" do + user = insert(:user) + + {:ok, activity} = CommonAPI.post(user, %{status: "тест", language: "uk"}) + + {:ok, prepared} = Transmogrifier.prepare_outgoing(activity.data) + + assert prepared["object"]["contentMap"] == %{ + "uk" => "тест" + } + end + describe "actor rewriting" do test "it fixes the actor URL property to be a proper URI" do data = %{