From e154ebbf7933123e91d5b5c6f5070e78eb3e383b Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sun, 15 Aug 2021 21:53:04 +0300 Subject: [PATCH 01/63] Initial meilisearch implementation, doesn't delete posts yet --- config/config.exs | 7 ++- config/test.exs | 2 + lib/mix/tasks/pleroma/search/meilisearch.ex | 38 ++++++++++++ lib/pleroma/activity.ex | 1 + lib/pleroma/activity/search.ex | 4 +- lib/pleroma/application.ex | 6 +- lib/pleroma/search/meilisearch.ex | 60 +++++++++++++++++++ lib/pleroma/web/activity_pub/activity_pub.ex | 6 ++ .../controllers/search_controller.ex | 5 +- 9 files changed, 123 insertions(+), 6 deletions(-) create mode 100644 lib/mix/tasks/pleroma/search/meilisearch.ex create mode 100644 lib/pleroma/search/meilisearch.ex diff --git a/config/config.exs b/config/config.exs index 4e21ce457c..1df7dd44b5 100644 --- a/config/config.exs +++ b/config/config.exs @@ -866,9 +866,14 @@ config :pleroma, ConcurrentLimiter, [ {Pleroma.Web.RichMedia.Helpers, [max_running: 5, max_waiting: 5]}, - {Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, [max_running: 5, max_waiting: 5]} + {Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, [max_running: 5, max_waiting: 5]}, + {Pleroma.Search, [max_running: 20, max_waiting: 50]} ] +config :pleroma, Pleroma.Search, module: Pleroma.Activity.Search + +config :pleroma, Pleroma.Search.Meilisearch, url: "http://127.0.0.1:7700/" + # Import environment specific config. This must remain at the bottom # of this file so it overrides the configuration defined above. 
import_config "#{Mix.env()}.exs" diff --git a/config/test.exs b/config/test.exs index d5c25f65e3..d1c356f146 100644 --- a/config/test.exs +++ b/config/test.exs @@ -133,6 +133,8 @@ ap_streamer: Pleroma.Web.ActivityPub.ActivityPubMock, logger: Pleroma.LoggerMock +config :pleroma, Pleroma.Search, module: Pleroma.Activity.Search + # Reduce recompilation time # https://dashbit.co/blog/speeding-up-re-compilation-of-elixir-projects config :phoenix, :plug_init_mode, :runtime diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex new file mode 100644 index 0000000000..2af8e58532 --- /dev/null +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -0,0 +1,38 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Mix.Tasks.Pleroma.Search.Meilisearch do + import Mix.Pleroma + + import Ecto.Query + + def run(["index"]) do + start_pleroma() + + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + + Pleroma.Repo.chunk_stream( + from(Pleroma.Object, + limit: 200, + where: fragment("data->>'type' = 'Note'") and fragment("LENGTH(data->>'source') > 0") + ), + 100, + :batches + ) + |> Stream.map(fn objects -> + Enum.map(objects, fn object -> + data = object.data + %{id: object.id, source: data["source"], ap: data["id"]} + end) + end) + |> Stream.each(fn activities -> + {:ok, _} = + Pleroma.HTTP.post( + "#{endpoint}/indexes/objects/documents", + Jason.encode!(activities) + ) + end) + |> Stream.run() + end +end diff --git a/lib/pleroma/activity.ex b/lib/pleroma/activity.ex index ebfd4ed45f..9563136f9b 100644 --- a/lib/pleroma/activity.ex +++ b/lib/pleroma/activity.ex @@ -367,6 +367,7 @@ def restrict_deactivated_users(query) do end defdelegate search(user, query, options \\ []), to: Pleroma.Activity.Search + def add_to_index(_activity), do: nil def direct_conversation_id(activity, for_user) do alias Pleroma.Conversation.Participation 
diff --git a/lib/pleroma/activity/search.ex b/lib/pleroma/activity/search.ex index 0b9b24aa43..3dce9d3553 100644 --- a/lib/pleroma/activity/search.ex +++ b/lib/pleroma/activity/search.ex @@ -136,7 +136,7 @@ defp query_with(q, :rum, search_query, :websearch) do ) end - defp maybe_restrict_local(q, user) do + def maybe_restrict_local(q, user) do limit = Pleroma.Config.get([:instance, :limit_to_local_content], :unauthenticated) case {limit, user} do @@ -149,7 +149,7 @@ defp maybe_restrict_local(q, user) do defp restrict_local(q), do: where(q, local: true) - defp maybe_fetch(activities, user, search_query) do + def maybe_fetch(activities, user, search_query) do with true <- Regex.match?(~r/https?:/, search_query), {:ok, object} <- Fetcher.fetch_object_from_id(search_query), %Activity{} = activity <- Activity.get_create_by_object_ap_id(object.data["id"]), diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex index 1c1db8c104..62d1b8b39b 100644 --- a/lib/pleroma/application.ex +++ b/lib/pleroma/application.ex @@ -321,7 +321,11 @@ defp http_children(_, _), do: [] def limiters_setup do config = Config.get(ConcurrentLimiter, []) - [Pleroma.Web.RichMedia.Helpers, Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy] + [ + Pleroma.Web.RichMedia.Helpers, + Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, + Pleroma.Search + ] |> Enum.each(fn module -> mod_config = Keyword.get(config, module, []) diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex new file mode 100644 index 0000000000..92e0d34293 --- /dev/null +++ b/lib/pleroma/search/meilisearch.ex @@ -0,0 +1,60 @@ +defmodule Pleroma.Search.Meilisearch do + require Logger + + alias Pleroma.Activity + + import Pleroma.Activity.Search + import Ecto.Query + + def search(user, query, options \\ []) do + limit = Enum.min([Keyword.get(options, :limit), 40]) + offset = Keyword.get(options, :offset, 0) + author = Keyword.get(options, :author) + + endpoint = 
Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + + {:ok, result} = + Pleroma.HTTP.post( + "#{endpoint}/indexes/objects/search", + Jason.encode!(%{q: query, offset: offset, limit: limit}) + ) + + hits = Jason.decode!(result.body)["hits"] |> Enum.map(& &1["ap"]) + + try do + hits + |> Activity.create_by_object_ap_id() + |> Activity.with_preloaded_object() + |> Activity.with_preloaded_object() + |> Activity.restrict_deactivated_users() + |> maybe_restrict_local(user) + |> maybe_restrict_author(author) + |> maybe_restrict_blocked(user) + |> maybe_fetch(user, query) + |> order_by([activity], desc: activity.id) + |> Pleroma.Repo.all() + rescue + _ -> maybe_fetch([], user, query) + end + end + + def add_to_index(activity) do + object = activity.object + + if activity.data["type"] == "Create" and not is_nil(object) and object.data["type"] == "Note" do + data = object.data + + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + + {:ok, result} = + Pleroma.HTTP.post( + "#{endpoint}/indexes/objects/documents", + Jason.encode!([%{id: object.id, source: data["source"], ap: data["id"]}]) + ) + + if not Map.has_key?(Jason.decode!(result.body), "updateId") do + Logger.error("Failed to add activity #{activity.id} to index: #{result.body}") + end + end + end +end diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex index a5d7036d93..034c3b185b 100644 --- a/lib/pleroma/web/activity_pub/activity_pub.ex +++ b/lib/pleroma/web/activity_pub/activity_pub.ex @@ -140,6 +140,12 @@ def insert(map, local \\ true, fake \\ false, bypass_actor_check \\ false) when Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end) end) + search_module = Pleroma.Config.get([Pleroma.Search, :module]) + + ConcurrentLimiter.limit(Pleroma.Search, fn -> + Task.start(fn -> search_module.add_to_index(activity) end) + end) + {:ok, activity} else %Activity{} = activity -> diff --git 
a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex index 5e6e04734c..99c33eba6c 100644 --- a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex +++ b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex @@ -5,7 +5,6 @@ defmodule Pleroma.Web.MastodonAPI.SearchController do use Pleroma.Web, :controller - alias Pleroma.Activity alias Pleroma.Repo alias Pleroma.User alias Pleroma.Web.ControllerHelper @@ -100,7 +99,9 @@ defp resource_search(_, "accounts", query, options) do end defp resource_search(_, "statuses", query, options) do - statuses = with_fallback(fn -> Activity.search(options[:for_user], query, options) end) + search_module = Pleroma.Config.get([Pleroma.Search, :module], Pleroma.Activity) + + statuses = with_fallback(fn -> search_module.search(options[:for_user], query, options) end) StatusView.render("index.json", activities: statuses, From 0318e9a59945d7a5625111157867f0f9ebaffd91 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 16 Aug 2021 10:18:01 +0300 Subject: [PATCH 02/63] Add logging to milisiearch index and make it use desc(id) --- lib/mix/tasks/pleroma/search/meilisearch.ex | 26 +++++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 2af8e58532..1fece96e5e 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -3,8 +3,9 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Mix.Tasks.Pleroma.Search.Meilisearch do - import Mix.Pleroma + require Logger + import Mix.Pleroma import Ecto.Query def run(["index"]) do @@ -12,12 +13,25 @@ def run(["index"]) do endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + {:ok, _} = + Pleroma.HTTP.post( + "#{endpoint}/indexes/objects/settings/ranking-rules", + Jason.encode!([ + "desc(id)", + "typo", + "words", + 
"proximity", + "attribute", + "wordsPosition", + "exactness" + ]) + ) + Pleroma.Repo.chunk_stream( from(Pleroma.Object, - limit: 200, where: fragment("data->>'type' = 'Note'") and fragment("LENGTH(data->>'source') > 0") ), - 100, + 200, :batches ) |> Stream.map(fn objects -> @@ -26,12 +40,14 @@ def run(["index"]) do %{id: object.id, source: data["source"], ap: data["id"]} end) end) - |> Stream.each(fn activities -> + |> Stream.each(fn objects -> {:ok, _} = Pleroma.HTTP.post( "#{endpoint}/indexes/objects/documents", - Jason.encode!(activities) + Jason.encode!(objects) ) + + IO.puts("Indexed #{Enum.count(objects)} entries") end) |> Stream.run() end From 365024abec905e427babb5403f0fccbde65f4bcd Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 16 Aug 2021 22:24:31 +0300 Subject: [PATCH 03/63] Ensure only indexing public posts and implement clearing and delete --- lib/mix/tasks/pleroma/search/meilisearch.ex | 15 ++++++++++++++- lib/pleroma/activity.ex | 1 + lib/pleroma/search/meilisearch.ex | 17 ++++++++++++++++- lib/pleroma/web/common_api.ex | 7 +++++++ 4 files changed, 38 insertions(+), 2 deletions(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 1fece96e5e..0b86fdecec 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -4,6 +4,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do require Logger + require Pleroma.Constants import Mix.Pleroma import Ecto.Query @@ -29,7 +30,11 @@ def run(["index"]) do Pleroma.Repo.chunk_stream( from(Pleroma.Object, - where: fragment("data->>'type' = 'Note'") and fragment("LENGTH(data->>'source') > 0") + # Only index public posts which are notes and have some text + where: + fragment("data->>'type' = 'Note'") and + fragment("LENGTH(data->>'source') > 0") and + fragment("data->'to' \\? 
?", ^Pleroma.Constants.as_public()) ), 200, :batches @@ -51,4 +56,12 @@ def run(["index"]) do end) |> Stream.run() end + + def run(["clear"]) do + start_pleroma() + + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + + {:ok, _} = Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects/documents", "", [], []) + end end diff --git a/lib/pleroma/activity.ex b/lib/pleroma/activity.ex index 9563136f9b..2c168fd410 100644 --- a/lib/pleroma/activity.ex +++ b/lib/pleroma/activity.ex @@ -368,6 +368,7 @@ def restrict_deactivated_users(query) do defdelegate search(user, query, options \\ []), to: Pleroma.Activity.Search def add_to_index(_activity), do: nil + def remove_from_index(_object), do: nil def direct_conversation_id(activity, for_user) do alias Pleroma.Conversation.Participation diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 92e0d34293..dbe6b2d672 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -1,5 +1,6 @@ defmodule Pleroma.Search.Meilisearch do require Logger + require Pleroma.Constants alias Pleroma.Activity @@ -41,7 +42,8 @@ def search(user, query, options \\ []) do def add_to_index(activity) do object = activity.object - if activity.data["type"] == "Create" and not is_nil(object) and object.data["type"] == "Note" do + if activity.data["type"] == "Create" and not is_nil(object) and object.data["type"] == "Note" and + Pleroma.Constants.as_public() in object.data["to"] do data = object.data endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) @@ -57,4 +59,17 @@ def add_to_index(activity) do end end end + + def remove_from_index(object) do + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + + {:ok, _} = + Pleroma.HTTP.request( + :delete, + "#{endpoint}/indexes/objects/documents/#{object.id}", + "", + [], + [] + ) + end end diff --git a/lib/pleroma/web/common_api.ex b/lib/pleroma/web/common_api.ex index 89f5dd6065..54a8aa2139 100644 
--- a/lib/pleroma/web/common_api.ex +++ b/lib/pleroma/web/common_api.ex @@ -147,6 +147,13 @@ def delete(activity_id, user) do true <- User.superuser?(user) || user.ap_id == object.data["actor"], {:ok, delete_data, _} <- Builder.delete(user, object.data["id"]), {:ok, delete, _} <- Pipeline.common_pipeline(delete_data, local: true) do + # Also delete from search index + search_module = Pleroma.Config.get([Pleroma.Search, :module]) + + ConcurrentLimiter.limit(Pleroma.Search, fn -> + Task.start(fn -> search_module.remove_from_index(object) end) + end) + {:ok, delete} else {:find_activity, _} -> From ea6a6a128712e81c4f298b2bb2cedfadf2295cff Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 16 Aug 2021 22:30:56 +0300 Subject: [PATCH 04/63] Make the indexing batch differently and more, show number indexed --- lib/mix/tasks/pleroma/search/meilisearch.ex | 65 ++++++++++++--------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 0b86fdecec..2a64385289 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -28,33 +28,46 @@ def run(["index"]) do ]) ) - Pleroma.Repo.chunk_stream( - from(Pleroma.Object, - # Only index public posts which are notes and have some text - where: - fragment("data->>'type' = 'Note'") and - fragment("LENGTH(data->>'source') > 0") and - fragment("data->'to' \\? 
?", ^Pleroma.Constants.as_public()) - ), - 200, - :batches - ) - |> Stream.map(fn objects -> - Enum.map(objects, fn object -> - data = object.data - %{id: object.id, source: data["source"], ap: data["id"]} - end) - end) - |> Stream.each(fn objects -> - {:ok, _} = - Pleroma.HTTP.post( - "#{endpoint}/indexes/objects/documents", - Jason.encode!(objects) - ) + chunk_size = 100_000 - IO.puts("Indexed #{Enum.count(objects)} entries") - end) - |> Stream.run() + Pleroma.Repo.transaction( + fn -> + Pleroma.Repo.stream( + from(Pleroma.Object, + # Only index public posts which are notes and have some text + where: + fragment("data->>'type' = 'Note'") and + fragment("LENGTH(data->>'source') > 0") and + fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()), + order_by: fragment("data->'published' DESC") + ), + timeout: :infinity + ) + |> Stream.chunk_every(chunk_size) + |> Stream.transform(0, fn objects, acc -> + new_acc = acc + Enum.count(objects) + + IO.puts("Indexed #{new_acc} entries") + + {[objects], new_acc} + end) + |> Stream.map(fn objects -> + Enum.map(objects, fn object -> + data = object.data + %{id: object.id, source: data["source"], ap: data["id"]} + end) + end) + |> Stream.each(fn objects -> + {:ok, _} = + Pleroma.HTTP.post( + "#{endpoint}/indexes/objects/documents", + Jason.encode!(objects) + ) + end) + |> Stream.run() + end, + timeout: :infinity + ) end def run(["clear"]) do From 38996f551a4ec014e9f4cb4a691d31beecab43ba Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Tue, 17 Aug 2021 00:06:32 +0300 Subject: [PATCH 05/63] Make meilisearch sort on publish date converted to unix time --- lib/mix/tasks/pleroma/search/meilisearch.ex | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 2a64385289..2dd9c0a62f 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -18,7 +18,7 @@ 
def run(["index"]) do Pleroma.HTTP.post( "#{endpoint}/indexes/objects/settings/ranking-rules", Jason.encode!([ - "desc(id)", + "desc(published)", "typo", "words", "proximity", @@ -54,7 +54,15 @@ def run(["index"]) do |> Stream.map(fn objects -> Enum.map(objects, fn object -> data = object.data - %{id: object.id, source: data["source"], ap: data["id"]} + + {:ok, published, _} = DateTime.from_iso8601(data["published"]) + + %{ + id: object.id, + source: data["source"], + ap: data["id"], + published: published |> DateTime.to_unix() + } end) end) |> Stream.each(fn objects -> From 9beaebd97e1746df010aecfcc01d9e2e9a4c60ac Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Tue, 17 Aug 2021 00:30:14 +0300 Subject: [PATCH 06/63] Tweak search ordering to hopefully return newer results --- lib/mix/tasks/pleroma/search/meilisearch.ex | 15 ++++++++++++--- lib/pleroma/search/meilisearch.ex | 13 +++++++++++-- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 2dd9c0a62f..dcecbd7cfb 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -39,7 +39,7 @@ def run(["index"]) do fragment("data->>'type' = 'Note'") and fragment("LENGTH(data->>'source') > 0") and fragment("data->'to' \\? 
?", ^Pleroma.Constants.as_public()), - order_by: fragment("data->'published' DESC") + order_by: [desc: fragment("data->'published'")] ), timeout: :infinity ) @@ -66,11 +66,15 @@ def run(["index"]) do end) end) |> Stream.each(fn objects -> - {:ok, _} = + {:ok, result} = Pleroma.HTTP.post( "#{endpoint}/indexes/objects/documents", Jason.encode!(objects) ) + + if not Map.has_key?(Jason.decode!(result.body), "updateId") do + IO.puts("Failed to index: #{result}") + end end) |> Stream.run() end, @@ -83,6 +87,11 @@ def run(["clear"]) do endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - {:ok, _} = Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects/documents", "", [], []) + {:ok, result} = + Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects/documents", "", [], []) + + if not Map.has_key?(Jason.decode!(result.body), "updateId") do + IO.puts("Failed to clear: #{result}") + end end end diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index dbe6b2d672..9fdb0a07fe 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -32,7 +32,7 @@ def search(user, query, options \\ []) do |> maybe_restrict_author(author) |> maybe_restrict_blocked(user) |> maybe_fetch(user, query) - |> order_by([activity], desc: activity.id) + |> order_by([object: obj], desc: obj.data["published"]) |> Pleroma.Repo.all() rescue _ -> maybe_fetch([], user, query) @@ -48,10 +48,19 @@ def add_to_index(activity) do endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + {:ok, published, _} = DateTime.from_iso8601(data["published"]) + {:ok, result} = Pleroma.HTTP.post( "#{endpoint}/indexes/objects/documents", - Jason.encode!([%{id: object.id, source: data["source"], ap: data["id"]}]) + Jason.encode!([ + %{ + id: object.id, + source: data["source"], + ap: data["id"], + published: published |> DateTime.to_unix() + } + ]) ) if not Map.has_key?(Jason.decode!(result.body), "updateId") do From 
00c48a33acf0bd59fa7e7b58a67b049e4f4adc31 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Tue, 17 Aug 2021 00:57:53 +0300 Subject: [PATCH 07/63] Use content instead of source and scrub it --- lib/mix/tasks/pleroma/search/meilisearch.ex | 12 ++++-------- lib/pleroma/search/meilisearch.ex | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index dcecbd7cfb..5270de2558 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -37,7 +37,7 @@ def run(["index"]) do # Only index public posts which are notes and have some text where: fragment("data->>'type' = 'Note'") and - fragment("LENGTH(data->>'source') > 0") and + fragment("LENGTH(data->>'content') > 0") and fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()), order_by: [desc: fragment("data->'published'")] ), @@ -56,10 +56,11 @@ def run(["index"]) do data = object.data {:ok, published, _} = DateTime.from_iso8601(data["published"]) + {:ok, content} = FastSanitize.strip_tags(data["content"]) %{ id: object.id, - source: data["source"], + content: content, ap: data["id"], published: published |> DateTime.to_unix() } @@ -87,11 +88,6 @@ def run(["clear"]) do endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - {:ok, result} = - Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects/documents", "", [], []) - - if not Map.has_key?(Jason.decode!(result.body), "updateId") do - IO.puts("Failed to clear: #{result}") - end + {:ok, _} = Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects", "", [], []) end end diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 9fdb0a07fe..87fdeaf5e0 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -56,7 +56,7 @@ def add_to_index(activity) do Jason.encode!([ %{ id: object.id, - source: data["source"], + content: 
data["content"] |> Pleroma.HTML.filter_tags(), ap: data["id"], published: published |> DateTime.to_unix() } From e35d87ea54f70a39206f6103ef0e7334e2a428cc Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Tue, 17 Aug 2021 01:37:43 +0300 Subject: [PATCH 08/63] Make the chunk size smaller --- lib/mix/tasks/pleroma/search/meilisearch.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 5270de2558..44af25f3e3 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -28,7 +28,7 @@ def run(["index"]) do ]) ) - chunk_size = 100_000 + chunk_size = 10_000 Pleroma.Repo.transaction( fn -> From 2b2e409ad72862967cabf06344874ae9bff9860f Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sun, 22 Aug 2021 16:37:52 +0300 Subject: [PATCH 09/63] Also index incoming federated posts --- lib/pleroma/search/search.ex | 18 ++++++++++++++++++ lib/pleroma/web/activity_pub/activity_pub.ex | 7 ++----- lib/pleroma/web/activity_pub/side_effects.ex | 7 +++++++ lib/pleroma/web/common_api.ex | 8 ++------ 4 files changed, 29 insertions(+), 11 deletions(-) create mode 100644 lib/pleroma/search/search.ex diff --git a/lib/pleroma/search/search.ex b/lib/pleroma/search/search.ex new file mode 100644 index 0000000000..e363abf194 --- /dev/null +++ b/lib/pleroma/search/search.ex @@ -0,0 +1,18 @@ +defmodule Pleroma.Search do + def add_to_index(activity) do + search_module = Pleroma.Config.get([Pleroma.Search, :module]) + + ConcurrentLimiter.limit(Pleroma.Search, fn -> + Task.start(fn -> search_module.add_to_index(activity) end) + end) + end + + def remove_from_index(object) do + # Also delete from search index + search_module = Pleroma.Config.get([Pleroma.Search, :module]) + + ConcurrentLimiter.limit(Pleroma.Search, fn -> + Task.start(fn -> search_module.remove_from_index(object) end) + end) + end +end diff --git 
a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex index 034c3b185b..7178cf9eb0 100644 --- a/lib/pleroma/web/activity_pub/activity_pub.ex +++ b/lib/pleroma/web/activity_pub/activity_pub.ex @@ -140,11 +140,8 @@ def insert(map, local \\ true, fake \\ false, bypass_actor_check \\ false) when Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end) end) - search_module = Pleroma.Config.get([Pleroma.Search, :module]) - - ConcurrentLimiter.limit(Pleroma.Search, fn -> - Task.start(fn -> search_module.add_to_index(activity) end) - end) + # Add local posts to search index + Pleroma.Search.add_to_index(activity) {:ok, activity} else diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex index 5eefd2824f..15e006b18b 100644 --- a/lib/pleroma/web/activity_pub/side_effects.ex +++ b/lib/pleroma/web/activity_pub/side_effects.ex @@ -197,6 +197,7 @@ def handle(%{data: %{"type" => "Like"}} = object, meta) do # - Increase replies count # - Set up ActivityExpiration # - Set up notifications + # - Index incoming posts for search (if needed) @impl true def handle(%{data: %{"type" => "Create"}} = activity, meta) do with {:ok, object, meta} <- handle_object_creation(meta[:object_data], activity, meta), @@ -226,6 +227,8 @@ def handle(%{data: %{"type" => "Create"}} = activity, meta) do Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end) end) + Pleroma.Search.add_to_index(Map.put(activity, :object, object)) + meta = meta |> add_notifications(notifications) @@ -286,6 +289,7 @@ def handle(%{data: %{"type" => "EmojiReact"}} = object, meta) do # - Reduce the user note count # - Reduce the reply count # - Stream out the activity + # - Removes posts from search index (if needed) @impl true def handle(%{data: %{"type" => "Delete", "object" => deleted_object}} = object, meta) do deleted_object = @@ -325,6 +329,9 @@ def handle(%{data: %{"type" => 
"Delete", "object" => deleted_object}} = object, if result == :ok do Notification.create_notifications(object) + + Pleroma.Search.remove_from_index(object) + {:ok, object, meta} else {:error, result} diff --git a/lib/pleroma/web/common_api.ex b/lib/pleroma/web/common_api.ex index 54a8aa2139..ba6c079757 100644 --- a/lib/pleroma/web/common_api.ex +++ b/lib/pleroma/web/common_api.ex @@ -147,12 +147,8 @@ def delete(activity_id, user) do true <- User.superuser?(user) || user.ap_id == object.data["actor"], {:ok, delete_data, _} <- Builder.delete(user, object.data["id"]), {:ok, delete, _} <- Pipeline.common_pipeline(delete_data, local: true) do - # Also delete from search index - search_module = Pleroma.Config.get([Pleroma.Search, :module]) - - ConcurrentLimiter.limit(Pleroma.Search, fn -> - Task.start(fn -> search_module.remove_from_index(object) end) - end) + # Remove from search index for local posts + Pleroma.Search.remove_from_index(object) {:ok, delete} else From 9f16ca80e0fe60b8b0e3e8ddb9b06ca0bec31002 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sun, 22 Aug 2021 18:47:41 +0300 Subject: [PATCH 10/63] Mark only content as searchable for meilisearch --- lib/mix/tasks/pleroma/search/meilisearch.ex | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 44af25f3e3..ebd3cc81fd 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -28,6 +28,14 @@ def run(["index"]) do ]) ) + {:ok, _} = + Pleroma.HTTP.post( + "#{endpoint}/indexes/objects/settings/searchable-attributes", + Jason.encode!([ + "content" + ]) + ) + chunk_size = 10_000 Pleroma.Repo.transaction( @@ -55,8 +63,14 @@ def run(["index"]) do Enum.map(objects, fn object -> data = object.data + content_str = + case data["content"] do + [nil | rest] -> to_string(rest) + str -> str + end + {:ok, published, _} = 
DateTime.from_iso8601(data["published"]) - {:ok, content} = FastSanitize.strip_tags(data["content"]) + {:ok, content} = FastSanitize.strip_tags(content_str) %{ id: object.id, From 3dedadf192a3acd0c1dfc2b11eba5a247ae7f61c Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sun, 22 Aug 2021 19:38:03 +0300 Subject: [PATCH 11/63] Adjust content indexing to skip more unneeded stuff --- lib/mix/tasks/pleroma/search/meilisearch.ex | 45 ++++++++++++++------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index ebd3cc81fd..3704e0bdca 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -52,13 +52,6 @@ def run(["index"]) do timeout: :infinity ) |> Stream.chunk_every(chunk_size) - |> Stream.transform(0, fn objects, acc -> - new_acc = acc + Enum.count(objects) - - IO.puts("Indexed #{new_acc} entries") - - {[objects], new_acc} - end) |> Stream.map(fn objects -> Enum.map(objects, fn object -> data = object.data @@ -70,15 +63,34 @@ def run(["index"]) do end {:ok, published, _} = DateTime.from_iso8601(data["published"]) - {:ok, content} = FastSanitize.strip_tags(content_str) - %{ - id: object.id, - content: content, - ap: data["id"], - published: published |> DateTime.to_unix() - } + content = + with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str), + trimmed <- String.trim(scrubbed) do + trimmed + end + + # Only index if there is anything in the string. 
If there is a single symbol, + # it's probably a dot from mastodon posts with just the picture + if String.length(content) > 1 do + %{ + id: object.id, + content: content, + ap: data["id"], + published: published |> DateTime.to_unix() + } + else + nil + end end) + |> Enum.filter(fn o -> not is_nil(o) end) + end) + |> Stream.transform(0, fn objects, acc -> + new_acc = acc + Enum.count(objects) + + IO.puts("Indexed #{new_acc} entries") + + {[objects], new_acc} end) |> Stream.each(fn objects -> {:ok, result} = @@ -102,6 +114,9 @@ def run(["clear"]) do endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - {:ok, _} = Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects", "", [], []) + {:ok, _} = + Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects/documents", "", [], + timeout: :infinity + ) end end From 35e9192cedcbc56fb07c9933e2988bf900256b53 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sun, 22 Aug 2021 22:53:18 +0300 Subject: [PATCH 12/63] Rework task indexing to share code with the main module The code in the main module now scrubs new posts too --- lib/mix/tasks/pleroma/search/meilisearch.ex | 35 +--------------- lib/pleroma/search/meilisearch.ex | 46 ++++++++++++++------- 2 files changed, 34 insertions(+), 47 deletions(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 3704e0bdca..b5a394e349 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -51,40 +51,9 @@ def run(["index"]) do ), timeout: :infinity ) + |> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1) + |> Stream.filter(fn o -> not is_nil(o) end) |> Stream.chunk_every(chunk_size) - |> Stream.map(fn objects -> - Enum.map(objects, fn object -> - data = object.data - - content_str = - case data["content"] do - [nil | rest] -> to_string(rest) - str -> str - end - - {:ok, published, _} = DateTime.from_iso8601(data["published"]) - - content = - 
with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str), - trimmed <- String.trim(scrubbed) do - trimmed - end - - # Only index if there is anything in the string. If there is a single symbol, - # it's probably a dot from mastodon posts with just the picture - if String.length(content) > 1 do - %{ - id: object.id, - content: content, - ap: data["id"], - published: published |> DateTime.to_unix() - } - else - nil - end - end) - |> Enum.filter(fn o -> not is_nil(o) end) - end) |> Stream.transform(0, fn objects, acc -> new_acc = acc + Enum.count(objects) diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 87fdeaf5e0..10468e36c0 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -39,28 +39,46 @@ def search(user, query, options \\ []) do end end - def add_to_index(activity) do - object = activity.object - - if activity.data["type"] == "Create" and not is_nil(object) and object.data["type"] == "Note" and + def object_to_search_data(object) do + if not is_nil(object) and object.data["type"] == "Note" and Pleroma.Constants.as_public() in object.data["to"] do data = object.data - endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + content_str = + case data["content"] do + [nil | rest] -> to_string(rest) + str -> str + end - {:ok, published, _} = DateTime.from_iso8601(data["published"]) + content = + with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str), + trimmed <- String.trim(scrubbed) do + trimmed + end + + if String.length(content) > 1 do + {:ok, published, _} = DateTime.from_iso8601(data["published"]) + + %{ + id: object.id, + content: content, + ap: data["id"], + published: published |> DateTime.to_unix() + } + end + end + end + + def add_to_index(activity) do + maybe_search_data = object_to_search_data(activity) + + if activity.data["type"] == "Create" and maybe_search_data do + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) {:ok, result} = 
Pleroma.HTTP.post( "#{endpoint}/indexes/objects/documents", - Jason.encode!([ - %{ - id: object.id, - content: data["content"] |> Pleroma.HTML.filter_tags(), - ap: data["id"], - published: published |> DateTime.to_unix() - } - ]) + Jason.encode!([maybe_search_data]) ) if not Map.has_key?(Jason.decode!(result.body), "updateId") do From 410c8cb765bbec1014cb2bbdbcc44d3a25f834e1 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sun, 22 Aug 2021 23:47:43 +0300 Subject: [PATCH 13/63] Make indexing logs rewrite themselves --- lib/mix/tasks/pleroma/search/meilisearch.ex | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index b5a394e349..2485a441d8 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -57,7 +57,9 @@ def run(["index"]) do |> Stream.transform(0, fn objects, acc -> new_acc = acc + Enum.count(objects) - IO.puts("Indexed #{new_acc} entries") + # Reset to the beginning of the line and rewrite it + IO.write("\r") + IO.write("Indexed #{new_acc} entries") {[objects], new_acc} end) @@ -76,6 +78,8 @@ def run(["index"]) do end, timeout: :infinity ) + + IO.write("\n") end def run(["clear"]) do From 2c7d973af7797ae860829c1764ade521a17e7263 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 23 Aug 2021 19:35:21 +0300 Subject: [PATCH 14/63] Implement meilisearch auth --- lib/mix/tasks/pleroma/search/meilisearch.ex | 75 ++++++++++++--------- lib/pleroma/search/meilisearch.ex | 69 ++++++++++++------- 2 files changed, 88 insertions(+), 56 deletions(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 2485a441d8..230be5aa12 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -9,32 +9,30 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do import Mix.Pleroma import Ecto.Query + import 
Pleroma.Search.Meilisearch, only: [meili_post!: 2, meili_delete!: 1] + def run(["index"]) do start_pleroma() - endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + meili_post!( + "/indexes/objects/settings/ranking-rules", + [ + "desc(published)", + "typo", + "words", + "proximity", + "attribute", + "wordsPosition", + "exactness" + ] + ) - {:ok, _} = - Pleroma.HTTP.post( - "#{endpoint}/indexes/objects/settings/ranking-rules", - Jason.encode!([ - "desc(published)", - "typo", - "words", - "proximity", - "attribute", - "wordsPosition", - "exactness" - ]) - ) - - {:ok, _} = - Pleroma.HTTP.post( - "#{endpoint}/indexes/objects/settings/searchable-attributes", - Jason.encode!([ - "content" - ]) - ) + meili_post!( + "/indexes/objects/settings/searchable-attributes", + [ + "content" + ] + ) chunk_size = 10_000 @@ -64,14 +62,14 @@ def run(["index"]) do {[objects], new_acc} end) |> Stream.each(fn objects -> - {:ok, result} = - Pleroma.HTTP.post( - "#{endpoint}/indexes/objects/documents", - Jason.encode!(objects) + result = + meili_post!( + "/indexes/objects/documents", + objects ) - if not Map.has_key?(Jason.decode!(result.body), "updateId") do - IO.puts("Failed to index: #{result}") + if not Map.has_key?(result, "updateId") do + IO.puts("Failed to index: #{inspect(result)}") end end) |> Stream.run() @@ -85,11 +83,26 @@ def run(["index"]) do def run(["clear"]) do start_pleroma() + meili_delete!("/indexes/objects/documents") + end + + def run(["show-private-key", master_key]) do + start_pleroma() + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - {:ok, _} = - Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects/documents", "", [], - timeout: :infinity + {:ok, result} = + Pleroma.HTTP.get( + Path.join(endpoint, "/keys"), + [{"X-Meili-API-Key", master_key}] ) + + decoded = Jason.decode!(result.body) + + if decoded["private"] do + IO.puts(decoded["private"]) + else + IO.puts("Error fetching the key, check the master key is correct: 
#{inspect(decoded)}") + end end end diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 10468e36c0..8745d539d7 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -7,20 +7,50 @@ defmodule Pleroma.Search.Meilisearch do import Pleroma.Activity.Search import Ecto.Query + defp meili_headers() do + private_key = Pleroma.Config.get([Pleroma.Search.Meilisearch, :private_key]) + + if is_nil(private_key), do: [], else: [{"X-Meili-API-Key", private_key}] + end + + def meili_post!(path, params) do + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + + {:ok, result} = + Pleroma.HTTP.post( + Path.join(endpoint, path), + Jason.encode!(params), + meili_headers() + ) + + Jason.decode!(result.body) + end + + def meili_delete!(path) do + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + + {:ok, _} = + Pleroma.HTTP.request( + :delete, + Path.join(endpoint, path), + "", + meili_headers(), + timeout: :infinity + ) + end + def search(user, query, options \\ []) do limit = Enum.min([Keyword.get(options, :limit), 40]) offset = Keyword.get(options, :offset, 0) author = Keyword.get(options, :author) - endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - - {:ok, result} = - Pleroma.HTTP.post( - "#{endpoint}/indexes/objects/search", - Jason.encode!(%{q: query, offset: offset, limit: limit}) + result = + meili_post!( + "/indexes/objects/search", + %{q: query, offset: offset, limit: limit} ) - hits = Jason.decode!(result.body)["hits"] |> Enum.map(& &1["ap"]) + hits = result["hits"] |> Enum.map(& &1["ap"]) try do hits @@ -73,30 +103,19 @@ def add_to_index(activity) do maybe_search_data = object_to_search_data(activity) if activity.data["type"] == "Create" and maybe_search_data do - endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - - {:ok, result} = - Pleroma.HTTP.post( - "#{endpoint}/indexes/objects/documents", - Jason.encode!([maybe_search_data]) + 
result = + meili_post!( + "/indexes/objects/documents", + [maybe_search_data] ) - if not Map.has_key?(Jason.decode!(result.body), "updateId") do - Logger.error("Failed to add activity #{activity.id} to index: #{result.body}") + if not Map.has_key?(result, "updateId") do + Logger.error("Failed to add activity #{activity.id} to index: #{inspect(result)}") end end end def remove_from_index(object) do - endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - - {:ok, _} = - Pleroma.HTTP.request( - :delete, - "#{endpoint}/indexes/objects/documents/#{object.id}", - "", - [], - [] - ) + meili_delete!("/indexes/objects/documents/#{object.id}") end end From a67f9da5cc46b4e184aa1afe3dd1bd1df31de15b Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 23 Aug 2021 20:02:34 +0300 Subject: [PATCH 15/63] Add a message with a count of posts to index --- lib/mix/tasks/pleroma/search/meilisearch.ex | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 230be5aa12..557b061821 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -38,7 +38,7 @@ def run(["index"]) do Pleroma.Repo.transaction( fn -> - Pleroma.Repo.stream( + query = from(Pleroma.Object, # Only index public posts which are notes and have some text where: @@ -46,7 +46,13 @@ def run(["index"]) do fragment("LENGTH(data->>'content') > 0") and fragment("data->'to' \\? 
?", ^Pleroma.Constants.as_public()), order_by: [desc: fragment("data->'published'")] - ), + ) + + count = query |> Pleroma.Repo.aggregate(:count, :data) + IO.puts("Entries to index: #{count}") + + Pleroma.Repo.stream( + query, timeout: :infinity ) |> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1) From 09a1ae1b6eca4efbb935aa1c0da950009d110fb2 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 23 Aug 2021 20:21:46 +0300 Subject: [PATCH 16/63] Add the meilisearch.stats command --- lib/mix/tasks/pleroma/search/meilisearch.ex | 10 +++++++++- lib/pleroma/search/meilisearch.ex | 12 ++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 557b061821..f2d9fe3128 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -9,7 +9,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do import Mix.Pleroma import Ecto.Query - import Pleroma.Search.Meilisearch, only: [meili_post!: 2, meili_delete!: 1] + import Pleroma.Search.Meilisearch, only: [meili_post!: 2, meili_delete!: 1, meili_get!: 1] def run(["index"]) do start_pleroma() @@ -111,4 +111,12 @@ def run(["show-private-key", master_key]) do IO.puts("Error fetching the key, check the master key is correct: #{inspect(decoded)}") end end + + def run(["stats"]) do + start_pleroma() + + result = meili_get!("/indexes/objects/stats") + IO.puts("Number of entries: #{result["numberOfDocuments"]}") + IO.puts("Indexing? 
#{result["isIndexing"]}") + end end diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 8745d539d7..1ad17bf9f7 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -13,6 +13,18 @@ defp meili_headers() do if is_nil(private_key), do: [], else: [{"X-Meili-API-Key", private_key}] end + def meili_get!(path) do + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + + {:ok, result} = + Pleroma.HTTP.get( + Path.join(endpoint, path), + meili_headers() + ) + + Jason.decode!(result.body) + end + def meili_post!(path, params) do endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) From 07ccab9766a6289326676a4814537564f25f35fa Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 23 Aug 2021 20:27:16 +0300 Subject: [PATCH 17/63] Add search/meilisearch documentation --- docs/configuration/search.md | 99 ++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/configuration/search.md diff --git a/docs/configuration/search.md b/docs/configuration/search.md new file mode 100644 index 0000000000..14ec2bc631 --- /dev/null +++ b/docs/configuration/search.md @@ -0,0 +1,99 @@ +# Configuring search + +{! backend/administration/CLI_tasks/general_cli_task_info.include !} + +## Built-in search + +To use built-in search that has no external dependencies, set the search module to `Pleroma.Activity`: + +> config :pleroma, Pleroma.Search, module: Pleroma.Activity + +While it has no external dependencies, it has problems with performance and relevancy. + +## Meilisearch + +To use [meilisearch](https://www.meilisearch.com/), set the search module to `Pleroma.Search.Meilisearch`: + +> config :pleroma, Pleroma.Search, module: Pleroma.Search.Meilisearch + +You then need to set the address of the meilisearch instance, and optionally the private key for authentication. 
+ +> config :pleroma, Pleroma.Search.Meilisearch, +> url: "http://127.0.0.1:7700/", +> private_key: "private key" + +Information about setting up meilisearch can be found in the +[official documentation](https://docs.meilisearch.com/learn/getting_started/installation.html). +You probably want to start it with `MEILI_NO_ANALYTICS=true` and `MEILI_NO_CENTRY=true` environment variables, +to disable analytics. + +### Private key authentication (optional) + +To set the private key, use the `MEILI_MASTER_KEY` environment variable when starting. After setting the _master key_, +you have to get the _private key_, which is actually used for authentication. + +=== "OTP" + ```sh + ./bin/pleroma_ctl search.meilisearch show-private-key + ``` + +=== "From Source" + ```sh + mix pleroma.search.meilisearch show-private-key + ``` + +This is the key you actually put into your configuration file. + +### Initial indexing + +After setting up the configuration, you'll want to index all of your already existsing posts. Only public posts are indexed. You'll only +have to do it one time, but it might take a while, depending on the amount of posts your instance has seen. This is also a fairly RAM +consuming process for `meilisearch`, and it will take a lot of RAM when running if you have a lot of posts (seems to be around 5G for ~1.2 +million posts while idle and up to 7G while indexing initially, but your experience may be different). + +To start te initial indexing, run the `index` command: + +=== "OTP" + ```sh + ./bin/pleroma_ctl search.meilisearch index + ``` + +=== "From Source" + ```sh + mix pleroma.search.meilisearch index + ``` + +This will show you the total amount of posts to index, and then show you the amount of posts indexed currently, until the numbers eventually +become the same. The posts are indexed in big batches and meilisearch will take some time to actually index them, even after you have +inserted all the posts into it. 
Depending on the amount of posts, this may be as long as several hours. To get information about the status +of indexing and how many posts have actually been indexed, use the `stats` command: + +=== "OTP" + ```sh + ./bin/pleroma_ctl search.meilisearch stats + ``` + +=== "From Source" + ```sh + mix pleroma.search.meilisearch stats + ``` + +### Clearing the index + +In case you need to clear the index (for example, to re-index from scratch, if that needs to happen for some reason), you can +use the `clear` command: + +=== "OTP" + ```sh + ./bin/pleroma_ctl search.meilisearch clear + ``` + +=== "From Source" + ```sh + mix pleroma.search.meilisearch clear + ``` + +This will clear **all** the posts from the search index. Note, that deleted posts are also removed from index by the instance itself, so +there is no need to actually clear the whole index, unless you want **all** of it gone. That said, the index does not hold any information +that cannot be re-created from the database, it should also generally be a lot smaller than the size of your database. Still, the size +depends on the amount of text in posts. 
From d9ef7e075880ba39dd4ca8e21566c680070faa42 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 23 Aug 2021 21:15:15 +0300 Subject: [PATCH 18/63] Fix activity being passed to object_to_search_data --- lib/pleroma/search/meilisearch.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 1ad17bf9f7..212bdd473c 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -112,7 +112,7 @@ def object_to_search_data(object) do end def add_to_index(activity) do - maybe_search_data = object_to_search_data(activity) + maybe_search_data = object_to_search_data(activity.object) if activity.data["type"] == "Create" and maybe_search_data do result = From 005947e9f77b40d1b6dd6c05f952df6ecb2aa1fc Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 23 Aug 2021 23:52:21 +0300 Subject: [PATCH 19/63] Add tests for local post indexing for meilisearch --- config/test.exs | 4 +- test/pleroma/search/meilisearch_test.exs | 108 +++++++++++++++++++++++ 2 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 test/pleroma/search/meilisearch_test.exs diff --git a/config/test.exs b/config/test.exs index d1c356f146..c9b2b51ba1 100644 --- a/config/test.exs +++ b/config/test.exs @@ -133,7 +133,9 @@ ap_streamer: Pleroma.Web.ActivityPub.ActivityPubMock, logger: Pleroma.LoggerMock -config :pleroma, Pleroma.Search, module: Pleroma.Activity.Search +config :pleroma, Pleroma.Search, module: Pleroma.Activity + +config :pleroma, Pleroma.Search.Meilisearch, url: "http://127.0.0.1:7700/", private_key: nil # Reduce recompilation time # https://dashbit.co/blog/speeding-up-re-compilation-of-elixir-projects config :phoenix, :plug_init_mode, :runtime diff --git a/test/pleroma/search/meilisearch_test.exs b/test/pleroma/search/meilisearch_test.exs new file mode 100644 index 0000000000..6e13c8edf9 --- /dev/null +++ b/test/pleroma/search/meilisearch_test.exs @@ -0,0 +1,108 @@ +# Pleroma: A lightweight social
networking server +# Copyright © 2017-2021 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Search.MeilisearchTest do + require Pleroma.Constants + + use Pleroma.DataCase + + import Pleroma.Factory + import Tesla.Mock + import Mock + + alias Pleroma.Web.CommonAPI + alias Pleroma.Search.Meilisearch + + setup_all do + Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) + :ok + end + + describe "meilisearch" do + setup do: clear_config([Pleroma.Search, :module], Meilisearch) + + setup_with_mocks( + [ + {Meilisearch, [:passthrough], + [ + add_to_index: fn a -> passthrough([a]) end, + remove_from_index: fn a -> passthrough([a]) end + ]} + ], + context, + do: {:ok, context} + ) + + test "indexes a local post on creation" do + user = insert(:user) + + mock_global(fn + %{method: :post, url: "http://127.0.0.1:7700/indexes/objects/documents", body: body} -> + assert match?( + [%{"content" => "guys i just don't wanna leave the swamp"}], + Jason.decode!(body) + ) + + json(%{updateId: 1}) + end) + + {:ok, activity} = + CommonAPI.post(user, %{ + status: "guys i just don't wanna leave the swamp", + visibility: "public" + }) + + assert_called(Meilisearch.add_to_index(activity)) + end + + test "doesn't index posts that are not public" do + user = insert(:user) + + Enum.each(["unlisted", "private", "direct"], fn visiblity -> + {:ok, _} = + CommonAPI.post(user, %{ + status: "guys i just don't wanna leave the swamp", + visibility: visiblity + }) + end) + + history = call_history(Meilisearch) + assert Enum.count(history) == 3 + + Enum.each(history, fn {_, _, return} -> + assert is_nil(return) + end) + end + + test "deletes posts from index when deleted locally" do + user = insert(:user) + + mock_global(fn + %{method: :post, url: "http://127.0.0.1:7700/indexes/objects/documents", body: body} -> + assert match?( + [%{"content" => "guys i just don't wanna leave the swamp"}], + Jason.decode!(body) + ) + + json(%{updateId: 1}) + + 
%{method: :delete, url: "http://127.0.0.1:7700/indexes/objects/documents/" <> id} -> + assert String.length(id) > 1 + json(%{updateId: 2}) + end) + + {:ok, activity} = + CommonAPI.post(user, %{ + status: "guys i just don't wanna leave the swamp", + visibility: "public" + }) + + assert_called(Meilisearch.add_to_index(activity)) + + {:ok, _} = CommonAPI.delete(activity.id, user) + + assert_called(Meilisearch.remove_from_index(:_)) + end + end +end From a5bb7f9345ff73469c0d776bce5455ec4f27b4ee Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 23 Aug 2021 23:52:37 +0300 Subject: [PATCH 20/63] Add private_key: nil to default meilisearch options --- config/config.exs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/config.exs b/config/config.exs index 1df7dd44b5..711775982e 100644 --- a/config/config.exs +++ b/config/config.exs @@ -872,7 +872,7 @@ config :pleroma, Pleroma.Search, module: Pleroma.Activity.Search -config :pleroma, Pleroma.Search.Meilisearch, url: "http://127.0.0.1:7700/" +config :pleroma, Pleroma.Search.Meilisearch, url: "http://127.0.0.1:7700/", private_key: nil # Import environment specific config. This must remain at the bottom # of this file so it overrides the configuration defined above. 
From 40280cc273ad7f2b355846e2f41b9873a8d5ff2c Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 28 Aug 2021 15:59:13 +0300 Subject: [PATCH 21/63] Reorder ranking rules for (maybe) better results --- lib/mix/tasks/pleroma/search/meilisearch.ex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index f2d9fe3128..cdf9ab0bd3 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -18,12 +18,12 @@ def run(["index"]) do "/indexes/objects/settings/ranking-rules", [ "desc(published)", - "typo", "words", + "exactness", "proximity", - "attribute", "wordsPosition", - "exactness" + "typo", + "attribute" ] ) From 6beef2d1179ab9a377e87872b7fbe2997bbbbebd Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Fri, 8 Oct 2021 12:24:37 +0300 Subject: [PATCH 22/63] Move add_to_index / remove_from_index to Pleroma.Activity.Search --- lib/pleroma/activity.ex | 2 -- lib/pleroma/activity/search.ex | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/pleroma/activity.ex b/lib/pleroma/activity.ex index 2c168fd410..ebfd4ed45f 100644 --- a/lib/pleroma/activity.ex +++ b/lib/pleroma/activity.ex @@ -367,8 +367,6 @@ def restrict_deactivated_users(query) do end defdelegate search(user, query, options \\ []), to: Pleroma.Activity.Search - def add_to_index(_activity), do: nil - def remove_from_index(_object), do: nil def direct_conversation_id(activity, for_user) do alias Pleroma.Conversation.Participation diff --git a/lib/pleroma/activity/search.ex b/lib/pleroma/activity/search.ex index 3dce9d3553..47ab5208ce 100644 --- a/lib/pleroma/activity/search.ex +++ b/lib/pleroma/activity/search.ex @@ -45,6 +45,9 @@ def search(user, search_query, options \\ []) do end end + def add_to_index(_activity), do: nil + def remove_from_index(_object), do: nil + def maybe_restrict_author(query, %User{} = author) do
Activity.Queries.by_author(query, author) end From c569ad05b3d812c87171e68eac79eec749321033 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Tue, 12 Oct 2021 19:14:39 +0300 Subject: [PATCH 23/63] Add more documentation about rum to meilisearch docs --- docs/configuration/search.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/configuration/search.md b/docs/configuration/search.md index 14ec2bc631..e9743f1a45 100644 --- a/docs/configuration/search.md +++ b/docs/configuration/search.md @@ -12,6 +12,15 @@ While it has no external dependencies, it has problems with performance and rele ## Meilisearch +Note that it's quite a bit more memory hungry than PostgreSQL (around 4-5G for ~1.2 million +posts while idle and up to 7G while indexing initially). The disk usage for this additional index is also +around 4 gigabytes. Like [RUM](./cheatsheet.md#rum-indexing-for-full-text-search) indexes, it offers considerably +higher performance and ordering by timestamp in a reasonable amount of time. +Additionally, the search results seem to be more accurate. + +Due to high memory usage, it may be best to set it up on a different machine, if running pleroma on a low-resource +computer, and use private key authentication to secure the remote search instance. 
+ To use [meilisearch](https://www.meilisearch.com/), set the search module to `Pleroma.Search.Meilisearch`: > config :pleroma, Pleroma.Search, module: Pleroma.Search.Meilisearch From 95cb2bb694e3f8857895b21331b02b9277d65d9b Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Tue, 12 Oct 2021 19:17:37 +0300 Subject: [PATCH 24/63] Don't try removing from index again in common_api It's already removed in the side effects of the pipeline --- lib/pleroma/web/common_api.ex | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/pleroma/web/common_api.ex b/lib/pleroma/web/common_api.ex index ba6c079757..89f5dd6065 100644 --- a/lib/pleroma/web/common_api.ex +++ b/lib/pleroma/web/common_api.ex @@ -147,9 +147,6 @@ def delete(activity_id, user) do true <- User.superuser?(user) || user.ap_id == object.data["actor"], {:ok, delete_data, _} <- Builder.delete(user, object.data["id"]), {:ok, delete, _} <- Pipeline.common_pipeline(delete_data, local: true) do - # Remove from search index for local posts - Pleroma.Search.remove_from_index(object) - {:ok, delete} else {:find_activity, _} -> From cf558208c202d5188954e26077d35bcc1ae02fce Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Tue, 12 Oct 2021 19:34:57 +0300 Subject: [PATCH 25/63] Use proper deleted object for removing from index --- lib/pleroma/web/activity_pub/side_effects.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex index 15e006b18b..4762b5ac60 100644 --- a/lib/pleroma/web/activity_pub/side_effects.ex +++ b/lib/pleroma/web/activity_pub/side_effects.ex @@ -330,7 +330,7 @@ def handle(%{data: %{"type" => "Delete", "object" => deleted_object}} = object, if result == :ok do Notification.create_notifications(object) - Pleroma.Search.remove_from_index(object) + Pleroma.Search.remove_from_index(deleted_object) {:ok, object, meta} else From e4b7a3f51f270f468c15cc4ce850c847633c030b Mon Sep 17 00:00:00 2001 
From: Ekaterina Vaartis Date: Fri, 29 Oct 2021 00:38:00 +0300 Subject: [PATCH 26/63] Modify some meilisearch variables --- config/config.exs | 2 +- lib/pleroma/search/meilisearch.ex | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/config.exs b/config/config.exs index 711775982e..b55dd94cb2 100644 --- a/config/config.exs +++ b/config/config.exs @@ -867,7 +867,7 @@ config :pleroma, ConcurrentLimiter, [ {Pleroma.Web.RichMedia.Helpers, [max_running: 5, max_waiting: 5]}, {Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, [max_running: 5, max_waiting: 5]}, - {Pleroma.Search, [max_running: 20, max_waiting: 50]} + {Pleroma.Search, [max_running: 30, max_waiting: 50]} ] config :pleroma, Pleroma.Search, module: Pleroma.Activity.Search diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 212bdd473c..b8248e40ce 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -47,7 +47,7 @@ def meili_delete!(path) do Path.join(endpoint, path), "", meili_headers(), - timeout: :infinity + [] ) end From 0b4fd0d342e3ced073e82355b380cbfee5478c60 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Fri, 29 Oct 2021 13:58:24 +0300 Subject: [PATCH 27/63] Set content-type to application/json --- lib/pleroma/search/meilisearch.ex | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index b8248e40ce..d94ab8b646 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -10,7 +10,8 @@ defmodule Pleroma.Search.Meilisearch do defp meili_headers() do private_key = Pleroma.Config.get([Pleroma.Search.Meilisearch, :private_key]) - if is_nil(private_key), do: [], else: [{"X-Meili-API-Key", private_key}] + [{"Content-Type", "application/json"}] ++ + if is_nil(private_key), do: [], else: [{"X-Meili-API-Key", private_key}] end def meili_get!(path) do From 4445421297f4a4375ce9df4857a66ad08e984507 
Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Fri, 29 Oct 2021 21:04:59 +0300 Subject: [PATCH 28/63] Only add local posts to index in activity_pub Remote ones are already added in another place --- lib/pleroma/web/activity_pub/activity_pub.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex index 7178cf9eb0..cdc70aacf2 100644 --- a/lib/pleroma/web/activity_pub/activity_pub.ex +++ b/lib/pleroma/web/activity_pub/activity_pub.ex @@ -141,7 +141,7 @@ def insert(map, local \\ true, fake \\ false, bypass_actor_check \\ false) when end) # Add local posts to search index - Pleroma.Search.add_to_index(activity) + if local, do: Pleroma.Search.add_to_index(activity) {:ok, activity} else From e928e307f34542b0a0af8b615c986aeac478b637 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Wed, 10 Nov 2021 21:25:12 +0300 Subject: [PATCH 29/63] Add a reindex option Signed-off-by: Ekaterina Vaartis --- lib/mix/tasks/pleroma/search/meilisearch.ex | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index cdf9ab0bd3..2a3c3a8b92 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -11,9 +11,11 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do import Pleroma.Search.Meilisearch, only: [meili_post!: 2, meili_delete!: 1, meili_get!: 1] - def run(["index"]) do + def run(["index" | args]) do start_pleroma() + is_reindex = "--reindex" in args + meili_post!( "/indexes/objects/settings/ranking-rules", [ @@ -68,6 +70,19 @@ def run(["index"]) do {[objects], new_acc} end) |> Stream.each(fn objects -> + objects = + objects + |> Enum.filter(fn o -> + if is_reindex do + result = meili_get!("/indexes/objects/documents/#{o.id}") + + # Filter out the already indexed documents. 
This is true when the document does not exist + result["errorCode"] == "document_not_found" + else + true + end + end) + result = meili_post!( "/indexes/objects/documents", From 9c1a9307079c8d007ae7cbf3e089d2bc5ea6b733 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 13 Nov 2021 15:07:51 +0300 Subject: [PATCH 30/63] Support reindexing meilisearch >=0.24.0 It has a different error code key --- lib/mix/tasks/pleroma/search/meilisearch.ex | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 2a3c3a8b92..3b134ad3ff 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -76,8 +76,14 @@ def run(["index" | args]) do if is_reindex do result = meili_get!("/indexes/objects/documents/#{o.id}") + # With >= 0.24.0 the name for "errorCode" is just "code" + error_code_key = + if meili_get!("/version")["pkgVersion"] |> Version.match?(">= 0.24.0"), + do: "code", + else: "errorCode" + # Filter out the already indexed documents.
This is true when the document does not exist - result["errorCode"] == "document_not_found" + result[error_code_key] == "document_not_found" else true end From 8898b5e927bae27a521e4eadd0faf970ad27c5bc Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sun, 14 Nov 2021 20:15:12 +0300 Subject: [PATCH 31/63] Fix a typo in search docs --- docs/configuration/search.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration/search.md b/docs/configuration/search.md index e9743f1a45..9adc7884ff 100644 --- a/docs/configuration/search.md +++ b/docs/configuration/search.md @@ -60,7 +60,7 @@ have to do it one time, but it might take a while, depending on the amount of po consuming process for `meilisearch`, and it will take a lot of RAM when running if you have a lot of posts (seems to be around 5G for ~1.2 million posts while idle and up to 7G while indexing initially, but your experience may be different). -To start te initial indexing, run the `index` command: +To start the initial indexing, run the `index` command: === "OTP" ```sh From 7009ef5672ad20f92374d218cd614a38cd70515e Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sun, 14 Nov 2021 20:24:05 +0300 Subject: [PATCH 32/63] Move the search.ex file so credo doesn't complain --- lib/pleroma/{search => }/search.ex | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename lib/pleroma/{search => }/search.ex (100%) diff --git a/lib/pleroma/search/search.ex b/lib/pleroma/search.ex similarity index 100% rename from lib/pleroma/search/search.ex rename to lib/pleroma/search.ex From 39e596a5b51c0c86b6d6bd5f23177a1e6a64cf0b Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sun, 14 Nov 2021 21:42:18 +0300 Subject: [PATCH 33/63] Style fixes --- lib/mix/tasks/pleroma/search/meilisearch.ex | 3 ++- lib/pleroma/search/meilisearch.ex | 2 +- test/pleroma/search/meilisearch_test.exs | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex 
b/lib/mix/tasks/pleroma/search/meilisearch.ex index 3b134ad3ff..62ace7e397 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -82,7 +82,8 @@ def run(["index" | args]) do do: "code", else: "errorCode" - # Filter out the already indexed documents. This is true when the document does not exist + # Filter out the already indexed documents. + # This is true when the document does not exist result[error_code_key] == "document_not_found" else true diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index d94ab8b646..41f99ad9fc 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -7,7 +7,7 @@ defmodule Pleroma.Search.Meilisearch do import Pleroma.Activity.Search import Ecto.Query - defp meili_headers() do + defp meili_headers do private_key = Pleroma.Config.get([Pleroma.Search.Meilisearch, :private_key]) [{"Content-Type", "application/json"}] ++ diff --git a/test/pleroma/search/meilisearch_test.exs b/test/pleroma/search/meilisearch_test.exs index 6e13c8edf9..251388ea2d 100644 --- a/test/pleroma/search/meilisearch_test.exs +++ b/test/pleroma/search/meilisearch_test.exs @@ -11,8 +11,8 @@ defmodule Pleroma.Search.MeilisearchTest do import Tesla.Mock import Mock - alias Pleroma.Web.CommonAPI alias Pleroma.Search.Meilisearch + alias Pleroma.Web.CommonAPI setup_all do Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) From 0fae71f88d142f64ec18a49ff4292db816dacdc8 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Tue, 16 Nov 2021 21:54:26 +0300 Subject: [PATCH 34/63] Rename search.ex to database_search.ex and add search/2 --- lib/pleroma/{search.ex => search/database_search.ex} | 8 +++++++- lib/pleroma/web/activity_pub/activity_pub.ex | 2 +- lib/pleroma/web/activity_pub/side_effects.ex | 4 ++-- .../web/mastodon_api/controllers/search_controller.ex | 4 +--- 4 files changed, 11 insertions(+), 7 deletions(-) rename 
lib/pleroma/{search.ex => search/database_search.ex} (68%) diff --git a/lib/pleroma/search.ex b/lib/pleroma/search/database_search.ex similarity index 68% rename from lib/pleroma/search.ex rename to lib/pleroma/search/database_search.ex index e363abf194..be0e19be06 100644 --- a/lib/pleroma/search.ex +++ b/lib/pleroma/search/database_search.ex @@ -1,4 +1,4 @@ -defmodule Pleroma.Search do +defmodule Pleroma.Search.DatabaseSearch do def add_to_index(activity) do search_module = Pleroma.Config.get([Pleroma.Search, :module]) @@ -15,4 +15,10 @@ def remove_from_index(object) do Task.start(fn -> search_module.remove_from_index(object) end) end) end + + def search(query, options) do + search_module = Pleroma.Config.get([Pleroma.Search, :module], Pleroma.Activity) + + search_module.search(options[:for_user], query, options) + end end diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex index cdc70aacf2..7e34446760 100644 --- a/lib/pleroma/web/activity_pub/activity_pub.ex +++ b/lib/pleroma/web/activity_pub/activity_pub.ex @@ -141,7 +141,7 @@ def insert(map, local \\ true, fake \\ false, bypass_actor_check \\ false) when end) # Add local posts to search index - if local, do: Pleroma.Search.add_to_index(activity) + if local, do: Pleroma.Search.DatabaseSearch.add_to_index(activity) {:ok, activity} else diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex index 4762b5ac60..fa57eab691 100644 --- a/lib/pleroma/web/activity_pub/side_effects.ex +++ b/lib/pleroma/web/activity_pub/side_effects.ex @@ -227,7 +227,7 @@ def handle(%{data: %{"type" => "Create"}} = activity, meta) do Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end) end) - Pleroma.Search.add_to_index(Map.put(activity, :object, object)) + Pleroma.Search.DatabaseSearch.add_to_index(Map.put(activity, :object, object)) meta = meta @@ -330,7 +330,7 @@ def handle(%{data: %{"type" => "Delete", 
"object" => deleted_object}} = object, if result == :ok do Notification.create_notifications(object) - Pleroma.Search.remove_from_index(deleted_object) + Pleroma.Search.DatabaseSearch.remove_from_index(deleted_object) {:ok, object, meta} else diff --git a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex index 99c33eba6c..10f1aa5324 100644 --- a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex +++ b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex @@ -99,9 +99,7 @@ defp resource_search(_, "accounts", query, options) do end defp resource_search(_, "statuses", query, options) do - search_module = Pleroma.Config.get([Pleroma.Search, :module], Pleroma.Activity) - - statuses = with_fallback(fn -> search_module.search(options[:for_user], query, options) end) + statuses = with_fallback(fn -> Pleroma.Search.DatabaseSearch.search(query, options) end) StatusView.render("index.json", activities: statuses, From a6946048fbe049aa223d094d36eb767739ab5ff2 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Wed, 17 Nov 2021 22:29:49 +0300 Subject: [PATCH 35/63] Rename Activity.Search to Search.DatabaseSearch --- config/config.exs | 2 +- config/test.exs | 2 +- docs/configuration/search.md | 2 +- lib/pleroma/activity.ex | 2 +- lib/pleroma/activity/search.ex | 165 ------------------ lib/pleroma/search.ex | 24 +++ lib/pleroma/search/database_search.ex | 157 +++++++++++++++-- lib/pleroma/search/meilisearch.ex | 2 +- lib/pleroma/web/activity_pub/activity_pub.ex | 2 +- lib/pleroma/web/activity_pub/side_effects.ex | 4 +- .../controllers/search_controller.ex | 2 +- .../database_search_test.ex} | 10 +- 12 files changed, 181 insertions(+), 193 deletions(-) delete mode 100644 lib/pleroma/activity/search.ex create mode 100644 lib/pleroma/search.ex rename test/pleroma/{activity/search_test.exs => search/database_search_test.ex} (86%) diff --git a/config/config.exs b/config/config.exs index 
b55dd94cb2..f7f3a14544 100644 --- a/config/config.exs +++ b/config/config.exs @@ -870,7 +870,7 @@ {Pleroma.Search, [max_running: 30, max_waiting: 50]} ] -config :pleroma, Pleroma.Search, module: Pleroma.Activity.Search +config :pleroma, Pleroma.Search, module: Pleroma.Search.DatabaseSearch config :pleroma, Pleroma.Search.Meilisearch, url: "http://127.0.0.1:7700/", private_key: nil diff --git a/config/test.exs b/config/test.exs index c9b2b51ba1..ea29be6388 100644 --- a/config/test.exs +++ b/config/test.exs @@ -133,7 +133,7 @@ ap_streamer: Pleroma.Web.ActivityPub.ActivityPubMock, logger: Pleroma.LoggerMock -config :pleroma, Pleroma.Search, module: Pleroma.Activity +config :pleroma, Pleroma.Search, module: Pleroma.Search.DatabaseSearch config :pleroma, Pleroma.Search.Meilisearch, url: "http://127.0.0.1:7700/", private_key: nil diff --git a/docs/configuration/search.md b/docs/configuration/search.md index 9adc7884ff..c7e77d9c22 100644 --- a/docs/configuration/search.md +++ b/docs/configuration/search.md @@ -6,7 +6,7 @@ To use built-in search that has no external dependencies, set the search module to `Pleroma.Activity`: -> config :pleroma, Pleroma.Search, module: Pleroma.Activity +> config :pleroma, Pleroma.Search, module: Pleroma.Search.DatabaseSearch While it has no external dependencies, it has problems with performance and relevancy. 
diff --git a/lib/pleroma/activity.ex b/lib/pleroma/activity.ex index ebfd4ed45f..389c806918 100644 --- a/lib/pleroma/activity.ex +++ b/lib/pleroma/activity.ex @@ -366,7 +366,7 @@ def restrict_deactivated_users(query) do from(activity in query, where: activity.actor not in subquery(deactivated_users_query)) end - defdelegate search(user, query, options \\ []), to: Pleroma.Activity.Search + defdelegate search(user, query, options \\ []), to: Pleroma.Search.DatabaseSearch def direct_conversation_id(activity, for_user) do alias Pleroma.Conversation.Participation diff --git a/lib/pleroma/activity/search.ex b/lib/pleroma/activity/search.ex deleted file mode 100644 index 47ab5208ce..0000000000 --- a/lib/pleroma/activity/search.ex +++ /dev/null @@ -1,165 +0,0 @@ -# Pleroma: A lightweight social networking server -# Copyright © 2017-2022 Pleroma Authors -# SPDX-License-Identifier: AGPL-3.0-only - -defmodule Pleroma.Activity.Search do - alias Pleroma.Activity - alias Pleroma.Object.Fetcher - alias Pleroma.Pagination - alias Pleroma.User - alias Pleroma.Web.ActivityPub.Visibility - - require Pleroma.Constants - - import Ecto.Query - - def search(user, search_query, options \\ []) do - index_type = if Pleroma.Config.get([:database, :rum_enabled]), do: :rum, else: :gin - limit = Enum.min([Keyword.get(options, :limit), 40]) - offset = Keyword.get(options, :offset, 0) - author = Keyword.get(options, :author) - - search_function = - if :persistent_term.get({Pleroma.Repo, :postgres_version}) >= 11 do - :websearch - else - :plain - end - - try do - Activity - |> Activity.with_preloaded_object() - |> Activity.restrict_deactivated_users() - |> restrict_public(user) - |> query_with(index_type, search_query, search_function) - |> maybe_restrict_local(user) - |> maybe_restrict_author(author) - |> maybe_restrict_blocked(user) - |> Pagination.fetch_paginated( - %{"offset" => offset, "limit" => limit, "skip_order" => index_type == :rum}, - :offset - ) - |> maybe_fetch(user, search_query) - 
rescue - _ -> maybe_fetch([], user, search_query) - end - end - - def add_to_index(_activity), do: nil - def remove_from_index(_object), do: nil - - def maybe_restrict_author(query, %User{} = author) do - Activity.Queries.by_author(query, author) - end - - def maybe_restrict_author(query, _), do: query - - def maybe_restrict_blocked(query, %User{} = user) do - Activity.Queries.exclude_authors(query, User.blocked_users_ap_ids(user)) - end - - def maybe_restrict_blocked(query, _), do: query - - defp restrict_public(q, user) when not is_nil(user) do - intended_recipients = [ - Pleroma.Constants.as_public(), - Pleroma.Web.ActivityPub.Utils.as_local_public() - ] - - from([a, o] in q, - where: fragment("?->>'type' = 'Create'", a.data), - where: fragment("? && ?", ^intended_recipients, a.recipients) - ) - end - - defp restrict_public(q, _user) do - from([a, o] in q, - where: fragment("?->>'type' = 'Create'", a.data), - where: ^Pleroma.Constants.as_public() in a.recipients - ) - end - - defp query_with(q, :gin, search_query, :plain) do - %{rows: [[tsc]]} = - Ecto.Adapters.SQL.query!( - Pleroma.Repo, - "select current_setting('default_text_search_config')::regconfig::oid;" - ) - - from([a, o] in q, - where: - fragment( - "to_tsvector(?::oid::regconfig, ?->>'content') @@ plainto_tsquery(?)", - ^tsc, - o.data, - ^search_query - ) - ) - end - - defp query_with(q, :gin, search_query, :websearch) do - %{rows: [[tsc]]} = - Ecto.Adapters.SQL.query!( - Pleroma.Repo, - "select current_setting('default_text_search_config')::regconfig::oid;" - ) - - from([a, o] in q, - where: - fragment( - "to_tsvector(?::oid::regconfig, ?->>'content') @@ websearch_to_tsquery(?)", - ^tsc, - o.data, - ^search_query - ) - ) - end - - defp query_with(q, :rum, search_query, :plain) do - from([a, o] in q, - where: - fragment( - "? @@ plainto_tsquery(?)", - o.fts_content, - ^search_query - ), - order_by: [fragment("? 
<=> now()::date", o.inserted_at)] - ) - end - - defp query_with(q, :rum, search_query, :websearch) do - from([a, o] in q, - where: - fragment( - "? @@ websearch_to_tsquery(?)", - o.fts_content, - ^search_query - ), - order_by: [fragment("? <=> now()::date", o.inserted_at)] - ) - end - - def maybe_restrict_local(q, user) do - limit = Pleroma.Config.get([:instance, :limit_to_local_content], :unauthenticated) - - case {limit, user} do - {:all, _} -> restrict_local(q) - {:unauthenticated, %User{}} -> q - {:unauthenticated, _} -> restrict_local(q) - {false, _} -> q - end - end - - defp restrict_local(q), do: where(q, local: true) - - def maybe_fetch(activities, user, search_query) do - with true <- Regex.match?(~r/https?:/, search_query), - {:ok, object} <- Fetcher.fetch_object_from_id(search_query), - %Activity{} = activity <- Activity.get_create_by_object_ap_id(object.data["id"]), - true <- Visibility.visible_for_user?(activity, user) do - [activity | activities] - else - _ -> activities - end - end -end diff --git a/lib/pleroma/search.ex b/lib/pleroma/search.ex new file mode 100644 index 0000000000..ae0b28c54e --- /dev/null +++ b/lib/pleroma/search.ex @@ -0,0 +1,24 @@ +defmodule Pleroma.Search do + def add_to_index(activity) do + search_module = Pleroma.Config.get([Pleroma.Search, :module]) + + ConcurrentLimiter.limit(Pleroma.Search, fn -> + Task.start(fn -> search_module.add_to_index(activity) end) + end) + end + + def remove_from_index(object) do + # Also delete from search index + search_module = Pleroma.Config.get([Pleroma.Search, :module]) + + ConcurrentLimiter.limit(Pleroma.Search, fn -> + Task.start(fn -> search_module.remove_from_index(object) end) + end) + end + + def search(query, options) do + search_module = Pleroma.Config.get([Pleroma.Search, :module], Pleroma.Activity) + + search_module.search(options[:for_user], query, options) + end +end diff --git a/lib/pleroma/search/database_search.ex b/lib/pleroma/search/database_search.ex index 
be0e19be06..5a8b8ca67b 100644 --- a/lib/pleroma/search/database_search.ex +++ b/lib/pleroma/search/database_search.ex @@ -1,24 +1,153 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + defmodule Pleroma.Search.DatabaseSearch do - def add_to_index(activity) do - search_module = Pleroma.Config.get([Pleroma.Search, :module]) + alias Pleroma.Activity + alias Pleroma.Object.Fetcher + alias Pleroma.Pagination + alias Pleroma.User + alias Pleroma.Web.ActivityPub.Visibility - ConcurrentLimiter.limit(Pleroma.Search, fn -> - Task.start(fn -> search_module.add_to_index(activity) end) - end) + require Pleroma.Constants + + import Ecto.Query + + def search(user, search_query, options \\ []) do + index_type = if Pleroma.Config.get([:database, :rum_enabled]), do: :rum, else: :gin + limit = Enum.min([Keyword.get(options, :limit), 40]) + offset = Keyword.get(options, :offset, 0) + author = Keyword.get(options, :author) + + search_function = + if :persistent_term.get({Pleroma.Repo, :postgres_version}) >= 11 do + :websearch + else + :plain + end + + try do + Activity + |> Activity.with_preloaded_object() + |> Activity.restrict_deactivated_users() + |> restrict_public() + |> query_with(index_type, search_query, search_function) + |> maybe_restrict_local(user) + |> maybe_restrict_author(author) + |> maybe_restrict_blocked(user) + |> Pagination.fetch_paginated( + %{"offset" => offset, "limit" => limit, "skip_order" => index_type == :rum}, + :offset + ) + |> maybe_fetch(user, search_query) + rescue + _ -> maybe_fetch([], user, search_query) + end end - def remove_from_index(object) do - # Also delete from search index - search_module = Pleroma.Config.get([Pleroma.Search, :module]) + def add_to_index(_activity), do: nil + def remove_from_index(_object), do: nil - ConcurrentLimiter.limit(Pleroma.Search, fn -> - Task.start(fn -> search_module.remove_from_index(object) end) - end) + def 
maybe_restrict_author(query, %User{} = author) do + Activity.Queries.by_author(query, author) end - def search(query, options) do - search_module = Pleroma.Config.get([Pleroma.Search, :module], Pleroma.Activity) + def maybe_restrict_author(query, _), do: query - search_module.search(options[:for_user], query, options) + def maybe_restrict_blocked(query, %User{} = user) do + Activity.Queries.exclude_authors(query, User.blocked_users_ap_ids(user)) + end + + def maybe_restrict_blocked(query, _), do: query + + def restrict_public(q) do + from([a, o] in q, + where: fragment("?->>'type' = 'Create'", a.data), + where: ^Pleroma.Constants.as_public() in a.recipients + ) + end + + defp query_with(q, :gin, search_query, :plain) do + %{rows: [[tsc]]} = + Ecto.Adapters.SQL.query!( + Pleroma.Repo, + "select current_setting('default_text_search_config')::regconfig::oid;" + ) + + from([a, o] in q, + where: + fragment( + "to_tsvector(?::oid::regconfig, ?->>'content') @@ plainto_tsquery(?)", + ^tsc, + o.data, + ^search_query + ) + ) + end + + defp query_with(q, :gin, search_query, :websearch) do + %{rows: [[tsc]]} = + Ecto.Adapters.SQL.query!( + Pleroma.Repo, + "select current_setting('default_text_search_config')::regconfig::oid;" + ) + + from([a, o] in q, + where: + fragment( + "to_tsvector(?::oid::regconfig, ?->>'content') @@ websearch_to_tsquery(?)", + ^tsc, + o.data, + ^search_query + ) + ) + end + + defp query_with(q, :rum, search_query, :plain) do + from([a, o] in q, + where: + fragment( + "? @@ plainto_tsquery(?)", + o.fts_content, + ^search_query + ), + order_by: [fragment("? <=> now()::date", o.inserted_at)] + ) + end + + defp query_with(q, :rum, search_query, :websearch) do + from([a, o] in q, + where: + fragment( + "? @@ websearch_to_tsquery(?)", + o.fts_content, + ^search_query + ), + order_by: [fragment("? 
<=> now()::date", o.inserted_at)] + ) + end + + def maybe_restrict_local(q, user) do + limit = Pleroma.Config.get([:instance, :limit_to_local_content], :unauthenticated) + + case {limit, user} do + {:all, _} -> restrict_local(q) + {:unauthenticated, %User{}} -> q + {:unauthenticated, _} -> restrict_local(q) + {false, _} -> q + end + end + + defp restrict_local(q), do: where(q, local: true) + + def maybe_fetch(activities, user, search_query) do + with true <- Regex.match?(~r/https?:/, search_query), + {:ok, object} <- Fetcher.fetch_object_from_id(search_query), + %Activity{} = activity <- Activity.get_create_by_object_ap_id(object.data["id"]), + true <- Visibility.visible_for_user?(activity, user) do + [activity | activities] + else + _ -> activities + end end end diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 41f99ad9fc..fa9e27b037 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -4,7 +4,7 @@ defmodule Pleroma.Search.Meilisearch do alias Pleroma.Activity - import Pleroma.Activity.Search + import Pleroma.Search.DatabaseSearch import Ecto.Query defp meili_headers do diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex index 7e34446760..cdc70aacf2 100644 --- a/lib/pleroma/web/activity_pub/activity_pub.ex +++ b/lib/pleroma/web/activity_pub/activity_pub.ex @@ -141,7 +141,7 @@ def insert(map, local \\ true, fake \\ false, bypass_actor_check \\ false) when end) # Add local posts to search index - if local, do: Pleroma.Search.DatabaseSearch.add_to_index(activity) + if local, do: Pleroma.Search.add_to_index(activity) {:ok, activity} else diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex index fa57eab691..4762b5ac60 100644 --- a/lib/pleroma/web/activity_pub/side_effects.ex +++ b/lib/pleroma/web/activity_pub/side_effects.ex @@ -227,7 +227,7 @@ def handle(%{data: %{"type" => "Create"}} = 
activity, meta) do Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end) end) - Pleroma.Search.DatabaseSearch.add_to_index(Map.put(activity, :object, object)) + Pleroma.Search.add_to_index(Map.put(activity, :object, object)) meta = meta @@ -330,7 +330,7 @@ def handle(%{data: %{"type" => "Delete", "object" => deleted_object}} = object, if result == :ok do Notification.create_notifications(object) - Pleroma.Search.DatabaseSearch.remove_from_index(deleted_object) + Pleroma.Search.remove_from_index(deleted_object) {:ok, object, meta} else diff --git a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex index 10f1aa5324..e4acba2264 100644 --- a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex +++ b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex @@ -99,7 +99,7 @@ defp resource_search(_, "accounts", query, options) do end defp resource_search(_, "statuses", query, options) do - statuses = with_fallback(fn -> Pleroma.Search.DatabaseSearch.search(query, options) end) + statuses = with_fallback(fn -> Pleroma.Search.search(query, options) end) StatusView.render("index.json", activities: statuses, diff --git a/test/pleroma/activity/search_test.exs b/test/pleroma/search/database_search_test.ex similarity index 86% rename from test/pleroma/activity/search_test.exs rename to test/pleroma/search/database_search_test.ex index 3b5fd2c3c5..c123d0b84c 100644 --- a/test/pleroma/activity/search_test.exs +++ b/test/pleroma/search/database_search_test.ex @@ -2,8 +2,8 @@ # Copyright © 2017-2022 Pleroma Authors # SPDX-License-Identifier: AGPL-3.0-only -defmodule Pleroma.Activity.SearchTest do - alias Pleroma.Activity.Search +defmodule Pleroma.Search.DatabaseSearchTest do + alias Pleroma.Search.DatabaseSearch alias Pleroma.Web.CommonAPI import Pleroma.Factory @@ -13,7 +13,7 @@ test "it finds something" do user = insert(:user) {:ok, post} = CommonAPI.post(user, 
%{status: "it's wednesday my dudes"}) - [result] = Search.search(nil, "wednesday") + [result] = DatabaseSearch.search(nil, "wednesday") assert result.id == post.id end @@ -45,7 +45,7 @@ test "using plainto_tsquery on postgres < 11" do {:ok, _post2} = CommonAPI.post(user, %{status: "it's wednesday my bros"}) # plainto doesn't understand complex queries - assert [result] = Search.search(nil, "wednesday -dudes") + assert [result] = DatabaseSearch.search(nil, "wednesday -dudes") assert result.id == post.id end @@ -55,7 +55,7 @@ test "using websearch_to_tsquery" do {:ok, _post} = CommonAPI.post(user, %{status: "it's wednesday my dudes"}) {:ok, other_post} = CommonAPI.post(user, %{status: "it's wednesday my bros"}) - assert [result] = Search.search(nil, "wednesday -dudes") + assert [result] = DatabaseSearch.search(nil, "wednesday -dudes") assert result.id == other_post.id end From a12f63bc81481e3f852934e8cc1269e16a57cf0a Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 22 Nov 2021 21:39:54 +0300 Subject: [PATCH 36/63] Implement suggestions from the Meilisearch MR - Index unlisted posts - Move version check outside of the streaming and only do it once - Use a PUT request instead of checking manually if there is need to insert - Add error handling, sort of --- lib/mix/tasks/pleroma/search/meilisearch.ex | 84 +++++++++----------- lib/pleroma/search/meilisearch.ex | 85 ++++++++++++++------- 2 files changed, 93 insertions(+), 76 deletions(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 62ace7e397..6730a99a90 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -3,38 +3,40 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Mix.Tasks.Pleroma.Search.Meilisearch do - require Logger require Pleroma.Constants import Mix.Pleroma import Ecto.Query - import Pleroma.Search.Meilisearch, only: [meili_post!: 2, meili_delete!: 1, meili_get!: 1] + import 
Pleroma.Search.Meilisearch, + only: [meili_post: 2, meili_put: 2, meili_get: 1, meili_delete!: 1] - def run(["index" | args]) do + def run(["index"]) do start_pleroma() - is_reindex = "--reindex" in args + {:ok, _} = + meili_post( + "/indexes/objects/settings/ranking-rules", + [ + "desc(published)", + "words", + "exactness", + "proximity", + "wordsPosition", + "typo", + "attribute" + ] + ) - meili_post!( - "/indexes/objects/settings/ranking-rules", - [ - "desc(published)", - "words", - "exactness", - "proximity", - "wordsPosition", - "typo", - "attribute" - ] - ) + {:ok, _} = + meili_post( + "/indexes/objects/settings/searchable-attributes", + [ + "content" + ] + ) - meili_post!( - "/indexes/objects/settings/searchable-attributes", - [ - "content" - ] - ) + IO.puts("Created indices. Starting to insert posts.") chunk_size = 10_000 @@ -42,11 +44,11 @@ def run(["index" | args]) do fn -> query = from(Pleroma.Object, - # Only index public posts which are notes and have some text + # Only index public and unlisted posts which are notes and have some text where: fragment("data->>'type' = 'Note'") and - fragment("LENGTH(data->>'content') > 0") and - fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()), + (fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()) or + fragment("data->'cc' \\? ?", ^Pleroma.Constants.as_public())), order_by: [desc: fragment("data->'published'")] ) @@ -70,34 +72,18 @@ def run(["index" | args]) do {[objects], new_acc} end) |> Stream.each(fn objects -> - objects = - objects - |> Enum.filter(fn o -> - if is_reindex do - result = meili_get!("/indexes/objects/documents/#{o.id}") - - # With >= 0.24.0 the name for "errorCode" is just "code" - error_code_key = - if meili_get!("/version")["pkgVersion"] |> Version.match?(">= 0.24.0"), - do: "code", - else: "errorCode" - - # Filter out the already indexed documents. 
- # This is true when the document does not exist - result[error_code_key] == "document_not_found" - else - true - end - end) - result = - meili_post!( + meili_put( "/indexes/objects/documents", objects ) - if not Map.has_key?(result, "updateId") do - IO.puts("Failed to index: #{inspect(result)}") + with {:ok, res} <- result do + if not Map.has_key?(res, "updateId") do + IO.puts("\nFailed to index: #{inspect(result)}") + end + else + e -> IO.puts("\nFailed to index due to network error: #{inspect(e)}") end end) |> Stream.run() @@ -137,7 +123,7 @@ def run(["show-private-key", master_key]) do def run(["stats"]) do start_pleroma() - result = meili_get!("/indexes/objects/stats") + {:ok, result} = meili_get("/indexes/objects/stats") IO.puts("Number of entries: #{result["numberOfDocuments"]}") IO.puts("Indexing? #{result["isIndexing"]}") end diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index fa9e27b037..21b44de866 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -14,29 +14,50 @@ defp meili_headers do if is_nil(private_key), do: [], else: [{"X-Meili-API-Key", private_key}] end - def meili_get!(path) do + def meili_get(path) do endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - {:ok, result} = + result = Pleroma.HTTP.get( Path.join(endpoint, path), meili_headers() ) - Jason.decode!(result.body) + with {:ok, res} <- result do + {:ok, Jason.decode!(res.body)} + end end - def meili_post!(path, params) do + def meili_post(path, params) do endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - {:ok, result} = + result = Pleroma.HTTP.post( Path.join(endpoint, path), Jason.encode!(params), meili_headers() ) - Jason.decode!(result.body) + with {:ok, res} <- result do + {:ok, Jason.decode!(res.body)} + end + end + + def meili_put(path, params) do + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + + result = + Pleroma.HTTP.request( + :put, + 
Path.join(endpoint, path), + Jason.encode!(params), + meili_headers(), + [] + ) + + with {:ok, res} <- result do + {:ok, Jason.decode!(res.body)} + end end def meili_delete!(path) do @@ -57,34 +78,40 @@ def search(user, query, options \\ []) do offset = Keyword.get(options, :offset, 0) author = Keyword.get(options, :author) - result = - meili_post!( + res = + meili_post( "/indexes/objects/search", %{q: query, offset: offset, limit: limit} ) - hits = result["hits"] |> Enum.map(& &1["ap"]) + with {:ok, result} <- res do + hits = result["hits"] |> Enum.map(& &1["ap"]) - try do - hits - |> Activity.create_by_object_ap_id() - |> Activity.with_preloaded_object() - |> Activity.with_preloaded_object() - |> Activity.restrict_deactivated_users() - |> maybe_restrict_local(user) - |> maybe_restrict_author(author) - |> maybe_restrict_blocked(user) - |> maybe_fetch(user, query) - |> order_by([object: obj], desc: obj.data["published"]) - |> Pleroma.Repo.all() - rescue - _ -> maybe_fetch([], user, query) + try do + hits + |> Activity.create_by_object_ap_id() + |> Activity.with_preloaded_object() + |> Activity.with_preloaded_object() + |> Activity.restrict_deactivated_users() + |> maybe_restrict_local(user) + |> maybe_restrict_author(author) + |> maybe_restrict_blocked(user) + |> maybe_fetch(user, query) + |> order_by([object: obj], desc: obj.data["published"]) + |> Pleroma.Repo.all() + rescue + _ -> maybe_fetch([], user, query) + end end end def object_to_search_data(object) do + # Only index public or unlisted Notes if not is_nil(object) and object.data["type"] == "Note" and - Pleroma.Constants.as_public() in object.data["to"] do + not is_nil(object.data["content"]) and + (Pleroma.Constants.as_public() in object.data["to"] or + Pleroma.Constants.as_public() in object.data["cc"]) and + String.length(object.data["content"]) > 1 do data = object.data content_str = @@ -117,13 +144,17 @@ def add_to_index(activity) do if activity.data["type"] == "Create" and maybe_search_data do result 
= - meili_post!( + meili_put( "/indexes/objects/documents", [maybe_search_data] ) - if not Map.has_key?(result, "updateId") do - Logger.error("Failed to add activity #{activity.id} to index: #{inspect(result)}") + with {:ok, res} <- result, + true <- Map.has_key?(res, "updateId") do + # Do nothing + else + _ -> + Logger.error("Failed to add activity #{activity.id} to index: #{inspect(result)}") end end end From 3a11e79de0c7092bf4fe0649e4ab1fcb53eb14a3 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 20 Dec 2021 17:46:23 +0300 Subject: [PATCH 37/63] Add config description for meilisearch --- config/description.exs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/config/description.exs b/config/description.exs index 3a2a652726..81fefaf89e 100644 --- a/config/description.exs +++ b/config/description.exs @@ -3475,5 +3475,40 @@ ] } ] + }, + %{ + group: :pleroma, + key: Pleroma.Search, + type: :group, + description: "General search settings.", + children: [ + %{ + key: :module, + type: :keyword, + description: "Selected search module.", + suggestion: [Pleroma.Search.DatabaseSearch, Pleroma.Search.Meilisearch] + } + ] + }, + %{ + group: :pleroma, + key: Pleroma.Search.Meilisearch, + type: :group, + description: "Meilisearch settings.", + children: [ + %{ + key: :url, + type: :string, + description: "Meilisearch URL.", + suggestion: ["http://127.0.0.1:7700/"] + }, + %{ + key: :private_key, + type: :string, + description: + "Private key for meilisearch authentication, or `nil` to disable private key authentication.", + suggestion: [nil] + } + ] } ] From 3412713c5b2fd24605b18933ef70de164ee14f2d Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 20 Dec 2021 18:16:33 +0300 Subject: [PATCH 38/63] Update search.md documentation with meilisearch indexing steps --- docs/configuration/search.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/configuration/search.md b/docs/configuration/search.md index 
c7e77d9c22..7dbbd3e175 100644 --- a/docs/configuration/search.md +++ b/docs/configuration/search.md @@ -60,6 +60,15 @@ have to do it one time, but it might take a while, depending on the amount of po consuming process for `meilisearch`, and it will take a lot of RAM when running if you have a lot of posts (seems to be around 5G for ~1.2 million posts while idle and up to 7G while indexing initially, but your experience may be different). +The sequence of actions is as follows: + +1. First, change the configuration to use `Pleroma.Search.Meilisearch` as the search backend +2. Restart your instance. At this point it can be used while the search indexing is running, though search won't return anything +3. Start the initial indexing process (as described below with `index`), + and wait until the task says it sent everything from the database to index +4. Wait until everything is actually indexed (by checking with `stats` as described below); + at this point you don't have to do anything, just wait a while. + To start the initial indexing, run the `index` command: === "OTP" From 3179ed0921197a8a8f32a519c7d41dc09011024d Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 20 Dec 2021 18:48:52 +0300 Subject: [PATCH 39/63] Make chunk size configurable --- config/config.exs | 5 ++++- lib/mix/tasks/pleroma/search/meilisearch.ex | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/config/config.exs b/config/config.exs index f7f3a14544..bcbc59b83c 100644 --- a/config/config.exs +++ b/config/config.exs @@ -872,7 +872,10 @@ config :pleroma, Pleroma.Search, module: Pleroma.Search.DatabaseSearch -config :pleroma, Pleroma.Search.Meilisearch, url: "http://127.0.0.1:7700/", private_key: nil +config :pleroma, Pleroma.Search.Meilisearch, + url: "http://127.0.0.1:7700/", + private_key: nil, + initial_indexing_chunk_size: 100_000 # Import environment specific config. This must remain at the bottom # of this file so it overrides the configuration defined above.
diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 6730a99a90..021552f7b7 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -38,7 +38,7 @@ def run(["index"]) do IO.puts("Created indices. Starting to insert posts.") - chunk_size = 10_000 + chunk_size = Pleroma.Config.get([Pleroma.Search.Meilisearch, :initial_indexing_chunk_size]) Pleroma.Repo.transaction( fn -> From 571533ae2618478f26db312e52265e143356debd Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 20 Dec 2021 19:05:59 +0300 Subject: [PATCH 40/63] Don't support meilisearch < 0.24.0, since it breaks things --- lib/mix/tasks/pleroma/search/meilisearch.ex | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 021552f7b7..5098668ad7 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -14,17 +14,29 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do def run(["index"]) do start_pleroma() + meili_version = + ( + {:ok, result} = meili_get("/version") + + result["pkgVersion"] + ) + + # The ranking rule syntax was changed but nothing about that is mentioned in the changelog + if not Version.match?(meili_version, ">= 0.24.0") do + raise "Meilisearch <0.24.0 not supported" + end + {:ok, _} = meili_post( "/indexes/objects/settings/ranking-rules", [ - "desc(published)", + "published:desc", "words", "exactness", "proximity", - "wordsPosition", "typo", - "attribute" + "attribute", + "sort" ] ) From 4f2637acc6c46ea39ae38e869903e7ffcc38b34d Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 20 Dec 2021 19:27:22 +0300 Subject: [PATCH 41/63] Add description for initial_indexing_chunk_size --- config/description.exs | 8 ++++++++ docs/configuration/search.md | 8 ++++++-- 2 files changed, 14 insertions(+), 2 
deletions(-) diff --git a/config/description.exs b/config/description.exs index 81fefaf89e..cea4401baf 100644 --- a/config/description.exs +++ b/config/description.exs @@ -3508,6 +3508,14 @@ description: "Private key for meilisearch authentication, or `nil` to disable private key authentication.", suggestion: [nil] + }, + %{ + key: :initial_indexing_chunk_size, + type: :int, + description: + "Amount of posts in a batch when running the initial indexing operation. Should probably not be more than 100000" <> + " since there's a limit on maximum insert size", + suggestion: [100_000] } ] } diff --git a/docs/configuration/search.md b/docs/configuration/search.md index 7dbbd3e175..a785a18ad1 100644 --- a/docs/configuration/search.md +++ b/docs/configuration/search.md @@ -25,11 +25,15 @@ To use [meilisearch](https://www.meilisearch.com/), set the search module to `Pl > config :pleroma, Pleroma.Search, module: Pleroma.Search.Meilisearch -You then need to set the address of the meilisearch instance, and optionally the private key for authentication. +You then need to set the address of the meilisearch instance, and optionally the private key for authentication. You might +also want to change the `initial_indexing_chunk_size` to be smaller if your server is not very powerful, but not higher than `100_000`, +because meilisearch will refuse to process it if it's too big. However, in general you want this to be as big as possible, because meilisearch +indexes faster when it can process many posts in a single batch. > config :pleroma, Pleroma.Search.Meilisearch, > url: "http://127.0.0.1:7700/", -> private_key: "private key" +> private_key: "private key", +> initial_indexing_chunk_size: 100_000 Information about setting up meilisearch can be found in the [official documentation](https://docs.meilisearch.com/learn/getting_started/installation.html). 
From 6f2f457751ea09507045e6dd5d5869a14befd3d1 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Mon, 20 Dec 2021 22:38:50 +0300 Subject: [PATCH 42/63] Add a search backend behaviour --- lib/pleroma/search/database_search.ex | 5 +++++ lib/pleroma/search/meilisearch.ex | 4 ++++ lib/pleroma/search/search_backend.ex | 17 +++++++++++++++++ 3 files changed, 26 insertions(+) create mode 100644 lib/pleroma/search/search_backend.ex diff --git a/lib/pleroma/search/database_search.ex b/lib/pleroma/search/database_search.ex index 5a8b8ca67b..3735a5fab4 100644 --- a/lib/pleroma/search/database_search.ex +++ b/lib/pleroma/search/database_search.ex @@ -13,6 +13,8 @@ defmodule Pleroma.Search.DatabaseSearch do import Ecto.Query + @behaviour Pleroma.Search.SearchBackend + def search(user, search_query, options \\ []) do index_type = if Pleroma.Config.get([:database, :rum_enabled]), do: :rum, else: :gin limit = Enum.min([Keyword.get(options, :limit), 40]) @@ -45,7 +47,10 @@ def search(user, search_query, options \\ []) do end end + @impl true def add_to_index(_activity), do: nil + + @impl true def remove_from_index(_object), do: nil def maybe_restrict_author(query, %User{} = author) do diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 21b44de866..33bbf8392c 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -7,6 +7,8 @@ defmodule Pleroma.Search.Meilisearch do import Pleroma.Search.DatabaseSearch import Ecto.Query + @behaviour Pleroma.Search.SearchBackend + defp meili_headers do private_key = Pleroma.Config.get([Pleroma.Search.Meilisearch, :private_key]) @@ -139,6 +141,7 @@ def object_to_search_data(object) do end end + @impl true def add_to_index(activity) do maybe_search_data = object_to_search_data(activity.object) @@ -159,6 +162,7 @@ def add_to_index(activity) do end end + @impl true def remove_from_index(object) do meili_delete!("/indexes/objects/documents/#{object.id}") end diff --git 
a/lib/pleroma/search/search_backend.ex b/lib/pleroma/search/search_backend.ex new file mode 100644 index 0000000000..ed6bfd3292 --- /dev/null +++ b/lib/pleroma/search/search_backend.ex @@ -0,0 +1,17 @@ +defmodule Pleroma.Search.SearchBackend do + @doc """ + Add the object associated with the activity to the search index. + + The whole activity is passed, to allow filtering on things such as scope. + """ + @callback add_to_index(activity :: Pleroma.Activity.t()) :: nil + + @doc """ + Remove the object from the index. + + Just the object, as opposed to the whole activity, is passed, since the object + is what contains the actual content and there is no need for fitlering when removing + from index. + """ + @callback remove_from_index(object :: Pleroma.Object.t()) :: nil +end From 2bc21c6f1884bae3226f760ed1da39dd9c5f2958 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 22 Jan 2022 15:23:11 +0300 Subject: [PATCH 43/63] Use oban for search indexing --- config/config.exs | 3 ++- lib/pleroma/search.ex | 15 ++++--------- lib/pleroma/workers/search_indexing_worker.ex | 21 +++++++++++++++++++ 3 files changed, 27 insertions(+), 12 deletions(-) create mode 100644 lib/pleroma/workers/search_indexing_worker.ex diff --git a/config/config.exs b/config/config.exs index bcbc59b83c..3e2f0da3ff 100644 --- a/config/config.exs +++ b/config/config.exs @@ -571,7 +571,8 @@ remote_fetcher: 2, attachments_cleanup: 1, new_users_digest: 1, - mute_expire: 5 + mute_expire: 5, + search_indexing: 1 ], plugins: [Oban.Plugins.Pruner], crontab: [ diff --git a/lib/pleroma/search.ex b/lib/pleroma/search.ex index ae0b28c54e..af858fc469 100644 --- a/lib/pleroma/search.ex +++ b/lib/pleroma/search.ex @@ -1,19 +1,12 @@ defmodule Pleroma.Search do - def add_to_index(activity) do - search_module = Pleroma.Config.get([Pleroma.Search, :module]) + alias Pleroma.Workers.SearchIndexingWorker - ConcurrentLimiter.limit(Pleroma.Search, fn -> - Task.start(fn -> search_module.add_to_index(activity) end) - 
end) + def add_to_index(activity) do + SearchIndexingWorker.enqueue("add_to_index", %{"activity" => activity.id}) end def remove_from_index(object) do - # Also delete from search index - search_module = Pleroma.Config.get([Pleroma.Search, :module]) - - ConcurrentLimiter.limit(Pleroma.Search, fn -> - Task.start(fn -> search_module.remove_from_index(object) end) - end) + SearchIndexingWorker.enqueue("remove_from_index", %{"object" => object.id}) end def search(query, options) do diff --git a/lib/pleroma/workers/search_indexing_worker.ex b/lib/pleroma/workers/search_indexing_worker.ex new file mode 100644 index 0000000000..43b7bad1e3 --- /dev/null +++ b/lib/pleroma/workers/search_indexing_worker.ex @@ -0,0 +1,21 @@ +defmodule Pleroma.Workers.SearchIndexingWorker do + use Pleroma.Workers.WorkerHelper, queue: "search_indexing" + + @impl Oban.Worker + + def perform(%Job{args: %{"op" => "add_to_index", "activity" => activity_id}}) do + activity = Pleroma.Activity.get_by_id_with_object(activity_id) + + search_module = Pleroma.Config.get([Pleroma.Search, :module]) + + search_module.add_to_index(activity) + end + + def perform(%Job{args: %{"op" => "remove_from_index", "object" => object_id}}) do + object = Pleroma.Object.get_by_id(object_id) + + search_module = Pleroma.Config.get([Pleroma.Search, :module]) + + search_module.remove_from_index(object) + end +end From d89dc5518b5c0eb232e7ac85ddd538f89c32606d Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 22 Jan 2022 16:31:32 +0300 Subject: [PATCH 44/63] Fix meilisearch tests and jobs for oban --- lib/pleroma/workers/search_indexing_worker.ex | 4 +++ test/pleroma/search/meilisearch_test.exs | 35 ++++++++++++++----- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/lib/pleroma/workers/search_indexing_worker.ex b/lib/pleroma/workers/search_indexing_worker.ex index 43b7bad1e3..70a8d42d06 100644 --- a/lib/pleroma/workers/search_indexing_worker.ex +++ b/lib/pleroma/workers/search_indexing_worker.ex @@ -9,6 
+9,8 @@ def perform(%Job{args: %{"op" => "add_to_index", "activity" => activity_id}}) do search_module = Pleroma.Config.get([Pleroma.Search, :module]) search_module.add_to_index(activity) + + :ok end def perform(%Job{args: %{"op" => "remove_from_index", "object" => object_id}}) do @@ -17,5 +19,7 @@ def perform(%Job{args: %{"op" => "remove_from_index", "object" => object_id}}) d search_module = Pleroma.Config.get([Pleroma.Search, :module]) search_module.remove_from_index(object) + + :ok end end diff --git a/test/pleroma/search/meilisearch_test.exs b/test/pleroma/search/meilisearch_test.exs index 251388ea2d..da614577f4 100644 --- a/test/pleroma/search/meilisearch_test.exs +++ b/test/pleroma/search/meilisearch_test.exs @@ -6,6 +6,7 @@ defmodule Pleroma.Search.MeilisearchTest do require Pleroma.Constants use Pleroma.DataCase + use Oban.Testing, repo: Pleroma.Repo import Pleroma.Factory import Tesla.Mock @@ -13,6 +14,7 @@ defmodule Pleroma.Search.MeilisearchTest do alias Pleroma.Search.Meilisearch alias Pleroma.Web.CommonAPI + alias Pleroma.Workers.SearchIndexingWorker setup_all do Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) @@ -27,7 +29,8 @@ defmodule Pleroma.Search.MeilisearchTest do {Meilisearch, [:passthrough], [ add_to_index: fn a -> passthrough([a]) end, - remove_from_index: fn a -> passthrough([a]) end + remove_from_index: fn a -> passthrough([a]) end, + meili_put: fn u, a -> passthrough([u, a]) end ]} ], context, @@ -38,7 +41,7 @@ test "indexes a local post on creation" do user = insert(:user) mock_global(fn - %{method: :post, url: "http://127.0.0.1:7700/indexes/objects/documents", body: body} -> + %{method: :put, url: "http://127.0.0.1:7700/indexes/objects/documents", body: body} -> assert match?( [%{"content" => "guys i just don't wanna leave the swamp"}], Jason.decode!(body) @@ -53,6 +56,15 @@ test "indexes a local post on creation" do visibility: "public" }) + args = %{"op" => "add_to_index", "activity" => activity.id} + + 
assert_enqueued( + worker: SearchIndexingWorker, + args: args + ) + + assert :ok = perform_job(SearchIndexingWorker, args) + assert_called(Meilisearch.add_to_index(activity)) end @@ -60,26 +72,25 @@ test "doesn't index posts that are not public" do user = insert(:user) Enum.each(["unlisted", "private", "direct"], fn visiblity -> - {:ok, _} = + {:ok, activity} = CommonAPI.post(user, %{ status: "guys i just don't wanna leave the swamp", visibility: visiblity }) + + Meilisearch.add_to_index(activity) + assert_not_called(Meilisearch.meili_put(:_)) end) history = call_history(Meilisearch) assert Enum.count(history) == 3 - - Enum.each(history, fn {_, _, return} -> - assert is_nil(return) - end) end test "deletes posts from index when deleted locally" do user = insert(:user) mock_global(fn - %{method: :post, url: "http://127.0.0.1:7700/indexes/objects/documents", body: body} -> + %{method: :put, url: "http://127.0.0.1:7700/indexes/objects/documents", body: body} -> assert match?( [%{"content" => "guys i just don't wanna leave the swamp"}], Jason.decode!(body) @@ -98,10 +109,16 @@ test "deletes posts from index when deleted locally" do visibility: "public" }) - assert_called(Meilisearch.add_to_index(activity)) + args = %{"op" => "add_to_index", "activity" => activity.id} + assert_enqueued(worker: SearchIndexingWorker, args: args) + assert :ok = perform_job(SearchIndexingWorker, args) {:ok, _} = CommonAPI.delete(activity.id, user) + delete_args = %{"op" => "remove_from_index", "object" => activity.object.id} + assert_enqueued(worker: SearchIndexingWorker, args: delete_args) + assert :ok = perform_job(SearchIndexingWorker, delete_args) + assert_called(Meilisearch.remove_from_index(:_)) end end From 3387935e8354e32171fe6e28a8f96f49154acbb3 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 22 Jan 2022 16:52:06 +0300 Subject: [PATCH 45/63] Don't try removing deleted users and such from index as posts --- lib/pleroma/search.ex | 8 ++++---- 
lib/pleroma/web/activity_pub/side_effects.ex | 5 ++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/lib/pleroma/search.ex b/lib/pleroma/search.ex index af858fc469..3b266e59bb 100644 --- a/lib/pleroma/search.ex +++ b/lib/pleroma/search.ex @@ -1,12 +1,12 @@ defmodule Pleroma.Search do alias Pleroma.Workers.SearchIndexingWorker - def add_to_index(activity) do - SearchIndexingWorker.enqueue("add_to_index", %{"activity" => activity.id}) + def add_to_index(%Pleroma.Activity{id: activity_id}) do + SearchIndexingWorker.enqueue("add_to_index", %{"activity" => activity_id}) end - def remove_from_index(object) do - SearchIndexingWorker.enqueue("remove_from_index", %{"object" => object.id}) + def remove_from_index(%Pleroma.Object{id: object_id}) do + SearchIndexingWorker.enqueue("remove_from_index", %{"object" => object_id}) end def search(query, options) do diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex index 4762b5ac60..644e626304 100644 --- a/lib/pleroma/web/activity_pub/side_effects.ex +++ b/lib/pleroma/web/activity_pub/side_effects.ex @@ -330,7 +330,10 @@ def handle(%{data: %{"type" => "Delete", "object" => deleted_object}} = object, if result == :ok do Notification.create_notifications(object) - Pleroma.Search.remove_from_index(deleted_object) + # Only remove from index when deleting actual objects, not users or anything else + with %Pleroma.Object{} <- deleted_object do + Pleroma.Search.remove_from_index(deleted_object) + end {:ok, object, meta} else From fd2cfc80d2853c27f4d0c07631849da9b8d73e85 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 22 Jan 2022 17:17:43 +0300 Subject: [PATCH 46/63] Change search_indexing = 10 and retries for indexing = 2 --- config/config.exs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/config/config.exs b/config/config.exs index 3e2f0da3ff..ce9338014e 100644 --- a/config/config.exs +++ b/config/config.exs @@ -572,7 +572,7 @@ 
attachments_cleanup: 1, new_users_digest: 1, mute_expire: 5, - search_indexing: 1 + search_indexing: 10 ], plugins: [Oban.Plugins.Pruner], crontab: [ @@ -583,7 +583,8 @@ config :pleroma, :workers, retries: [ federator_incoming: 5, - federator_outgoing: 5 + federator_outgoing: 5, + search_indexing: 2 ] config :pleroma, Pleroma.Formatter, From 79225d9b0adcd848502e5ba0bbbb295855a30ba0 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 22 Jan 2022 21:09:53 +0300 Subject: [PATCH 47/63] Actually, unlisted posts are indexed --- test/pleroma/search/meilisearch_test.exs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/test/pleroma/search/meilisearch_test.exs b/test/pleroma/search/meilisearch_test.exs index da614577f4..04a2d75d91 100644 --- a/test/pleroma/search/meilisearch_test.exs +++ b/test/pleroma/search/meilisearch_test.exs @@ -71,19 +71,23 @@ test "indexes a local post on creation" do test "doesn't index posts that are not public" do user = insert(:user) - Enum.each(["unlisted", "private", "direct"], fn visiblity -> + Enum.each(["private", "direct"], fn visibility -> {:ok, activity} = CommonAPI.post(user, %{ status: "guys i just don't wanna leave the swamp", - visibility: visiblity + visibility: visibility }) - Meilisearch.add_to_index(activity) + args = %{"op" => "add_to_index", "activity" => activity.id} + + assert_enqueued(worker: SearchIndexingWorker, args: args) + assert :ok = perform_job(SearchIndexingWorker, args) + assert_not_called(Meilisearch.meili_put(:_)) end) history = call_history(Meilisearch) - assert Enum.count(history) == 3 + assert Enum.count(history) == 2 end test "deletes posts from index when deleted locally" do From 1e23f527e3e22108b402552a0766e488048ed3f4 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Tue, 22 Mar 2022 20:29:17 +0300 Subject: [PATCH 48/63] Change the meilisearch key auth to conform to 0.25.0 --- docs/configuration/search.md | 6 +++--- lib/mix/tasks/pleroma/search/meilisearch.ex | 14 
++++++++------ lib/pleroma/search/meilisearch.ex | 2 +- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/docs/configuration/search.md b/docs/configuration/search.md index a785a18ad1..82217e5eeb 100644 --- a/docs/configuration/search.md +++ b/docs/configuration/search.md @@ -47,15 +47,15 @@ you have to get the _private key_, which is actually used for authentication. === "OTP" ```sh - ./bin/pleroma_ctl search.meilisearch show-private-key + ./bin/pleroma_ctl search.meilisearch show-keys ``` === "From Source" ```sh - mix pleroma.search.meilisearch show-private-key + mix pleroma.search.meilisearch show-keys ``` -This is the key you actually put into your configuration file. +You will see a "Default Admin API Key", this is the key you actually put into your configuration file. ### Initial indexing diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 5098668ad7..db56876fa5 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -22,7 +22,7 @@ def run(["index"]) do ) # The ranking rule syntax was changed but nothing about that is mentioned in the changelog - if not Version.match?(meili_version, ">= 0.24.0") do + if not Version.match?(meili_version, ">= 0.25.0") do raise "Meilisearch <0.24.0 not supported" end @@ -112,7 +112,7 @@ def run(["clear"]) do meili_delete!("/indexes/objects/documents") end - def run(["show-private-key", master_key]) do + def run(["show-keys", master_key]) do start_pleroma() endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) @@ -120,15 +120,17 @@ def run(["show-private-key", master_key]) do {:ok, result} = Pleroma.HTTP.get( Path.join(endpoint, "/keys"), - [{"X-Meili-API-Key", master_key}] + [{"Authorization", "Bearer #{master_key}"}] ) decoded = Jason.decode!(result.body) - if decoded["private"] do - IO.puts(decoded["private"]) + if decoded["results"] do + Enum.each(decoded["results"], fn %{"description" => 
desc, "key" => key} -> + IO.puts("#{desc}: #{key}") + end) else - IO.puts("Error fetching the key, check the master key is correct: #{inspect(decoded)}") + IO.puts("Error fetching the keys, check the master key is correct: #{inspect(decoded)}") end end diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 33bbf8392c..0f9182ffc5 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -13,7 +13,7 @@ defp meili_headers do private_key = Pleroma.Config.get([Pleroma.Search.Meilisearch, :private_key]) [{"Content-Type", "application/json"}] ++ - if is_nil(private_key), do: [], else: [{"X-Meili-API-Key", private_key}] + if is_nil(private_key), do: [], else: [{"Authorization", "Bearer #{private_key}"}] end def meili_get(path) do From 84608be87e2c5961a4deb9030307c978bf1168e5 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Tue, 22 Mar 2022 20:45:49 +0300 Subject: [PATCH 49/63] Change updateId to uid because apparently that's the new name --- lib/mix/tasks/pleroma/search/meilisearch.ex | 2 +- lib/pleroma/search/meilisearch.ex | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index db56876fa5..d4a83c3cdb 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -91,7 +91,7 @@ def run(["index"]) do ) with {:ok, res} <- result do - if not Map.has_key?(res, "updateId") do + if not Map.has_key?(res, "uid") do IO.puts("\nFailed to index: #{inspect(result)}") end else diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 0f9182ffc5..3db65f261c 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -153,7 +153,7 @@ def add_to_index(activity) do ) with {:ok, res} <- result, - true <- Map.has_key?(res, "updateId") do + true <- Map.has_key?(res, "uid") do # Do nothing else _ -> From 
b150e6f15e0f06c8e23c0ac66aeaf80eb2f8c31a Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Wed, 23 Mar 2022 11:36:01 +0300 Subject: [PATCH 50/63] Update meilisearch docs --- docs/configuration/search.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/configuration/search.md b/docs/configuration/search.md index 82217e5eeb..f131948a72 100644 --- a/docs/configuration/search.md +++ b/docs/configuration/search.md @@ -37,8 +37,10 @@ indexes faster when it can process many posts in a single batch. Information about setting up meilisearch can be found in the [official documentation](https://docs.meilisearch.com/learn/getting_started/installation.html). -You probably want to start it with `MEILI_NO_ANALYTICS=true` and `MEILI_NO_CENTRY=true` environment variables, -to disable analytics. +You probably want to start it with the `MEILI_NO_ANALYTICS=true` environment variable to disable analytics. +At least version 0.25.0 is required, but you are strongly advised to use at least 0.26.0, as it introduces +the `--enable-auto-batching` option which drastically improves performance. Without this option, the search +is hardly usable on a somewhat big instance. 
### Private key authentication (optional) From e20f74c71b078d706bc93632773f9b590d2fb018 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Fri, 26 Aug 2022 23:39:58 +0300 Subject: [PATCH 51/63] Remove duplicate function call --- lib/pleroma/search/meilisearch.ex | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 3db65f261c..53f8a25443 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -93,7 +93,6 @@ def search(user, query, options \\ []) do hits |> Activity.create_by_object_ap_id() |> Activity.with_preloaded_object() - |> Activity.with_preloaded_object() |> Activity.restrict_deactivated_users() |> maybe_restrict_local(user) |> maybe_restrict_author(author) From 119b2b847b76c7300bd71699d9f2e5676bdb0bb4 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 27 Aug 2022 00:09:37 +0300 Subject: [PATCH 52/63] Instead of checking string length, explicitly check for "" and "." 
--- lib/pleroma/search/meilisearch.ex | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 53f8a25443..2e13b84073 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -112,7 +112,7 @@ def object_to_search_data(object) do not is_nil(object.data["content"]) and (Pleroma.Constants.as_public() in object.data["to"] or Pleroma.Constants.as_public() in object.data["cc"]) and - String.length(object.data["content"]) > 1 do + object.data["content"] not in ["", "."] do data = object.data content_str = @@ -127,7 +127,8 @@ def object_to_search_data(object) do trimmed end - if String.length(content) > 1 do + # Make sure we have a non-empty string + if content != "" do {:ok, published, _} = DateTime.from_iso8601(data["published"]) %{ From 102ebb42bdba1673da39a8fa8ed1662bc8565aa4 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 27 Aug 2022 00:19:08 +0300 Subject: [PATCH 53/63] Make search a callback --- lib/pleroma/search/database_search.ex | 1 + lib/pleroma/search/meilisearch.ex | 1 + lib/pleroma/search/search_backend.ex | 11 +++++++++-- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/lib/pleroma/search/database_search.ex b/lib/pleroma/search/database_search.ex index 3735a5fab4..9a340abf15 100644 --- a/lib/pleroma/search/database_search.ex +++ b/lib/pleroma/search/database_search.ex @@ -15,6 +15,7 @@ defmodule Pleroma.Search.DatabaseSearch do @behaviour Pleroma.Search.SearchBackend + @impl true def search(user, search_query, options \\ []) do index_type = if Pleroma.Config.get([:database, :rum_enabled]), do: :rum, else: :gin limit = Enum.min([Keyword.get(options, :limit), 40]) diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 2e13b84073..4e88169d2c 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -75,6 +75,7 @@ def meili_delete!(path) do ) end + 
@impl true def search(user, query, options \\ []) do limit = Enum.min([Keyword.get(options, :limit), 40]) offset = Keyword.get(options, :offset, 0) diff --git a/lib/pleroma/search/search_backend.ex b/lib/pleroma/search/search_backend.ex index ed6bfd3292..a42e2f5f6c 100644 --- a/lib/pleroma/search/search_backend.ex +++ b/lib/pleroma/search/search_backend.ex @@ -1,10 +1,17 @@ defmodule Pleroma.Search.SearchBackend do + @doc """ + Search statuses with a query, restricting to only those the user should have access to. + """ + @callback search(user :: Pleroma.User.t(), query :: String.t(), options :: [any()]) :: [ + Pleroma.Activity.t() + ] + @doc """ Add the object associated with the activity to the search index. The whole activity is passed, to allow filtering on things such as scope. """ - @callback add_to_index(activity :: Pleroma.Activity.t()) :: nil + @callback add_to_index(activity :: Pleroma.Activity.t()) :: :ok | {:error, any()} @doc """ Remove the object from the index. @@ -13,5 +20,5 @@ defmodule Pleroma.Search.SearchBackend do is what contains the actual content and there is no need for fitlering when removing from index. 
""" - @callback remove_from_index(object :: Pleroma.Object.t()) :: nil + @callback remove_from_index(object :: Pleroma.Object.t()) :: {:ok, any()} | {:error, any()} end From 5ac67632384bfb284ac51f2a450d41cf3913378a Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 27 Aug 2022 00:31:36 +0300 Subject: [PATCH 54/63] Make add_to_index and remove_from_index report errors --- lib/mix/tasks/pleroma/search/meilisearch.ex | 4 +-- lib/pleroma/search/meilisearch.ex | 27 +++++++++++-------- lib/pleroma/workers/search_indexing_worker.ex | 4 --- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index d4a83c3cdb..72a5582282 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -9,7 +9,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do import Ecto.Query import Pleroma.Search.Meilisearch, - only: [meili_post: 2, meili_put: 2, meili_get: 1, meili_delete!: 1] + only: [meili_post: 2, meili_put: 2, meili_get: 1, meili_delete: 1] def run(["index"]) do start_pleroma() @@ -109,7 +109,7 @@ def run(["index"]) do def run(["clear"]) do start_pleroma() - meili_delete!("/indexes/objects/documents") + meili_delete("/indexes/objects/documents") end def run(["show-keys", master_key]) do diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 4e88169d2c..24789b00c4 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -62,17 +62,16 @@ def meili_put(path, params) do end end - def meili_delete!(path) do + def meili_delete(path) do endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) - {:ok, _} = - Pleroma.HTTP.request( - :delete, - Path.join(endpoint, path), - "", - meili_headers(), - [] - ) + Pleroma.HTTP.request( + :delete, + Path.join(endpoint, path), + "", + meili_headers(), + [] + ) end @impl true @@ -155,16 +154,22 @@ def 
add_to_index(activity) do with {:ok, res} <- result, true <- Map.has_key?(res, "uid") do - # Do nothing + # Added successfully + :ok else _ -> + # There was an error, report it Logger.error("Failed to add activity #{activity.id} to index: #{inspect(result)}") + {:error, result} end + else + # The post isn't something we can search, that's ok + :ok end end @impl true def remove_from_index(object) do - meili_delete!("/indexes/objects/documents/#{object.id}") + meili_delete("/indexes/objects/documents/#{object.id}") end end diff --git a/lib/pleroma/workers/search_indexing_worker.ex b/lib/pleroma/workers/search_indexing_worker.ex index 70a8d42d06..43b7bad1e3 100644 --- a/lib/pleroma/workers/search_indexing_worker.ex +++ b/lib/pleroma/workers/search_indexing_worker.ex @@ -9,8 +9,6 @@ def perform(%Job{args: %{"op" => "add_to_index", "activity" => activity_id}}) do search_module = Pleroma.Config.get([Pleroma.Search, :module]) search_module.add_to_index(activity) - - :ok end def perform(%Job{args: %{"op" => "remove_from_index", "object" => object_id}}) do @@ -19,7 +17,5 @@ def perform(%Job{args: %{"op" => "remove_from_index", "object" => object_id}}) d search_module = Pleroma.Config.get([Pleroma.Search, :module]) search_module.remove_from_index(object) - - :ok end end From 6256822afd368e5f6b410d47c5ff9b584e50a461 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 27 Aug 2022 01:11:50 +0300 Subject: [PATCH 55/63] Check for updateId, not uid --- lib/pleroma/search/meilisearch.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 24789b00c4..0b90971b18 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -153,7 +153,7 @@ def add_to_index(activity) do ) with {:ok, res} <- result, - true <- Map.has_key?(res, "uid") do + true <- Map.has_key?(res, "updateId") do # Added successfully :ok else From 5a39866388c411f2bcee9848352f8c420513f34f Mon Sep 
17 00:00:00 2001 From: Ekaterina Vaartis Date: Sat, 27 Aug 2022 01:43:59 +0300 Subject: [PATCH 56/63] Specifically strip mentions for search indexing --- lib/mix/tasks/pleroma/search/meilisearch.ex | 1 + lib/pleroma/search/meilisearch.ex | 3 ++- priv/scrubbers/search_indexing.ex | 24 +++++++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 priv/scrubbers/search_indexing.ex diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 72a5582282..8379a0c252 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -13,6 +13,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do def run(["index"]) do start_pleroma() + Pleroma.HTML.compile_scrubbers() meili_version = ( diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 0b90971b18..7af7f460a6 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -122,7 +122,8 @@ def object_to_search_data(object) do end content = - with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str), + with {:ok, scrubbed} <- + FastSanitize.Sanitizer.scrub(content_str, Pleroma.HTML.Scrubber.SearchIndexing), trimmed <- String.trim(scrubbed) do trimmed end diff --git a/priv/scrubbers/search_indexing.ex b/priv/scrubbers/search_indexing.ex new file mode 100644 index 0000000000..02756ab797 --- /dev/null +++ b/priv/scrubbers/search_indexing.ex @@ -0,0 +1,24 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2022 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.HTML.Scrubber.SearchIndexing do + @moduledoc """ + An HTML scrubbing policy that scrubs things for searching. 
+ """ + + require FastSanitize.Sanitizer.Meta + alias FastSanitize.Sanitizer.Meta + + # Explicitly remove mentions + def scrub({:a, attrs, children}) do + if(Enum.any?(attrs, fn {att, val} -> att == "class" and String.contains?(val, "mention") end), + do: nil, + # Strip the tag itself, leave only children (text, presumably) + else: children + ) + end + + Meta.strip_comments() + Meta.strip_everything_not_covered() +end From c1402af2934219b6ab5dc40a7d87a8c916554647 Mon Sep 17 00:00:00 2001 From: Lain Soykaf Date: Sun, 12 Nov 2023 14:49:15 +0400 Subject: [PATCH 57/63] B Getting: Add default implementation, delegate, prepare test support. --- config/test.exs | 2 ++ lib/pleroma/config/getting.ex | 7 +++++++ test/support/mocks.ex | 1 + 3 files changed, 10 insertions(+) diff --git a/config/test.exs b/config/test.exs index 23489d4522..5e8135a586 100644 --- a/config/test.exs +++ b/config/test.exs @@ -141,6 +141,8 @@ # https://dashbit.co/blog/speeding-up-re-compilation-of-elixir-projects config :phoenix, :plug_init_mode, :runtime +config :pleroma, :config_impl, Pleroma.UnstubbedConfigMock + if File.exists?("./config/test.secret.exs") do import_config "test.secret.exs" else diff --git a/lib/pleroma/config/getting.ex b/lib/pleroma/config/getting.ex index f9b66bba6e..0de4782ea5 100644 --- a/lib/pleroma/config/getting.ex +++ b/lib/pleroma/config/getting.ex @@ -5,4 +5,11 @@ defmodule Pleroma.Config.Getting do @callback get(any()) :: any() @callback get(any(), any()) :: any() + + def get(key), do: get(key, nil) + def get(key, default), do: impl().get(key, default) + + def impl() do + Application.get_env(:pleroma, :config_impl, Pleroma.Config) + end end diff --git a/test/support/mocks.ex b/test/support/mocks.ex index d167996bd8..9693095bad 100644 --- a/test/support/mocks.ex +++ b/test/support/mocks.ex @@ -26,5 +26,6 @@ Mox.defmock(Pleroma.Web.FederatorMock, for: Pleroma.Web.Federator.Publishing) Mox.defmock(Pleroma.ConfigMock, for: Pleroma.Config.Getting) 
+Mox.defmock(Pleroma.UnstubbedConfigMock, for: Pleroma.Config.Getting) Mox.defmock(Pleroma.LoggerMock, for: Pleroma.Logging) From d3f8950588b444dfdf46d87d5631720cc14a907c Mon Sep 17 00:00:00 2001 From: Lain Soykaf Date: Sun, 12 Nov 2023 14:49:50 +0400 Subject: [PATCH 58/63] B MeiliSearch, SearchIndexingWorker: Use Config.Getting, make tests async. --- lib/pleroma/search/meilisearch.ex | 30 ++++--- lib/pleroma/workers/search_indexing_worker.ex | 6 +- test/pleroma/search/meilisearch_test.exs | 84 +++++++++++-------- 3 files changed, 72 insertions(+), 48 deletions(-) diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 7af7f460a6..eed9fca1c9 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -3,6 +3,7 @@ defmodule Pleroma.Search.Meilisearch do require Pleroma.Constants alias Pleroma.Activity + alias Pleroma.Config.Getting, as: Config import Pleroma.Search.DatabaseSearch import Ecto.Query @@ -10,14 +11,14 @@ defmodule Pleroma.Search.Meilisearch do @behaviour Pleroma.Search.SearchBackend defp meili_headers do - private_key = Pleroma.Config.get([Pleroma.Search.Meilisearch, :private_key]) + private_key = Config.get([Pleroma.Search.Meilisearch, :private_key]) [{"Content-Type", "application/json"}] ++ if is_nil(private_key), do: [], else: [{"Authorization", "Bearer #{private_key}"}] end def meili_get(path) do - endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + endpoint = Config.get([Pleroma.Search.Meilisearch, :url]) result = Pleroma.HTTP.get( @@ -31,7 +32,7 @@ def meili_get(path) do end def meili_post(path, params) do - endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + endpoint = Config.get([Pleroma.Search.Meilisearch, :url]) result = Pleroma.HTTP.post( @@ -46,7 +47,7 @@ def meili_post(path, params) do end def meili_put(path, params) do - endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + endpoint = Config.get([Pleroma.Search.Meilisearch, :url]) 
result = Pleroma.HTTP.request( @@ -63,15 +64,20 @@ def meili_put(path, params) do end def meili_delete(path) do - endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + endpoint = Config.get([Pleroma.Search.Meilisearch, :url]) - Pleroma.HTTP.request( - :delete, - Path.join(endpoint, path), - "", - meili_headers(), - [] - ) + with {:ok, _} <- + Pleroma.HTTP.request( + :delete, + Path.join(endpoint, path), + "", + meili_headers(), + [] + ) do + :ok + else + _ -> :error + end end @impl true diff --git a/lib/pleroma/workers/search_indexing_worker.ex b/lib/pleroma/workers/search_indexing_worker.ex index 43b7bad1e3..8476a2be50 100644 --- a/lib/pleroma/workers/search_indexing_worker.ex +++ b/lib/pleroma/workers/search_indexing_worker.ex @@ -3,10 +3,12 @@ defmodule Pleroma.Workers.SearchIndexingWorker do @impl Oban.Worker + alias Pleroma.Config.Getting, as: Config + def perform(%Job{args: %{"op" => "add_to_index", "activity" => activity_id}}) do activity = Pleroma.Activity.get_by_id_with_object(activity_id) - search_module = Pleroma.Config.get([Pleroma.Search, :module]) + search_module = Config.get([Pleroma.Search, :module]) search_module.add_to_index(activity) end @@ -14,7 +16,7 @@ def perform(%Job{args: %{"op" => "add_to_index", "activity" => activity_id}}) do def perform(%Job{args: %{"op" => "remove_from_index", "object" => object_id}}) do object = Pleroma.Object.get_by_id(object_id) - search_module = Pleroma.Config.get([Pleroma.Search, :module]) + search_module = Config.get([Pleroma.Search, :module]) search_module.remove_from_index(object) end diff --git a/test/pleroma/search/meilisearch_test.exs b/test/pleroma/search/meilisearch_test.exs index 04a2d75d91..3a267385d1 100644 --- a/test/pleroma/search/meilisearch_test.exs +++ b/test/pleroma/search/meilisearch_test.exs @@ -5,51 +5,50 @@ defmodule Pleroma.Search.MeilisearchTest do require Pleroma.Constants - use Pleroma.DataCase + use Pleroma.DataCase, async: true use Oban.Testing, repo: Pleroma.Repo import 
Pleroma.Factory import Tesla.Mock - import Mock + import Mox alias Pleroma.Search.Meilisearch alias Pleroma.Web.CommonAPI alias Pleroma.Workers.SearchIndexingWorker - - setup_all do - Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) - :ok - end + alias Pleroma.UnstubbedConfigMock, as: Config describe "meilisearch" do - setup do: clear_config([Pleroma.Search, :module], Meilisearch) - - setup_with_mocks( - [ - {Meilisearch, [:passthrough], - [ - add_to_index: fn a -> passthrough([a]) end, - remove_from_index: fn a -> passthrough([a]) end, - meili_put: fn u, a -> passthrough([u, a]) end - ]} - ], - context, - do: {:ok, context} - ) - test "indexes a local post on creation" do user = insert(:user) - mock_global(fn - %{method: :put, url: "http://127.0.0.1:7700/indexes/objects/documents", body: body} -> + Tesla.Mock.mock(fn + %{ + method: :put, + url: "http://127.0.0.1:7700/indexes/objects/documents", + body: body + } -> assert match?( [%{"content" => "guys i just don't wanna leave the swamp"}], Jason.decode!(body) ) + # To make sure that the worker is called + send(self(), "posted_to_meilisearch") json(%{updateId: 1}) end) + Config + |> expect(:get, 3, fn + [Pleroma.Search, :module], nil -> + Meilisearch + + [Pleroma.Search.Meilisearch, :url], nil -> + "http://127.0.0.1:7700" + + [Pleroma.Search.Meilisearch, :private_key], nil -> + "secret" + end) + {:ok, activity} = CommonAPI.post(user, %{ status: "guys i just don't wanna leave the swamp", @@ -64,8 +63,7 @@ test "indexes a local post on creation" do ) assert :ok = perform_job(SearchIndexingWorker, args) - - assert_called(Meilisearch.add_to_index(activity)) + assert_received("posted_to_meilisearch") end test "doesn't index posts that are not public" do @@ -80,21 +78,26 @@ test "doesn't index posts that are not public" do args = %{"op" => "add_to_index", "activity" => activity.id} + Config + |> expect(:get, fn + [Pleroma.Search, :module], nil -> + Meilisearch + end) + assert_enqueued(worker: 
SearchIndexingWorker, args: args) assert :ok = perform_job(SearchIndexingWorker, args) - - assert_not_called(Meilisearch.meili_put(:_)) end) - - history = call_history(Meilisearch) - assert Enum.count(history) == 2 end test "deletes posts from index when deleted locally" do user = insert(:user) - mock_global(fn - %{method: :put, url: "http://127.0.0.1:7700/indexes/objects/documents", body: body} -> + Tesla.Mock.mock(fn + %{ + method: :put, + url: "http://127.0.0.1:7700/indexes/objects/documents", + body: body + } -> assert match?( [%{"content" => "guys i just don't wanna leave the swamp"}], Jason.decode!(body) @@ -103,10 +106,23 @@ test "deletes posts from index when deleted locally" do json(%{updateId: 1}) %{method: :delete, url: "http://127.0.0.1:7700/indexes/objects/documents/" <> id} -> + send(self(), "called_delete") assert String.length(id) > 1 json(%{updateId: 2}) end) + Config + |> expect(:get, 6, fn + [Pleroma.Search, :module], nil -> + Meilisearch + + [Pleroma.Search.Meilisearch, :url], nil -> + "http://127.0.0.1:7700" + + [Pleroma.Search.Meilisearch, :private_key], nil -> + "secret" + end) + {:ok, activity} = CommonAPI.post(user, %{ status: "guys i just don't wanna leave the swamp", @@ -123,7 +139,7 @@ test "deletes posts from index when deleted locally" do assert_enqueued(worker: SearchIndexingWorker, args: delete_args) assert :ok = perform_job(SearchIndexingWorker, delete_args) - assert_called(Meilisearch.remove_from_index(:_)) + assert_received("called_delete") end end end From 5208bd8a9510f6aee8faf888643f07d059968ad4 Mon Sep 17 00:00:00 2001 From: Lain Soykaf Date: Sun, 12 Nov 2023 15:00:51 +0400 Subject: [PATCH 59/63] Add changelog. 
--- changelog.d/meilisearch.add | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/meilisearch.add diff --git a/changelog.d/meilisearch.add b/changelog.d/meilisearch.add new file mode 100644 index 0000000000..4856eea2ed --- /dev/null +++ b/changelog.d/meilisearch.add @@ -0,0 +1 @@ +Add meilisearch, make search engines pluggable From 5996bef7cdbe311443df3a75daf3968aee92eba6 Mon Sep 17 00:00:00 2001 From: Lain Soykaf Date: Sun, 12 Nov 2023 15:54:16 +0400 Subject: [PATCH 60/63] Fix most tests that call SearchIndexWorker. --- test/mix/tasks/pleroma/digest_test.exs | 5 +++++ test/mix/tasks/pleroma/user_test.exs | 5 +++++ test/pleroma/conversation_test.exs | 5 +++++ test/pleroma/notification_test.exs | 5 +++++ test/pleroma/user_test.exs | 5 +++++ .../web/activity_pub/activity_pub_controller_test.exs | 5 +++++ .../web/admin_api/controllers/user_controller_test.exs | 5 +++++ .../web/mastodon_api/controllers/account_controller_test.exs | 5 +++++ .../controllers/notification_controller_test.exs | 5 +++++ .../web/mastodon_api/controllers/search_controller_test.exs | 5 +++++ .../web/mastodon_api/controllers/status_controller_test.exs | 5 +++++ .../web/mastodon_api/views/notification_view_test.exs | 5 +++++ .../controllers/emoji_reaction_controller_test.exs | 5 +++++ test/pleroma/workers/cron/digest_emails_worker_test.exs | 5 +++++ test/pleroma/workers/cron/new_users_digest_worker_test.exs | 5 +++++ 15 files changed, 75 insertions(+) diff --git a/test/mix/tasks/pleroma/digest_test.exs b/test/mix/tasks/pleroma/digest_test.exs index d2a8606c7d..08482aadb6 100644 --- a/test/mix/tasks/pleroma/digest_test.exs +++ b/test/mix/tasks/pleroma/digest_test.exs @@ -23,6 +23,11 @@ defmodule Mix.Tasks.Pleroma.DigestTest do setup do: clear_config([Pleroma.Emails.Mailer, :enabled], true) + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + describe "pleroma.digest test" do test "Sends digest to the given user" do user1 = insert(:user) diff --git 
a/test/mix/tasks/pleroma/user_test.exs b/test/mix/tasks/pleroma/user_test.exs index 4fdf6912b7..c9bcf29513 100644 --- a/test/mix/tasks/pleroma/user_test.exs +++ b/test/mix/tasks/pleroma/user_test.exs @@ -20,6 +20,11 @@ defmodule Mix.Tasks.Pleroma.UserTest do import Mock import Pleroma.Factory + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + setup_all do Mix.shell(Mix.Shell.Process) diff --git a/test/pleroma/conversation_test.exs b/test/pleroma/conversation_test.exs index 94897e7eab..809c1951ad 100644 --- a/test/pleroma/conversation_test.exs +++ b/test/pleroma/conversation_test.exs @@ -13,6 +13,11 @@ defmodule Pleroma.ConversationTest do setup_all do: clear_config([:instance, :federating], true) + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + test "it goes through old direct conversations" do user = insert(:user) other_user = insert(:user) diff --git a/test/pleroma/notification_test.exs b/test/pleroma/notification_test.exs index 71af9acb81..4cf14e65bf 100644 --- a/test/pleroma/notification_test.exs +++ b/test/pleroma/notification_test.exs @@ -21,6 +21,11 @@ defmodule Pleroma.NotificationTest do alias Pleroma.Web.Push alias Pleroma.Web.Streamer + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + describe "create_notifications" do test "never returns nil" do user = insert(:user) diff --git a/test/pleroma/user_test.exs b/test/pleroma/user_test.exs index 7f60b959af..b9df527a08 100644 --- a/test/pleroma/user_test.exs +++ b/test/pleroma/user_test.exs @@ -19,6 +19,11 @@ defmodule Pleroma.UserTest do import ExUnit.CaptureLog import Swoosh.TestAssertions + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + setup_all do Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) :ok diff --git a/test/pleroma/web/activity_pub/activity_pub_controller_test.exs 
b/test/pleroma/web/activity_pub/activity_pub_controller_test.exs index 62eb9b5a32..0dc61c2e56 100644 --- a/test/pleroma/web/activity_pub/activity_pub_controller_test.exs +++ b/test/pleroma/web/activity_pub/activity_pub_controller_test.exs @@ -25,6 +25,11 @@ defmodule Pleroma.Web.ActivityPub.ActivityPubControllerTest do require Pleroma.Constants + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + setup_all do Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) :ok diff --git a/test/pleroma/web/admin_api/controllers/user_controller_test.exs b/test/pleroma/web/admin_api/controllers/user_controller_test.exs index bb9dcb4aae..8edfda54c3 100644 --- a/test/pleroma/web/admin_api/controllers/user_controller_test.exs +++ b/test/pleroma/web/admin_api/controllers/user_controller_test.exs @@ -19,6 +19,11 @@ defmodule Pleroma.Web.AdminAPI.UserControllerTest do alias Pleroma.Web.Endpoint alias Pleroma.Web.MediaProxy + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + setup_all do Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) diff --git a/test/pleroma/web/mastodon_api/controllers/account_controller_test.exs b/test/pleroma/web/mastodon_api/controllers/account_controller_test.exs index 128e60b0a2..d8e5f9d396 100644 --- a/test/pleroma/web/mastodon_api/controllers/account_controller_test.exs +++ b/test/pleroma/web/mastodon_api/controllers/account_controller_test.exs @@ -18,6 +18,11 @@ defmodule Pleroma.Web.MastodonAPI.AccountControllerTest do import Pleroma.Factory + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + describe "account fetching" do test "works by id" do %User{id: user_id} = insert(:user) diff --git a/test/pleroma/web/mastodon_api/controllers/notification_controller_test.exs b/test/pleroma/web/mastodon_api/controllers/notification_controller_test.exs index 1524df98fa..350b935d78 100644 --- 
a/test/pleroma/web/mastodon_api/controllers/notification_controller_test.exs +++ b/test/pleroma/web/mastodon_api/controllers/notification_controller_test.exs @@ -12,6 +12,11 @@ defmodule Pleroma.Web.MastodonAPI.NotificationControllerTest do import Pleroma.Factory + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + test "does NOT render account/pleroma/relationship by default" do %{user: user, conn: conn} = oauth_access(["read:notifications"]) other_user = insert(:user) diff --git a/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs b/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs index 0a9240b70f..19dee25d71 100644 --- a/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs +++ b/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs @@ -13,6 +13,11 @@ defmodule Pleroma.Web.MastodonAPI.SearchControllerTest do import Tesla.Mock import Mock + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + setup_all do mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) :ok diff --git a/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs b/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs index de3b52e265..db2688f80b 100644 --- a/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs +++ b/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs @@ -27,6 +27,11 @@ defmodule Pleroma.Web.MastodonAPI.StatusControllerTest do setup do: clear_config([:mrf, :policies]) setup do: clear_config([:mrf_keyword, :reject]) + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + describe "posting statuses" do setup do: oauth_access(["write:statuses"]) diff --git a/test/pleroma/web/mastodon_api/views/notification_view_test.exs b/test/pleroma/web/mastodon_api/views/notification_view_test.exs index ddbe4557f1..47425d2a9e 100644 --- 
a/test/pleroma/web/mastodon_api/views/notification_view_test.exs +++ b/test/pleroma/web/mastodon_api/views/notification_view_test.exs @@ -22,6 +22,11 @@ defmodule Pleroma.Web.MastodonAPI.NotificationViewTest do alias Pleroma.Web.PleromaAPI.Chat.MessageReferenceView import Pleroma.Factory + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + defp test_notifications_rendering(notifications, user, expected_result) do result = NotificationView.render("index.json", %{notifications: notifications, for: user}) diff --git a/test/pleroma/web/pleroma_api/controllers/emoji_reaction_controller_test.exs b/test/pleroma/web/pleroma_api/controllers/emoji_reaction_controller_test.exs index 21e7d4839f..8c2dcc1bb9 100644 --- a/test/pleroma/web/pleroma_api/controllers/emoji_reaction_controller_test.exs +++ b/test/pleroma/web/pleroma_api/controllers/emoji_reaction_controller_test.exs @@ -13,6 +13,11 @@ defmodule Pleroma.Web.PleromaAPI.EmojiReactionControllerTest do import Pleroma.Factory + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + test "PUT /api/v1/pleroma/statuses/:id/reactions/:emoji", %{conn: conn} do user = insert(:user) other_user = insert(:user) diff --git a/test/pleroma/workers/cron/digest_emails_worker_test.exs b/test/pleroma/workers/cron/digest_emails_worker_test.exs index 851f4d63ab..e0bdf303e8 100644 --- a/test/pleroma/workers/cron/digest_emails_worker_test.exs +++ b/test/pleroma/workers/cron/digest_emails_worker_test.exs @@ -13,6 +13,11 @@ defmodule Pleroma.Workers.Cron.DigestEmailsWorkerTest do setup do: clear_config([:email_notifications, :digest]) + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + setup do clear_config([:email_notifications, :digest], %{ active: true, diff --git a/test/pleroma/workers/cron/new_users_digest_worker_test.exs b/test/pleroma/workers/cron/new_users_digest_worker_test.exs index 84914876cc..0e4234cc8e 100644 --- 
a/test/pleroma/workers/cron/new_users_digest_worker_test.exs +++ b/test/pleroma/workers/cron/new_users_digest_worker_test.exs @@ -10,6 +10,11 @@ defmodule Pleroma.Workers.Cron.NewUsersDigestWorkerTest do alias Pleroma.Web.CommonAPI alias Pleroma.Workers.Cron.NewUsersDigestWorker + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + test "it sends new users digest emails" do yesterday = NaiveDateTime.utc_now() |> Timex.shift(days: -1) admin = insert(:user, %{is_admin: true}) From a1a25029da74949a79c73d400b6f2bc0bf1dc01a Mon Sep 17 00:00:00 2001 From: Lain Soykaf Date: Sun, 12 Nov 2023 16:19:54 +0400 Subject: [PATCH 61/63] B DatabaseSearch: Fix local-only search. --- lib/pleroma/search/database_search.ex | 25 ++++++++++++++----- ...earch_test.ex => database_search_test.exs} | 8 +++--- 2 files changed, 23 insertions(+), 10 deletions(-) rename test/pleroma/search/{database_search_test.ex => database_search_test.exs} (88%) diff --git a/lib/pleroma/search/database_search.ex b/lib/pleroma/search/database_search.ex index 9a340abf15..f4c4057739 100644 --- a/lib/pleroma/search/database_search.ex +++ b/lib/pleroma/search/database_search.ex @@ -8,6 +8,7 @@ defmodule Pleroma.Search.DatabaseSearch do alias Pleroma.Pagination alias Pleroma.User alias Pleroma.Web.ActivityPub.Visibility + alias Pleroma.Config require Pleroma.Constants @@ -17,7 +18,7 @@ defmodule Pleroma.Search.DatabaseSearch do @impl true def search(user, search_query, options \\ []) do - index_type = if Pleroma.Config.get([:database, :rum_enabled]), do: :rum, else: :gin + index_type = if Config.get([:database, :rum_enabled]), do: :rum, else: :gin limit = Enum.min([Keyword.get(options, :limit), 40]) offset = Keyword.get(options, :offset, 0) author = Keyword.get(options, :author) @@ -33,7 +34,7 @@ def search(user, search_query, options \\ []) do Activity |> Activity.with_preloaded_object() |> Activity.restrict_deactivated_users() - |> restrict_public() + |> restrict_public(user) |> 
query_with(index_type, search_query, search_function) |> maybe_restrict_local(user) |> maybe_restrict_author(author) @@ -49,10 +50,10 @@ def search(user, search_query, options \\ []) do end @impl true - def add_to_index(_activity), do: nil + def add_to_index(_activity), do: :ok @impl true - def remove_from_index(_object), do: nil + def remove_from_index(_object), do: :ok def maybe_restrict_author(query, %User{} = author) do Activity.Queries.by_author(query, author) @@ -66,7 +67,19 @@ def maybe_restrict_blocked(query, %User{} = user) do def maybe_restrict_blocked(query, _), do: query - def restrict_public(q) do + defp restrict_public(q, user) when not is_nil(user) do + intended_recipients = [ + Pleroma.Constants.as_public(), + Pleroma.Web.ActivityPub.Utils.as_local_public() + ] + + from([a, o] in q, + where: fragment("?->>'type' = 'Create'", a.data), + where: fragment("? && ?", ^intended_recipients, a.recipients) + ) + end + + defp restrict_public(q, _user) do from([a, o] in q, where: fragment("?->>'type' = 'Create'", a.data), where: ^Pleroma.Constants.as_public() in a.recipients @@ -134,7 +147,7 @@ defp query_with(q, :rum, search_query, :websearch) do end def maybe_restrict_local(q, user) do - limit = Pleroma.Config.get([:instance, :limit_to_local_content], :unauthenticated) + limit = Config.get([:instance, :limit_to_local_content], :unauthenticated) case {limit, user} do {:all, _} -> restrict_local(q) diff --git a/test/pleroma/search/database_search_test.ex b/test/pleroma/search/database_search_test.exs similarity index 88% rename from test/pleroma/search/database_search_test.ex rename to test/pleroma/search/database_search_test.exs index c123d0b84c..6c47ff4254 100644 --- a/test/pleroma/search/database_search_test.ex +++ b/test/pleroma/search/database_search_test.exs @@ -3,7 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Search.DatabaseSearchTest do - alias Pleroma.Search.DatabaseSearch + alias Pleroma.Search.DatabaseSearch, as: Search alias 
Pleroma.Web.CommonAPI import Pleroma.Factory @@ -13,7 +13,7 @@ test "it finds something" do user = insert(:user) {:ok, post} = CommonAPI.post(user, %{status: "it's wednesday my dudes"}) - [result] = DatabaseSearch.search(nil, "wednesday") + [result] = Search.search(nil, "wednesday") assert result.id == post.id end @@ -45,7 +45,7 @@ test "using plainto_tsquery on postgres < 11" do {:ok, _post2} = CommonAPI.post(user, %{status: "it's wednesday my bros"}) # plainto doesn't understand complex queries - assert [result] = DatabaseSearch.search(nil, "wednesday -dudes") + assert [result] = Search.search(nil, "wednesday -dudes") assert result.id == post.id end @@ -55,7 +55,7 @@ test "using websearch_to_tsquery" do {:ok, _post} = CommonAPI.post(user, %{status: "it's wednesday my dudes"}) {:ok, other_post} = CommonAPI.post(user, %{status: "it's wednesday my bros"}) - assert [result] = DatabaseSearch.search(nil, "wednesday -dudes") + assert [result] = Search.search(nil, "wednesday -dudes") assert result.id == other_post.id end From 59018d73c366d9297efe83d290c717d1a3e4756a Mon Sep 17 00:00:00 2001 From: Lain Soykaf Date: Sun, 12 Nov 2023 16:43:50 +0400 Subject: [PATCH 62/63] B Meilisearch: Update to current API responses. 
--- lib/pleroma/search/meilisearch.ex | 5 ++--- test/pleroma/search/meilisearch_test.exs | 21 ++++++++++++++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index eed9fca1c9..2bff663e88 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -76,7 +76,7 @@ def meili_delete(path) do ) do :ok else - _ -> :error + _ -> {:error, "Could not remove from index"} end end @@ -159,8 +159,7 @@ def add_to_index(activity) do [maybe_search_data] ) - with {:ok, res} <- result, - true <- Map.has_key?(res, "updateId") do + with {:ok, %{"status" => "enqueued"}} <- result do # Added successfully :ok else diff --git a/test/pleroma/search/meilisearch_test.exs b/test/pleroma/search/meilisearch_test.exs index 3a267385d1..39592c5f8a 100644 --- a/test/pleroma/search/meilisearch_test.exs +++ b/test/pleroma/search/meilisearch_test.exs @@ -34,7 +34,15 @@ test "indexes a local post on creation" do # To make sure that the worker is called send(self(), "posted_to_meilisearch") - json(%{updateId: 1}) + + %{ + "enqueuedAt" => "2023-11-12T12:36:46.927517Z", + "indexUid" => "objects", + "status" => "enqueued", + "taskUid" => 6, + "type" => "documentAdditionOrUpdate" + } + |> json() end) Config @@ -103,12 +111,19 @@ test "deletes posts from index when deleted locally" do Jason.decode!(body) ) - json(%{updateId: 1}) + %{ + "enqueuedAt" => "2023-11-12T12:36:46.927517Z", + "indexUid" => "objects", + "status" => "enqueued", + "taskUid" => 6, + "type" => "documentAdditionOrUpdate" + } + |> json() %{method: :delete, url: "http://127.0.0.1:7700/indexes/objects/documents/" <> id} -> send(self(), "called_delete") assert String.length(id) > 1 - json(%{updateId: 2}) + json(%{}) end) Config From 3d62c71edf8782c5ceae5a0ea3ba5ec08dc5b948 Mon Sep 17 00:00:00 2001 From: Lain Soykaf Date: Sun, 12 Nov 2023 17:13:27 +0400 Subject: [PATCH 63/63] Credo fixes. 
--- lib/pleroma/config/getting.ex | 2 +- lib/pleroma/search/database_search.ex | 2 +- test/pleroma/search/meilisearch_test.exs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/pleroma/config/getting.ex b/lib/pleroma/config/getting.ex index 0de4782ea5..ec93fd02ae 100644 --- a/lib/pleroma/config/getting.ex +++ b/lib/pleroma/config/getting.ex @@ -9,7 +9,7 @@ defmodule Pleroma.Config.Getting do def get(key), do: get(key, nil) def get(key, default), do: impl().get(key, default) - def impl() do + def impl do Application.get_env(:pleroma, :config_impl, Pleroma.Config) end end diff --git a/lib/pleroma/search/database_search.ex b/lib/pleroma/search/database_search.ex index f4c4057739..c6311e0c77 100644 --- a/lib/pleroma/search/database_search.ex +++ b/lib/pleroma/search/database_search.ex @@ -4,11 +4,11 @@ defmodule Pleroma.Search.DatabaseSearch do alias Pleroma.Activity + alias Pleroma.Config alias Pleroma.Object.Fetcher alias Pleroma.Pagination alias Pleroma.User alias Pleroma.Web.ActivityPub.Visibility - alias Pleroma.Config require Pleroma.Constants diff --git a/test/pleroma/search/meilisearch_test.exs b/test/pleroma/search/meilisearch_test.exs index 39592c5f8a..eea4543232 100644 --- a/test/pleroma/search/meilisearch_test.exs +++ b/test/pleroma/search/meilisearch_test.exs @@ -13,9 +13,9 @@ defmodule Pleroma.Search.MeilisearchTest do import Mox alias Pleroma.Search.Meilisearch + alias Pleroma.UnstubbedConfigMock, as: Config alias Pleroma.Web.CommonAPI alias Pleroma.Workers.SearchIndexingWorker - alias Pleroma.UnstubbedConfigMock, as: Config describe "meilisearch" do test "indexes a local post on creation" do