From 1bf3ae07b6719a762310615811a317035175ad2e Mon Sep 17 00:00:00 2001 From: faried nawaz Date: Mon, 18 Sep 2023 02:13:01 +0500 Subject: [PATCH 01/22] add options to mix pleroma.database prune_objects to delete more activities --- lib/mix/tasks/pleroma/database.ex | 165 +++++++- test/mix/tasks/pleroma/database_test.exs | 487 ++++++++++++++++++++++- 2 files changed, 625 insertions(+), 27 deletions(-) diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index 93ee57dc33..13ac6536c4 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -67,43 +67,168 @@ def run(["prune_objects" | args]) do OptionParser.parse( args, strict: [ - vacuum: :boolean + vacuum: :boolean, + keep_threads: :boolean, + keep_non_public: :boolean, + prune_orphaned_activities: :boolean ] ) start_pleroma() deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + time_deadline = NaiveDateTime.utc_now() |> NaiveDateTime.add(-(deadline * 86_400)) - Logger.info("Pruning objects older than #{deadline} days") + log_message = "Pruning objects older than #{deadline} days" - time_deadline = - NaiveDateTime.utc_now() - |> NaiveDateTime.add(-(deadline * 86_400)) + log_message = + if Keyword.get(options, :keep_non_public) do + log_message <> ", keeping non public posts" + else + log_message + end - from(o in Object, - where: - fragment( - "?->'to' \\? ? OR ?->'cc' \\? 
?", - o.data, - ^Pleroma.Constants.as_public(), - o.data, - ^Pleroma.Constants.as_public() - ), - where: o.inserted_at < ^time_deadline, - where: + log_message = + if Keyword.get(options, :keep_threads) do + log_message <> ", keeping threads intact" + else + log_message + end + + log_message = + if Keyword.get(options, :prune_orphaned_activities) do + log_message <> ", pruning orphaned activities" + else + log_message + end + + log_message = + if Keyword.get(options, :vacuum) do + log_message <> + ", doing a full vacuum (you shouldn't do this as a recurring maintanance task)" + else + log_message + end + + Logger.info(log_message) + + if Keyword.get(options, :keep_threads) do + # We want to delete objects from threads where + # 1. the newest post is still old + # 2. none of the activities is local + # 3. none of the activities is bookmarked + # 4. optionally none of the posts is non-public + deletable_context = + if Keyword.get(options, :keep_non_public) do + Pleroma.Activity + |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id) + |> group_by([a], fragment("? ->> 'context'::text", a.data)) + |> having( + [a], + not fragment( + # Posts (checked on Create Activity) is non-public + "bool_or((not(?->'to' \\? ? OR ?->'cc' \\? ?)) and ? ->> 'type' = 'Create')", + a.data, + ^Pleroma.Constants.as_public(), + a.data, + ^Pleroma.Constants.as_public(), + a.data + ) + ) + else + Pleroma.Activity + |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id) + |> group_by([a], fragment("? ->> 'context'::text", a.data)) + end + |> having([a], max(a.updated_at) < ^time_deadline) + |> having([a], not fragment("bool_or(?)", a.local)) + |> having([_, b], fragment("max(?::text) is null", b.id)) + |> select([a], fragment("? ->> 'context'::text", a.data)) + + Pleroma.Object + |> where([o], fragment("? 
->> 'context'::text", o.data) in subquery(deletable_context)) + else + if Keyword.get(options, :keep_non_public) do + Pleroma.Object + |> where( + [o], + fragment( + "?->'to' \\? ? OR ?->'cc' \\? ?", + o.data, + ^Pleroma.Constants.as_public(), + o.data, + ^Pleroma.Constants.as_public() + ) + ) + else + Pleroma.Object + end + |> where([o], o.updated_at < ^time_deadline) + |> where( + [o], fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host()) - ) + ) + end |> Repo.delete_all(timeout: :infinity) - prune_hashtags_query = """ + if !Keyword.get(options, :keep_threads) do + # Without the --keep-threads option, it's possible that bookmarked + # objects have been deleted. We remove the corresponding bookmarks. + """ + delete from public.bookmarks + where id in ( + select b.id from public.bookmarks b + left join public.activities a on b.activity_id = a.id + left join public.objects o on a."data" ->> 'object' = o.data ->> 'id' + where o.id is null + ) + """ + |> Repo.query([], timeout: :infinity) + end + + if Keyword.get(options, :prune_orphaned_activities) do + # Prune activities who link to a single object + """ + delete from public.activities + where id in ( + select a.id from public.activities a + left join public.objects o on a.data ->> 'object' = o.data ->> 'id' + left join public.activities a2 on a.data ->> 'object' = a2.data ->> 'id' + left join public.users u on a.data ->> 'object' = u.ap_id + where not a.local + and jsonb_typeof(a."data" -> 'object') = 'string' + and o.id is null + and a2.id is null + and u.id is null + ) + """ + |> Repo.query([], timeout: :infinity) + + # Prune activities who link to an array of objects + """ + delete from public.activities + where id in ( + select a.id from public.activities a + join json_array_elements_text((a."data" -> 'object')::json) as j on jsonb_typeof(a."data" -> 'object') = 'array' + left join public.objects o on j.value = o.data ->> 'id' + left join public.activities a2 on j.value = 
a2.data ->> 'id' + left join public.users u on j.value = u.ap_id + group by a.id + having max(o.data ->> 'id') is null + and max(a2.data ->> 'id') is null + and max(u.ap_id) is null + ) + """ + |> Repo.query([], timeout: :infinity) + end + + """ DELETE FROM hashtags AS ht WHERE NOT EXISTS ( SELECT 1 FROM hashtags_objects hto WHERE ht.id = hto.hashtag_id) """ - - Repo.query(prune_hashtags_query) + |> Repo.query() if Keyword.get(options, :vacuum) do Maintenance.vacuum("full") diff --git a/test/mix/tasks/pleroma/database_test.exs b/test/mix/tasks/pleroma/database_test.exs index fbc9391712..01fd97e2dc 100644 --- a/test/mix/tasks/pleroma/database_test.exs +++ b/test/mix/tasks/pleroma/database_test.exs @@ -7,6 +7,7 @@ defmodule Mix.Tasks.Pleroma.DatabaseTest do use Oban.Testing, repo: Pleroma.Repo alias Pleroma.Activity + alias Pleroma.Bookmark alias Pleroma.Object alias Pleroma.Repo alias Pleroma.User @@ -45,28 +46,500 @@ test "it replaces objects with references" do end describe "prune_objects" do - test "it prunes old objects from the database" do - insert(:note) + setup do deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1 - date = + old_insert_date = Timex.now() |> Timex.shift(days: -deadline) |> Timex.to_naive_datetime() |> NaiveDateTime.truncate(:second) - %{id: id} = + %{old_insert_date: old_insert_date} + end + + test "it prunes old objects from the database", %{old_insert_date: old_insert_date} do + insert(:note) + + %{id: note_remote_public_id} = :note |> insert() - |> Ecto.Changeset.change(%{inserted_at: date}) + |> Ecto.Changeset.change(%{updated_at: old_insert_date}) |> Repo.update!() - assert length(Repo.all(Object)) == 2 + note_remote_non_public = + %{id: note_remote_non_public_id, data: note_remote_non_public_data} = + :note + |> insert() + + note_remote_non_public + |> Ecto.Changeset.change(%{ + updated_at: old_insert_date, + data: note_remote_non_public_data |> update_in(["to"], fn _ -> [] end) + }) + |> Repo.update!() + + 
assert length(Repo.all(Object)) == 3 Mix.Tasks.Pleroma.Database.run(["prune_objects"]) assert length(Repo.all(Object)) == 1 - refute Object.get_by_id(id) + refute Object.get_by_id(note_remote_public_id) + refute Object.get_by_id(note_remote_non_public_id) + end + + test "it cleans up bookmarks", %{old_insert_date: old_insert_date} do + user = insert(:user) + {:ok, old_object_activity} = CommonAPI.post(user, %{status: "yadayada"}) + + Repo.one(Object) + |> Ecto.Changeset.change(%{updated_at: old_insert_date}) + |> Repo.update!() + + {:ok, new_object_activity} = CommonAPI.post(user, %{status: "yadayada"}) + + {:ok, _} = Bookmark.create(user.id, old_object_activity.id) + {:ok, _} = Bookmark.create(user.id, new_object_activity.id) + + assert length(Repo.all(Object)) == 2 + assert length(Repo.all(Bookmark)) == 2 + + Mix.Tasks.Pleroma.Database.run(["prune_objects"]) + + assert length(Repo.all(Object)) == 1 + assert length(Repo.all(Bookmark)) == 1 + refute Bookmark.get(user.id, old_object_activity.id) + end + + test "with the --keep-non-public option it still keeps non-public posts even if they are not local", + %{old_insert_date: old_insert_date} do + insert(:note) + + %{id: note_remote_id} = + :note + |> insert() + |> Ecto.Changeset.change(%{updated_at: old_insert_date}) + |> Repo.update!() + + note_remote_non_public = + %{data: note_remote_non_public_data} = + :note + |> insert() + + note_remote_non_public + |> Ecto.Changeset.change(%{ + updated_at: old_insert_date, + data: note_remote_non_public_data |> update_in(["to"], fn _ -> [] end) + }) + |> Repo.update!() + + assert length(Repo.all(Object)) == 3 + + Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-non-public"]) + + assert length(Repo.all(Object)) == 2 + refute Object.get_by_id(note_remote_id) + end + + test "with the --keep-threads and --keep-non-public option it keeps old threads with non-public replies even if the interaction is not local", + %{old_insert_date: old_insert_date} do + # For non-public we 
only check Create Activities because only these are relevant for threads + # Flags are always non-public, Announces from relays can be non-public... + + remote_user1 = insert(:user, local: false) + remote_user2 = insert(:user, local: false) + + # Old remote non-public reply (should be kept) + {:ok, old_remote_post1_activity} = + CommonAPI.post(remote_user1, %{status: "some thing", local: false}) + + old_remote_post1_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + {:ok, old_remote_non_public_reply_activity} = + CommonAPI.post(remote_user2, %{ + status: "some reply", + in_reply_to_status_id: old_remote_post1_activity.id + }) + + old_remote_non_public_reply_activity + |> Ecto.Changeset.change(%{ + local: false, + updated_at: old_insert_date, + data: old_remote_non_public_reply_activity.data |> update_in(["to"], fn _ -> [] end) + }) + |> Repo.update!() + + # Old remote non-public Announce (should be removed) + {:ok, old_remote_post2_activity = %{data: %{"object" => old_remote_post2_id}}} = + CommonAPI.post(remote_user1, %{status: "some thing", local: false}) + + old_remote_post2_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + {:ok, old_remote_non_public_repeat_activity} = + CommonAPI.repeat(old_remote_post2_activity.id, remote_user2) + + old_remote_non_public_repeat_activity + |> Ecto.Changeset.change(%{ + local: false, + updated_at: old_insert_date, + data: old_remote_non_public_repeat_activity.data |> update_in(["to"], fn _ -> [] end) + }) + |> Repo.update!() + + assert length(Repo.all(Object)) == 3 + + Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads", "--keep-non-public"]) + + Repo.all(Pleroma.Activity) + assert length(Repo.all(Object)) == 2 + refute Object.get_by_ap_id(old_remote_post2_id) + end + + test "with the --keep-threads option it still keeps non-old threads even with no local interactions" do + remote_user = insert(:user, 
local: false) + remote_user2 = insert(:user, local: false) + + {:ok, remote_post_activity} = + CommonAPI.post(remote_user, %{status: "some thing", local: false}) + + {:ok, remote_post_reply_activity} = + CommonAPI.post(remote_user2, %{ + status: "some reply", + in_reply_to_status_id: remote_post_activity.id + }) + + remote_post_activity + |> Ecto.Changeset.change(%{local: false}) + |> Repo.update!() + + remote_post_reply_activity + |> Ecto.Changeset.change(%{local: false}) + |> Repo.update!() + + assert length(Repo.all(Object)) == 2 + + Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"]) + + assert length(Repo.all(Object)) == 2 + end + + test "with the --keep-threads option it deletes old threads with no local interaction", %{ + old_insert_date: old_insert_date + } do + remote_user = insert(:user, local: false) + remote_user2 = insert(:user, local: false) + + {:ok, old_remote_post_activity} = + CommonAPI.post(remote_user, %{status: "some thing", local: false}) + + old_remote_post_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + {:ok, old_remote_post_reply_activity} = + CommonAPI.post(remote_user2, %{ + status: "some reply", + in_reply_to_status_id: old_remote_post_activity.id + }) + + old_remote_post_reply_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + {:ok, old_favourite_activity} = + CommonAPI.favorite(remote_user2, old_remote_post_activity.id) + + old_favourite_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + {:ok, old_repeat_activity} = CommonAPI.repeat(old_remote_post_activity.id, remote_user2) + + old_repeat_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + assert length(Repo.all(Object)) == 2 + + Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"]) + + assert length(Repo.all(Object)) == 0 + end + + test 
"with the --keep-threads option it keeps old threads with local interaction", %{ + old_insert_date: old_insert_date + } do + remote_user = insert(:user, local: false) + local_user = insert(:user, local: true) + + # local reply + {:ok, old_remote_post1_activity} = + CommonAPI.post(remote_user, %{status: "some thing", local: false}) + + old_remote_post1_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + {:ok, old_local_post2_reply_activity} = + CommonAPI.post(local_user, %{ + status: "some reply", + in_reply_to_status_id: old_remote_post1_activity.id + }) + + old_local_post2_reply_activity + |> Ecto.Changeset.change(%{local: true, updated_at: old_insert_date}) + |> Repo.update!() + + # local Like + {:ok, old_remote_post3_activity} = + CommonAPI.post(remote_user, %{status: "some thing", local: false}) + + old_remote_post3_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + {:ok, old_favourite_activity} = CommonAPI.favorite(local_user, old_remote_post3_activity.id) + + old_favourite_activity + |> Ecto.Changeset.change(%{local: true, updated_at: old_insert_date}) + |> Repo.update!() + + # local Announce + {:ok, old_remote_post4_activity} = + CommonAPI.post(remote_user, %{status: "some thing", local: false}) + + old_remote_post4_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + {:ok, old_repeat_activity} = CommonAPI.repeat(old_remote_post4_activity.id, local_user) + + old_repeat_activity + |> Ecto.Changeset.change(%{local: true, updated_at: old_insert_date}) + |> Repo.update!() + + assert length(Repo.all(Object)) == 4 + + Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"]) + + assert length(Repo.all(Object)) == 4 + end + + test "with the --keep-threads option it keeps old threads with bookmarked posts", %{ + old_insert_date: old_insert_date + } do + remote_user = insert(:user, local: false) + 
local_user = insert(:user, local: true) + + {:ok, old_remote_post_activity} = + CommonAPI.post(remote_user, %{status: "some thing", local: false}) + + old_remote_post_activity + |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date}) + |> Repo.update!() + + Pleroma.Bookmark.create(local_user.id, old_remote_post_activity.id) + + assert length(Repo.all(Object)) == 1 + + Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"]) + + assert length(Repo.all(Object)) == 1 + end + + test "We don't have unexpected tables which may contain objects that are referenced by activities" do + # We can delete orphaned activities. For that we look for the objects they reference in the 'objects', 'activities', and 'users' table. + # If someone adds another table with objects (idk, maybe with separate relations, or collections or w/e), then we need to make sure we + # add logic for that in the 'prune_objects' task so that we don't wrongly delete their corresponding activities. + # So when someone adds (or removes) a table, this test will fail. + # Either the table contains objects which can be referenced from the activities table + # => in that case the prune_objects job should be adapted so we don't delete activities who still have the referenced object. + # Or it doesn't contain objects which can be referenced from the activities table + # => in that case you can add/remove the table to/from this (sorted) list. 
+ + assert Repo.query!( + "SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';" + ).rows + |> Enum.sort() == [ + ["activities"], + ["announcement_read_relationships"], + ["announcements"], + ["apps"], + ["backups"], + ["bookmarks"], + ["chat_message_references"], + ["chats"], + ["config"], + ["conversation_participation_recipient_ships"], + ["conversation_participations"], + ["conversations"], + ["counter_cache"], + ["data_migration_failed_ids"], + ["data_migrations"], + ["deliveries"], + ["filters"], + ["following_relationships"], + ["hashtags"], + ["hashtags_objects"], + ["instances"], + ["lists"], + ["markers"], + ["mfa_tokens"], + ["moderation_log"], + ["notifications"], + ["oauth_authorizations"], + ["oauth_tokens"], + ["oban_jobs"], + ["oban_peers"], + ["objects"], + ["password_reset_tokens"], + ["push_subscriptions"], + ["registrations"], + ["report_notes"], + ["scheduled_activities"], + ["schema_migrations"], + ["thread_mutes"], + # ["user_follows_hashtag"], # not in pleroma + # ["user_frontend_setting_profiles"], # not in pleroma + ["user_invite_tokens"], + ["user_notes"], + ["user_relationships"], + ["users"] + ] + end + + test "it prunes orphaned activities with the --prune-orphaned-activities" do + # Add a remote activity which references an Object + %Object{} |> Map.merge(%{data: %{"id" => "object_for_activity"}}) |> Repo.insert() + + %Activity{} + |> Map.merge(%{ + local: false, + data: %{"id" => "remote_activity_with_object", "object" => "object_for_activity"} + }) + |> Repo.insert() + + # Add a remote activity which references an activity + %Activity{} + |> Map.merge(%{ + local: false, + data: %{ + "id" => "remote_activity_with_activity", + "object" => "remote_activity_with_object" + } + }) + |> Repo.insert() + + # Add a remote activity which references an Actor + %User{} |> Map.merge(%{ap_id: "actor"}) |> Repo.insert() + + %Activity{} + |> Map.merge(%{ + local: false, + data: %{"id" => 
"remote_activity_with_actor", "object" => "actor"} + }) + |> Repo.insert() + + # Add a remote activity without existing referenced object, activity or actor + %Activity{} + |> Map.merge(%{ + local: false, + data: %{ + "id" => "remote_activity_without_existing_referenced_object", + "object" => "non_existing" + } + }) + |> Repo.insert() + + # Add a local activity without existing referenced object, activity or actor + %Activity{} + |> Map.merge(%{ + local: true, + data: %{"id" => "local_activity_with_actor", "object" => "non_existing"} + }) + |> Repo.insert() + + # The remote activities without existing reference, and only the remote activities without existing reference, are deleted + # if, and only if, we provide the --prune-orphaned-activities option + assert length(Repo.all(Activity)) == 5 + Mix.Tasks.Pleroma.Database.run(["prune_objects"]) + assert length(Repo.all(Activity)) == 5 + Mix.Tasks.Pleroma.Database.run(["prune_objects", "--prune-orphaned-activities"]) + activities = Repo.all(Activity) + + assert "remote_activity_without_existing_referenced_object" not in Enum.map( + activities, + fn a -> a.data["id"] end + ) + + assert length(activities) == 4 + end + + test "it prunes orphaned activities with the --prune-orphaned-activities when the objects are referenced from an array" do + %Object{} |> Map.merge(%{data: %{"id" => "existing_object"}}) |> Repo.insert() + %User{} |> Map.merge(%{ap_id: "existing_actor"}) |> Repo.insert() + + # Multiple objects, one object exists (keep) + %Activity{} + |> Map.merge(%{ + local: false, + data: %{ + "id" => "remote_activity_existing_object", + "object" => ["non_ existing_object", "existing_object"] + } + }) + |> Repo.insert() + + # Multiple objects, one actor exists (keep) + %Activity{} + |> Map.merge(%{ + local: false, + data: %{ + "id" => "remote_activity_existing_actor", + "object" => ["non_ existing_object", "existing_actor"] + } + }) + |> Repo.insert() + + # Multiple objects, one activity exists (keep) + %Activity{} + 
|> Map.merge(%{ + local: false, + data: %{ + "id" => "remote_activity_existing_activity", + "object" => ["non_ existing_object", "remote_activity_existing_actor"] + } + }) + |> Repo.insert() + + # Multiple objects none exist (prune) + %Activity{} + |> Map.merge(%{ + local: false, + data: %{ + "id" => "remote_activity_without_existing_referenced_object", + "object" => ["owo", "whats_this"] + } + }) + |> Repo.insert() + + assert length(Repo.all(Activity)) == 4 + Mix.Tasks.Pleroma.Database.run(["prune_objects"]) + assert length(Repo.all(Activity)) == 4 + Mix.Tasks.Pleroma.Database.run(["prune_objects", "--prune-orphaned-activities"]) + activities = Repo.all(Activity) + assert length(activities) == 3 + + assert "remote_activity_without_existing_referenced_object" not in Enum.map( + activities, + fn a -> a.data["id"] end + ) + + assert length(activities) == 3 end end From fdc3cbb8cbefad2161cc408b4440420d6e4b2a88 Mon Sep 17 00:00:00 2001 From: faried nawaz Date: Mon, 18 Sep 2023 02:13:52 +0500 Subject: [PATCH 02/22] add documentation for the prune_objects mix task options --- changelog.d/akkoma-prune-options.add | 1 + docs/administration/CLI_tasks/database.md | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) create mode 100644 changelog.d/akkoma-prune-options.add diff --git a/changelog.d/akkoma-prune-options.add b/changelog.d/akkoma-prune-options.add new file mode 100644 index 0000000000..6bc5e7f926 --- /dev/null +++ b/changelog.d/akkoma-prune-options.add @@ -0,0 +1 @@ +Add options to the mix prune_objects task diff --git a/docs/administration/CLI_tasks/database.md b/docs/administration/CLI_tasks/database.md index c53c499211..c5e51e5552 100644 --- a/docs/administration/CLI_tasks/database.md +++ b/docs/administration/CLI_tasks/database.md @@ -21,16 +21,18 @@ Replaces embedded objects with references to them in the `objects` table. Only n mix pleroma.database remove_embedded_objects [option ...] 
``` - ### Options - `--vacuum` - run `VACUUM FULL` after the embedded objects are replaced with their references ## Prune old remote posts from the database -This will prune remote posts older than 90 days (configurable with [`config :pleroma, :instance, remote_post_retention_days`](../../configuration/cheatsheet.md#instance)) from the database, they will be refetched from source when accessed. +This will prune remote posts older than 90 days (configurable with [`config :pleroma, :instance, remote_post_retention_days`](../../configuration/cheatsheet.md#instance)) from the database. Pruned posts may be refetched in some cases. + +!!! note + The disk space will only be reclaimed after a proper vacuum. By default Postgresql does this for you on a regular basis, but if your instance has been running for a long time and there are many rows deleted, it may be advantageous to use `VACUUM FULL` (e.g. by using the `--vacuum` option). !!! danger - The disk space will only be reclaimed after `VACUUM FULL`. You may run out of disk space during the execution of the task or vacuuming if you don't have about 1/3rds of the database size free. + You may run out of disk space during the execution of the task or vacuuming if you don't have about 1/3rds of the database size free. Vacuum causes a substantial increase in I/O traffic, and may lead to a degraded experience while it is running. === "OTP" @@ -45,7 +47,11 @@ This will prune remote posts older than 90 days (configurable with [`config :ple ``` ### Options -- `--vacuum` - run `VACUUM FULL` after the objects are pruned + +- `--keep-threads` - Don't prune posts when they are part of a thread where at least one post has seen local interaction (e.g. one of the posts is a local post, or is favourited by a local user, or has been repeated by a local user...). It also won't delete posts when at least one of the posts in that thread is kept (e.g. because one of the posts has seen recent activity). 
+- `--keep-non-public` - Keep non-public posts like DMs and followers-only, even if they are remote. +- `--prune-orphaned-activities` - Also prune orphaned activities afterwards. Activities are things like Like, Create, Announce, Flag (aka reports). They can significantly help reduce the database size. Note: this can take a very long time. +- `--vacuum` - Run `VACUUM FULL` after the objects are pruned. This should not be used on a regular basis, but is useful if your instance has been running for a long time before pruning. ## Create a conversation for all existing DMs @@ -93,6 +99,9 @@ Can be safely re-run ## Vacuum the database +!!! note + By default Postgresql has an autovacuum daemon running. While the tasks described here can help in some cases, they shouldn't be needed on a regular basis. See [the Postgresql docs on vacuuming](https://www.postgresql.org/docs/current/sql-vacuum.html) for more information on this. + ### Analyze Running an `analyze` vacuum job can improve performance by updating statistics used by the query planner. **It is safe to cancel this.** From 0b864c3696e47ba1def6047905dad9065b0bee0e Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 28 May 2024 08:49:34 -0400 Subject: [PATCH 03/22] Dialyzer: fix invalid @spec lib/pleroma/notification.ex:492:invalid_contract The @spec for the function does not match the success typing of the function. 
Function: Pleroma.Notification.get_notified_from_activity/2 Success typing: @spec get_notified_from_activity(_, _) :: [any()] --- lib/pleroma/notification.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pleroma/notification.ex b/lib/pleroma/notification.ex index 4f714b25fe..f521a29988 100644 --- a/lib/pleroma/notification.ex +++ b/lib/pleroma/notification.ex @@ -489,7 +489,7 @@ def create_poll_notifications(%Activity{} = activity) do NOTE: might be called for FAKE Activities, see ActivityPub.Utils.get_notified_from_object/1 """ - @spec get_notified_from_activity(Activity.t(), boolean()) :: {list(User.t()), list(User.t())} + @spec get_notified_from_activity(Activity.t(), boolean()) :: list(User.t()) def get_notified_from_activity(activity, local_only \\ true) def get_notified_from_activity(%Activity{data: %{"type" => type}} = activity, local_only) From 42c5f7c74e93b7b489456578f8285d06320c15dc Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 28 May 2024 08:55:18 -0400 Subject: [PATCH 04/22] Dialyzer: fix invalid @spec The callback already defines the @spec and these do not match it. lib/pleroma/upload/filter/exiftool/strip_location.ex:12:callback_spec_type_mismatch The @spec return type does not match the expected return type for filter/1 callback in Pleroma.Upload.Filter behaviour. Actual: @spec filter(...) :: {:ok, _} Expected: @spec filter(...) 
:: {:error, _} | {:ok, :filtered | :noop} | {:ok, :filtered, struct()} --- lib/pleroma/upload/filter/exiftool/strip_location.ex | 2 -- lib/pleroma/upload/filter/mogrifun.ex | 1 - lib/pleroma/upload/filter/mogrify.ex | 1 - 3 files changed, 4 deletions(-) diff --git a/lib/pleroma/upload/filter/exiftool/strip_location.ex b/lib/pleroma/upload/filter/exiftool/strip_location.ex index f2bcc4622b..8becee712f 100644 --- a/lib/pleroma/upload/filter/exiftool/strip_location.ex +++ b/lib/pleroma/upload/filter/exiftool/strip_location.ex @@ -9,8 +9,6 @@ defmodule Pleroma.Upload.Filter.Exiftool.StripLocation do """ @behaviour Pleroma.Upload.Filter - @spec filter(Pleroma.Upload.t()) :: {:ok, any()} | {:error, String.t()} - # Formats not compatible with exiftool at this time def filter(%Pleroma.Upload{content_type: "image/heic"}), do: {:ok, :noop} def filter(%Pleroma.Upload{content_type: "image/webp"}), do: {:ok, :noop} diff --git a/lib/pleroma/upload/filter/mogrifun.ex b/lib/pleroma/upload/filter/mogrifun.ex index a0f247b704..9716580a84 100644 --- a/lib/pleroma/upload/filter/mogrifun.ex +++ b/lib/pleroma/upload/filter/mogrifun.ex @@ -38,7 +38,6 @@ defmodule Pleroma.Upload.Filter.Mogrifun do [{"fill", "yellow"}, {"tint", "40"}] ] - @spec filter(Pleroma.Upload.t()) :: {:ok, atom()} | {:error, String.t()} def filter(%Pleroma.Upload{tempfile: file, content_type: "image" <> _}) do try do Filter.Mogrify.do_filter(file, [Enum.random(@filters)]) diff --git a/lib/pleroma/upload/filter/mogrify.ex b/lib/pleroma/upload/filter/mogrify.ex index 06efbf3212..d1e166022b 100644 --- a/lib/pleroma/upload/filter/mogrify.ex +++ b/lib/pleroma/upload/filter/mogrify.ex @@ -8,7 +8,6 @@ defmodule Pleroma.Upload.Filter.Mogrify do @type conversion :: action :: String.t() | {action :: String.t(), opts :: String.t()} @type conversions :: conversion() | [conversion()] - @spec filter(Pleroma.Upload.t()) :: {:ok, :atom} | {:error, String.t()} def filter(%Pleroma.Upload{tempfile: file, content_type: "image" <> _}) 
do try do do_filter(file, Pleroma.Config.get!([__MODULE__, :args])) From f8ce639e3f76257097793c666d3ebf8f22539a30 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 28 May 2024 09:30:19 -0400 Subject: [PATCH 05/22] Dialyzer: guard clause can never succeed lib/pleroma/web/activity_pub/mrf/dnsrbl_policy.ex:106:guard_fail The guard clause: when _ :: [ binary() | [string() | char()] | {string() | integer(), string()} | {{byte(), byte(), byte(), byte()}, integer(), binary()} | {integer(), integer(), integer(), string() | byte()} | {integer(), integer(), string(), string(), string(), string()} | {string(), string(), integer(), integer(), integer(), integer(), integer()} | {char(), char(), char(), char(), char(), char(), char(), char()} ] === nil can never succeed. --- lib/pleroma/web/activity_pub/mrf/dnsrbl_policy.ex | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/pleroma/web/activity_pub/mrf/dnsrbl_policy.ex b/lib/pleroma/web/activity_pub/mrf/dnsrbl_policy.ex index 9543cc5453..7c6bb888f1 100644 --- a/lib/pleroma/web/activity_pub/mrf/dnsrbl_policy.ex +++ b/lib/pleroma/web/activity_pub/mrf/dnsrbl_policy.ex @@ -103,7 +103,11 @@ defp check_rbl(%{host: actor_host}, object) do {:ok, object} else Task.start(fn -> - reason = rblquery(query, :txt) || "undefined" + reason = + case rblquery(query, :txt) do + [[result]] -> result + _ -> "undefined" + end Logger.warning( "DNSRBL Rejected activity from #{actor_host} for reason: #{inspect(reason)}" From 18835bf7012e8e234eb27456a437f4d1e8796645 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 28 May 2024 09:38:36 -0400 Subject: [PATCH 06/22] Use the configured http client options for mediaproxy --- lib/pleroma/helpers/media_helper.ex | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/pleroma/helpers/media_helper.ex b/lib/pleroma/helpers/media_helper.ex index e44114d9da..0ac07fa41b 100644 --- a/lib/pleroma/helpers/media_helper.ex +++ b/lib/pleroma/helpers/media_helper.ex @@ 
-25,7 +25,7 @@ def missing_dependencies do end def image_resize(url, options) do - with {:ok, env} <- HTTP.get(url, [], pool: :media), + with {:ok, env} <- HTTP.get(url, [], http_client_opts()), {:ok, resized} <- Operation.thumbnail_buffer(env.body, options.max_width, height: options.max_height, @@ -46,7 +46,7 @@ def image_resize(url, options) do def video_framegrab(url) do with executable when is_binary(executable) <- System.find_executable("ffmpeg"), false <- @cachex.exists?(:failed_media_helper_cache, url), - {:ok, env} <- HTTP.get(url, [], pool: :media), + {:ok, env} <- HTTP.get(url, [], http_client_opts()), {:ok, pid} <- StringIO.open(env.body) do body_stream = IO.binstream(pid, 1) @@ -84,4 +84,6 @@ def video_framegrab(url) do {:error, _} = error -> error end end + + defp http_client_opts, do: Pleroma.Config.get([:media_proxy, :proxy_opts, :http], pool: :media) end From 17ebb2df8404474ef66c6a38e974143166b5e49b Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 28 May 2024 09:43:35 -0400 Subject: [PATCH 07/22] Dialyzer: fix pattern matches preventing video thumbnailing from working lib/pleroma/web/media_proxy/media_proxy_controller.ex:154:pattern_match The pattern can never match the type. 
Pattern: {:ok, _thumbnail_binary} Type: {:error, boolean() | {:ffmpeg, :command_not_found}} --- changelog.d/video-thumbs.fix | 1 + lib/pleroma/helpers/media_helper.ex | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) create mode 100644 changelog.d/video-thumbs.fix diff --git a/changelog.d/video-thumbs.fix b/changelog.d/video-thumbs.fix new file mode 100644 index 0000000000..03e862f3db --- /dev/null +++ b/changelog.d/video-thumbs.fix @@ -0,0 +1 @@ +Video thumbnails were not being generated due to a negative cache lookup logic error diff --git a/lib/pleroma/helpers/media_helper.ex b/lib/pleroma/helpers/media_helper.ex index 0ac07fa41b..8566ab3ea2 100644 --- a/lib/pleroma/helpers/media_helper.ex +++ b/lib/pleroma/helpers/media_helper.ex @@ -45,7 +45,7 @@ def image_resize(url, options) do @spec video_framegrab(String.t()) :: {:ok, binary()} | {:error, any()} def video_framegrab(url) do with executable when is_binary(executable) <- System.find_executable("ffmpeg"), - false <- @cachex.exists?(:failed_media_helper_cache, url), + {:ok, false} <- @cachex.exists?(:failed_media_helper_cache, url), {:ok, env} <- HTTP.get(url, [], http_client_opts()), {:ok, pid} <- StringIO.open(env.body) do body_stream = IO.binstream(pid, 1) @@ -71,13 +71,13 @@ def video_framegrab(url) do end) case Task.yield(task, 5_000) do - nil -> + {:ok, result} -> + {:ok, result} + + _ -> Task.shutdown(task) @cachex.put(:failed_media_helper_cache, url, nil) {:error, {:ffmpeg, :timeout}} - - result -> - {:ok, result} end else nil -> {:error, {:ffmpeg, :command_not_found}} From 1b3c84e241f8ed1066d113346365ce489971ac14 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 28 May 2024 09:58:44 -0400 Subject: [PATCH 08/22] Dialyzer: no_local_return WebPushEncryption.send_web_push/4 was written to raise on erroneous input, so we must guard against that. lib/pleroma/web/push/impl.ex:65:no_return Function push_message/4 has no local return.
--- lib/pleroma/web/push/impl.ex | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/lib/pleroma/web/push/impl.ex b/lib/pleroma/web/push/impl.ex index 9e68d827b8..53334e72cf 100644 --- a/lib/pleroma/web/push/impl.ex +++ b/lib/pleroma/web/push/impl.ex @@ -63,19 +63,25 @@ def perform(_) do @doc "Push message to web" def push_message(body, sub, api_key, subscription) do - case WebPushEncryption.send_web_push(body, sub, api_key) do - {:ok, %{status: code}} when code in 400..499 -> - Logger.debug("Removing subscription record") - Repo.delete!(subscription) - :ok + try do + case WebPushEncryption.send_web_push(body, sub, api_key) do + {:ok, %{status: code}} when code in 400..499 -> + Logger.debug("Removing subscription record") + Repo.delete!(subscription) + :ok - {:ok, %{status: code}} when code in 200..299 -> - :ok + {:ok, %{status: code}} when code in 200..299 -> + :ok - {:ok, %{status: code}} -> - Logger.error("Web Push Notification failed with code: #{code}") - :error + {:ok, %{status: code}} -> + Logger.error("Web Push Notification failed with code: #{code}") + :error + error -> + Logger.error("Web Push Notification failed with #{inspect(error)}") + :error + end + rescue error -> Logger.error("Web Push Notification failed with #{inspect(error)}") :error From a041879eaaa7ca8ca421a89e303f58b5b68bc6da Mon Sep 17 00:00:00 2001 From: Lain Soykaf Date: Tue, 28 May 2024 17:04:43 +0400 Subject: [PATCH 09/22] Linting --- test/mix/tasks/pleroma/database_test.exs | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/test/mix/tasks/pleroma/database_test.exs b/test/mix/tasks/pleroma/database_test.exs index 01fd97e2dc..5f5ab5195d 100644 --- a/test/mix/tasks/pleroma/database_test.exs +++ b/test/mix/tasks/pleroma/database_test.exs @@ -352,12 +352,17 @@ test "with the --keep-threads option it keeps old threads with bookmarked posts" end test "We don't have unexpected tables which may contain objects that are 
referenced by activities" do - # We can delete orphaned activities. For that we look for the objects they reference in the 'objects', 'activities', and 'users' table. - # If someone adds another table with objects (idk, maybe with separate relations, or collections or w/e), then we need to make sure we - # add logic for that in the 'prune_objects' task so that we don't wrongly delete their corresponding activities. + # We can delete orphaned activities. For that we look for the objects + # they reference in the 'objects', 'activities', and 'users' table. + # If someone adds another table with objects (idk, maybe with separate + # relations, or collections or w/e), then we need to make sure we + # add logic for that in the 'prune_objects' task so that we don't + # wrongly delete their corresponding activities. # So when someone adds (or removes) a table, this test will fail. - # Either the table contains objects which can be referenced from the activities table - # => in that case the prune_objects job should be adapted so we don't delete activities who still have the referenced object. + # Either the table contains objects which can be referenced from the + # activities table + # => in that case the prune_objects job should be adapted so we don't + # delete activities who still have the referenced object. # Or it doesn't contain objects which can be referenced from the activities table # => in that case you can add/remove the table to/from this (sorted) list. 
@@ -463,7 +468,8 @@ test "it prunes orphaned activities with the --prune-orphaned-activities" do }) |> Repo.insert() - # The remote activities without existing reference, and only the remote activities without existing reference, are deleted + # The remote activities without existing reference, + # and only the remote activities without existing reference, are deleted # if, and only if, we provide the --prune-orphaned-activities option assert length(Repo.all(Activity)) == 5 Mix.Tasks.Pleroma.Database.run(["prune_objects"]) From 8743c6c640d395ff6d7d268df1e382ba0fb0ca96 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 28 May 2024 10:36:00 -0400 Subject: [PATCH 10/22] Dialyzer: The pattern can never match the type We will never pass :plain to query_with/4, so remove that match and change it to query_with/3 lib/pleroma/search/database_search.ex:127:pattern_match The pattern can never match the type. Pattern: _q, :rum, _search_query, :plain Type: %Ecto.Query{ :aliases => _, :assocs => _, :combinations => _, :distinct => _, :from => _, :group_bys => _, :havings => _, :joins => _, :limit => _, :lock => _, :offset => _, :order_bys => _, :prefix => _, :preloads => _, :select => _, :sources => _, :updates => _, :wheres => _, :windows => _, :with_ctes => _ }, :rum, _, :websearch --- lib/pleroma/search/database_search.ex | 36 +++------------------------ 1 file changed, 3 insertions(+), 33 deletions(-) diff --git a/lib/pleroma/search/database_search.ex b/lib/pleroma/search/database_search.ex index c6fe8a9bd8..aef5d1e741 100644 --- a/lib/pleroma/search/database_search.ex +++ b/lib/pleroma/search/database_search.ex @@ -28,7 +28,7 @@ def search(user, search_query, options \\ []) do |> Activity.with_preloaded_object() |> Activity.restrict_deactivated_users() |> restrict_public(user) - |> query_with(index_type, search_query, :websearch) + |> query_with(index_type, search_query) |> maybe_restrict_local(user) |> maybe_restrict_author(author) |> maybe_restrict_blocked(user) @@ 
-88,25 +88,7 @@ defp restrict_public(q, _user) do ) end - defp query_with(q, :gin, search_query, :plain) do - %{rows: [[tsc]]} = - Ecto.Adapters.SQL.query!( - Pleroma.Repo, - "select current_setting('default_text_search_config')::regconfig::oid;" - ) - - from([a, o] in q, - where: - fragment( - "to_tsvector(?::oid::regconfig, ?->>'content') @@ plainto_tsquery(?)", - ^tsc, - o.data, - ^search_query - ) - ) - end - - defp query_with(q, :gin, search_query, :websearch) do + defp query_with(q, :gin, search_query) do %{rows: [[tsc]]} = Ecto.Adapters.SQL.query!( Pleroma.Repo, @@ -124,19 +106,7 @@ defp query_with(q, :gin, search_query, :websearch) do ) end - defp query_with(q, :rum, search_query, :plain) do - from([a, o] in q, - where: - fragment( - "? @@ plainto_tsquery(?)", - o.fts_content, - ^search_query - ), - order_by: [fragment("? <=> now()::date", o.inserted_at)] - ) - end - - defp query_with(q, :rum, search_query, :websearch) do + defp query_with(q, :rum, search_query) do from([a, o] in q, where: fragment( From 6551ca2db7a0907252bbc649c7d082b3edf92a93 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 28 May 2024 10:40:54 -0400 Subject: [PATCH 11/22] Dialyzer: overlapping_contract Wrong @spec name for remove_from_block/2 lib/pleroma/user.ex:2721:overlapping_contract Overloaded contract for Pleroma.User.add_to_block/2 has overlapping domains; such contracts are currently unsupported and are simply ignored. 
--- lib/pleroma/user.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index 6d6aa98b52..d94b68ce09 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -2727,7 +2727,7 @@ defp add_to_block(%User{} = user, %User{} = blocked) do end end - @spec add_to_block(User.t(), User.t()) :: + @spec remove_from_block(User.t(), User.t()) :: {:ok, UserRelationship.t()} | {:ok, nil} | {:error, Ecto.Changeset.t()} defp remove_from_block(%User{} = user, %User{} = blocked) do with {:ok, relationship} <- UserRelationship.delete_block(user, blocked) do From 6b6a2adb07c3b9a52cd0a5adf435a916088bb4d7 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 28 May 2024 10:49:43 -0400 Subject: [PATCH 12/22] Dialyzer: The function call will not succeed. :idna.encode/1 expects a charlist even though it will accept a binary string. That functionality is undocumented / not part of its typespec, so we should turn it into a charlist first. Also switch to using match?/2 lib/pleroma/user.ex:2056:call The function call will not succeed. 
:idna.encode(_host :: binary()) will never return since the success typing is: (string()) :: string() and the contract is (string()) :: string() --- lib/pleroma/user.ex | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index d94b68ce09..884c1f3022 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -2053,7 +2053,8 @@ defp verify_field_link(field, profile_urls) do %{scheme: scheme, userinfo: nil, host: host} when not_empty_string(host) and scheme in ["http", "https"] <- URI.parse(value), - {:not_idn, true} <- {:not_idn, to_string(:idna.encode(host)) == host}, + {:not_idn, true} <- + {:not_idn, match?(^host, to_string(:idna.encode(to_charlist(host))))}, "me" <- Pleroma.Web.RelMe.maybe_put_rel_me(value, profile_urls) do CommonUtils.to_masto_date(NaiveDateTime.utc_now()) else From f663135724fac2ef12107369f94d8e030bc1b4a5 Mon Sep 17 00:00:00 2001 From: Lain Soykaf Date: Tue, 28 May 2024 18:54:36 +0400 Subject: [PATCH 13/22] DatabaseTest: Fix test. 
--- test/mix/tasks/pleroma/database_test.exs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/mix/tasks/pleroma/database_test.exs b/test/mix/tasks/pleroma/database_test.exs index 5f5ab5195d..d773038cb6 100644 --- a/test/mix/tasks/pleroma/database_test.exs +++ b/test/mix/tasks/pleroma/database_test.exs @@ -375,6 +375,7 @@ test "We don't have unexpected tables which may contain objects that are referen ["announcements"], ["apps"], ["backups"], + ["bookmark_folders"], ["bookmarks"], ["chat_message_references"], ["chats"], @@ -405,6 +406,8 @@ test "We don't have unexpected tables which may contain objects that are referen ["push_subscriptions"], ["registrations"], ["report_notes"], + ["rich_media_card"], + ["rules"], ["scheduled_activities"], ["schema_migrations"], ["thread_mutes"], From 79c418bcb7534ae3645ee0b7728f8e65a031f7a4 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 28 May 2024 11:07:28 -0400 Subject: [PATCH 14/22] Dialyzer: fix invalid @spec --- lib/pleroma/web/o_auth/token.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pleroma/web/o_auth/token.ex b/lib/pleroma/web/o_auth/token.ex index a5ad2e909f..9b1198b428 100644 --- a/lib/pleroma/web/o_auth/token.ex +++ b/lib/pleroma/web/o_auth/token.ex @@ -96,7 +96,7 @@ defp put_valid_until(changeset, attrs) do |> validate_required([:valid_until]) end - @spec create(App.t(), User.t(), map()) :: {:ok, Token} | {:error, Ecto.Changeset.t()} + @spec create(App.t(), User.t(), map()) :: {:ok, Token.t()} | {:error, Ecto.Changeset.t()} def create(%App{} = app, %User{} = user, attrs \\ %{}) do with {:ok, token} <- do_create(app, user, attrs) do if Pleroma.Config.get([:oauth2, :clean_expired_tokens]) do From 14b4bd69a83846b3c117624851b96b9fc30528bf Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 29 May 2024 10:44:34 -0400 Subject: [PATCH 15/22] Add additional flags to the Pleroma.Search.Indexer Mix task --- changelog.d/mix-indexer.add | 1 + lib/mix/tasks/pleroma/search/indexer.ex | 9 
++++++--- 2 files changed, 7 insertions(+), 3 deletions(-) create mode 100644 changelog.d/mix-indexer.add diff --git a/changelog.d/mix-indexer.add b/changelog.d/mix-indexer.add new file mode 100644 index 0000000000..6effb959bd --- /dev/null +++ b/changelog.d/mix-indexer.add @@ -0,0 +1 @@ +Permit passing --chunk and --step values to the Pleroma.Search.Indexer Mix task diff --git a/lib/mix/tasks/pleroma/search/indexer.ex b/lib/mix/tasks/pleroma/search/indexer.ex index 81a9fced63..2a52472f9b 100644 --- a/lib/mix/tasks/pleroma/search/indexer.ex +++ b/lib/mix/tasks/pleroma/search/indexer.ex @@ -33,15 +33,18 @@ def run(["index" | options]) do OptionParser.parse( options, strict: [ - limit: :integer + chunk: :integer, + limit: :integer, + step: :integer ] ) start_pleroma() + chunk_size = Keyword.get(options, :chunk, 100) limit = Keyword.get(options, :limit, 100_000) + per_step = Keyword.get(options, :step, 1000) - per_step = 1000 chunks = max(div(limit, per_step), 1) 1..chunks @@ -65,7 +68,7 @@ def run(["index" | options]) do IO.puts("Got #{length(ids)} activities, adding to indexer") ids - |> Enum.chunk_every(100) + |> Enum.chunk_every(chunk_size) |> Enum.each(fn chunk -> IO.puts("Adding #{length(chunk)} activities to indexing queue") From 36b440d9bebb1ddba817cd9bd8f158ceadbe8aa2 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 29 May 2024 21:58:39 -0400 Subject: [PATCH 16/22] Update Bandit to 1.5.2 Lots of fixes, also requires Websock Adapter update due to internal module changes in Bandit 1.4.0. 
--- changelog.d/bandit_update_1.5.2.change | 1 + mix.exs | 3 ++- mix.lock | 16 ++++++++-------- 3 files changed, 11 insertions(+), 9 deletions(-) create mode 100644 changelog.d/bandit_update_1.5.2.change diff --git a/changelog.d/bandit_update_1.5.2.change b/changelog.d/bandit_update_1.5.2.change new file mode 100644 index 0000000000..c4aae16367 --- /dev/null +++ b/changelog.d/bandit_update_1.5.2.change @@ -0,0 +1 @@ +Update Bandit to 1.5.2 diff --git a/mix.exs b/mix.exs index fe50139efa..a44daab8bf 100644 --- a/mix.exs +++ b/mix.exs @@ -188,7 +188,8 @@ defp deps do {:exile, git: "https://github.com/akash-akya/exile.git", ref: "be87c33b02a7c3c5d22d2ece01fbd462355b28ef"}, - {:bandit, "~> 1.2"}, + {:bandit, "~> 1.5.2"}, + {:websock_adapter, "~> 0.5.6"}, ## dev & test {:ex_doc, "~> 0.22", only: :dev, runtime: false}, diff --git a/mix.lock b/mix.lock index 86545adcf4..a55ad0126c 100644 --- a/mix.lock +++ b/mix.lock @@ -1,6 +1,6 @@ %{ "accept": {:hex, :accept, "0.3.5", "b33b127abca7cc948bbe6caa4c263369abf1347cfa9d8e699c6d214660f10cd1", [:rebar3], [], "hexpm", "11b18c220bcc2eab63b5470c038ef10eb6783bcb1fcdb11aa4137defa5ac1bb8"}, - "bandit": {:hex, :bandit, "1.2.1", "aa485b4ac175065b8e0fb5864ddd5dd7b50d52336b36f61c82f484c3718b3d15", [:mix], [{:hpax, "~> 0.1.1", [hex: :hpax, repo: "hexpm", optional: false]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:thousand_island, "~> 1.0", [hex: :thousand_island, repo: "hexpm", optional: false]}, {:websock, "~> 0.5", [hex: :websock, repo: "hexpm", optional: false]}], "hexpm", "27393e590a407f1b7d51c5fee4737f139fe224a30449ce25061eac70f763896b"}, + "bandit": {:hex, :bandit, "1.5.2", "ed0a41c43a9e529c670d0fd48371db4027e7b80d43b1942893e17deb8bed0540", [:mix], [{:hpax, "~> 0.1.1", [hex: :hpax, repo: "hexpm", optional: false]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: 
:telemetry, repo: "hexpm", optional: false]}, {:thousand_island, "~> 1.0", [hex: :thousand_island, repo: "hexpm", optional: false]}, {:websock, "~> 0.5", [hex: :websock, repo: "hexpm", optional: false]}], "hexpm", "35ddbdce7e8a2a3c6b5093f7299d70832a43ed2f4a1852885a61d334cab1b4ad"}, "base62": {:hex, :base62, "1.2.2", "85c6627eb609317b70f555294045895ffaaeb1758666ab9ef9ca38865b11e629", [:mix], [{:custom_base, "~> 0.2.1", [hex: :custom_base, repo: "hexpm", optional: false]}], "hexpm", "d41336bda8eaa5be197f1e4592400513ee60518e5b9f4dcf38f4b4dae6f377bb"}, "bbcode_pleroma": {:hex, :bbcode_pleroma, "0.2.0", "d36f5bca6e2f62261c45be30fa9b92725c0655ad45c99025cb1c3e28e25803ef", [:mix], [{:nimble_parsec, "~> 0.5", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "19851074419a5fedb4ef49e1f01b30df504bb5dbb6d6adfc135238063bebd1c3"}, "bcrypt_elixir": {:hex, :bcrypt_elixir, "2.3.1", "5114d780459a04f2b4aeef52307de23de961b69e13a5cd98a911e39fda13f420", [:make, :mix], [{:comeonin, "~> 5.3", [hex: :comeonin, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.6", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "42182d5f46764def15bf9af83739e3bf4ad22661b1c34fc3e88558efced07279"}, @@ -19,9 +19,9 @@ "connection": {:hex, :connection, "1.1.0", "ff2a49c4b75b6fb3e674bfc5536451607270aac754ffd1bdfe175abe4a6d7a68", [:mix], [], "hexpm", "722c1eb0a418fbe91ba7bd59a47e28008a189d47e37e0e7bb85585a016b2869c"}, "cors_plug": {:hex, :cors_plug, "2.0.3", "316f806d10316e6d10f09473f19052d20ba0a0ce2a1d910ddf57d663dac402ae", [:mix], [{:plug, "~> 1.8", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "ee4ae1418e6ce117fc42c2ba3e6cbdca4e95ecd2fe59a05ec6884ca16d469aea"}, "covertool": {:hex, :covertool, "2.0.6", "4a291b4e3449025b0595d8f44c8d7635d4f48f033be2ce88d22a329f36f94a91", [:rebar3], [], "hexpm", "5db3fcd82180d8ea4ad857d4d1ab21a8d31b5aee0d60d2f6c0f9e25a411d1e21"}, - "cowboy": {:hex, :cowboy, "2.10.0", "ff9ffeff91dae4ae270dd975642997afe2a1179d94b1887863e43f681a203e26", 
[:make, :rebar3], [{:cowlib, "2.12.1", [hex: :cowlib, repo: "hexpm", optional: false]}, {:ranch, "1.8.0", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm", "3afdccb7183cc6f143cb14d3cf51fa00e53db9ec80cdcd525482f5e99bc41d6b"}, + "cowboy": {:hex, :cowboy, "2.12.0", "f276d521a1ff88b2b9b4c54d0e753da6c66dd7be6c9fca3d9418b561828a3731", [:make, :rebar3], [{:cowlib, "2.13.0", [hex: :cowlib, repo: "hexpm", optional: false]}, {:ranch, "1.8.0", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm", "8a7abe6d183372ceb21caa2709bec928ab2b72e18a3911aa1771639bef82651e"}, "cowboy_telemetry": {:hex, :cowboy_telemetry, "0.4.0", "f239f68b588efa7707abce16a84d0d2acf3a0f50571f8bb7f56a15865aae820c", [:rebar3], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "7d98bac1ee4565d31b62d59f8823dfd8356a169e7fcbb83831b8a5397404c9de"}, - "cowlib": {:hex, :cowlib, "2.12.1", "a9fa9a625f1d2025fe6b462cb865881329b5caff8f1854d1cbc9f9533f00e1e1", [:make, :rebar3], [], "hexpm", "163b73f6367a7341b33c794c4e88e7dbfe6498ac42dcd69ef44c5bc5507c8db0"}, + "cowlib": {:hex, :cowlib, "2.13.0", "db8f7505d8332d98ef50a3ef34b34c1afddec7506e4ee4dd4a3a266285d282ca", [:make, :rebar3], [], "hexpm", "e1e1284dc3fc030a64b1ad0d8382ae7e99da46c3246b815318a4b848873800a4"}, "credo": {:hex, :credo, "1.7.3", "05bb11eaf2f2b8db370ecaa6a6bda2ec49b2acd5e0418bc106b73b07128c0436", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "35ea675a094c934c22fb1dca3696f3c31f2728ae6ef5a53b5d648c11180a4535"}, "crontab": {:hex, :crontab, "1.1.8", "2ce0e74777dfcadb28a1debbea707e58b879e6aa0ffbf9c9bb540887bce43617", [:mix], [{:ecto, "~> 1.0 or ~> 2.0 or ~> 3.0", [hex: :ecto, repo: "hexpm", optional: true]}], "hexpm"}, "custom_base": {:hex, 
:custom_base, "0.2.1", "4a832a42ea0552299d81652aa0b1f775d462175293e99dfbe4d7dbaab785a706", [:mix], [], "hexpm", "8df019facc5ec9603e94f7270f1ac73ddf339f56ade76a721eaa57c1493ba463"}, @@ -102,9 +102,9 @@ "phoenix_swoosh": {:hex, :phoenix_swoosh, "1.2.1", "b74ccaa8046fbc388a62134360ee7d9742d5a8ae74063f34eb050279de7a99e1", [:mix], [{:finch, "~> 0.8", [hex: :finch, repo: "hexpm", optional: true]}, {:hackney, "~> 1.10", [hex: :hackney, repo: "hexpm", optional: true]}, {:phoenix, "~> 1.6", [hex: :phoenix, repo: "hexpm", optional: true]}, {:phoenix_html, "~> 3.0 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: true]}, {:phoenix_view, "~> 1.0 or ~> 2.0", [hex: :phoenix_view, repo: "hexpm", optional: false]}, {:swoosh, "~> 1.5", [hex: :swoosh, repo: "hexpm", optional: false]}], "hexpm", "4000eeba3f9d7d1a6bf56d2bd56733d5cadf41a7f0d8ffe5bb67e7d667e204a2"}, "phoenix_template": {:hex, :phoenix_template, "1.0.4", "e2092c132f3b5e5b2d49c96695342eb36d0ed514c5b252a77048d5969330d639", [:mix], [{:phoenix_html, "~> 2.14.2 or ~> 3.0 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: true]}], "hexpm", "2c0c81f0e5c6753faf5cca2f229c9709919aba34fab866d3bc05060c9c444206"}, "phoenix_view": {:hex, :phoenix_view, "2.0.3", "4d32c4817fce933693741deeb99ef1392619f942633dde834a5163124813aad3", [:mix], [{:phoenix_html, "~> 2.14.2 or ~> 3.0 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: true]}, {:phoenix_template, "~> 1.0", [hex: :phoenix_template, repo: "hexpm", optional: false]}], "hexpm", "cd34049af41be2c627df99cd4eaa71fc52a328c0c3d8e7d4aa28f880c30e7f64"}, - "plug": {:hex, :plug, "1.15.3", "712976f504418f6dff0a3e554c40d705a9bcf89a7ccef92fc6a5ef8f16a30a97", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.1.1 or ~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", 
"cc4365a3c010a56af402e0809208873d113e9c38c401cabd88027ef4f5c01fd2"}, - "plug_cowboy": {:hex, :plug_cowboy, "2.6.2", "753611b23b29231fb916b0cdd96028084b12aff57bfd7b71781bd04b1dbeb5c9", [:mix], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:cowboy_telemetry, "~> 0.3", [hex: :cowboy_telemetry, repo: "hexpm", optional: false]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "951ed2433df22f4c97b85fdb145d4cee561f36b74854d64c06d896d7cd2921a7"}, - "plug_crypto": {:hex, :plug_crypto, "2.0.0", "77515cc10af06645abbfb5e6ad7a3e9714f805ae118fa1a70205f80d2d70fe73", [:mix], [], "hexpm", "53695bae57cc4e54566d993eb01074e4d894b65a3766f1c43e2c61a1b0f45ea9"}, + "plug": {:hex, :plug, "1.16.0", "1d07d50cb9bb05097fdf187b31cf087c7297aafc3fed8299aac79c128a707e47", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.1.1 or ~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "cbf53aa1f5c4d758a7559c0bd6d59e286c2be0c6a1fac8cc3eee2f638243b93e"}, + "plug_cowboy": {:hex, :plug_cowboy, "2.7.1", "87677ffe3b765bc96a89be7960f81703223fe2e21efa42c125fcd0127dd9d6b2", [:mix], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:cowboy_telemetry, "~> 0.3", [hex: :cowboy_telemetry, repo: "hexpm", optional: false]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "02dbd5f9ab571b864ae39418db7811618506256f6d13b4a45037e5fe78dc5de3"}, + "plug_crypto": {:hex, :plug_crypto, "2.1.0", "f44309c2b06d249c27c8d3f65cfe08158ade08418cf540fd4f72d4d6863abb7b", [:mix], [], "hexpm", "131216a4b030b8f8ce0f26038bc4421ae60e4bb95c5cf5395e1421437824c4fa"}, "plug_static_index_html": {:hex, :plug_static_index_html, "1.0.0", "840123d4d3975585133485ea86af73cb2600afd7f2a976f9f5fd8b3808e636a0", [:mix], [{:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: false]}], 
"hexpm", "79fd4fcf34d110605c26560cbae8f23c603ec4158c08298bd4360fdea90bb5cf"}, "poison": {:hex, :poison, "3.1.0", "d9eb636610e096f86f25d9a46f35a9facac35609a7591b3be3326e99a0484665", [:mix], [], "hexpm", "fec8660eb7733ee4117b85f55799fd3833eb769a6df71ccf8903e8dc5447cfce"}, "poolboy": {:hex, :poolboy, "1.5.2", "392b007a1693a64540cead79830443abf5762f5d30cf50bc95cb2c1aaafa006b", [:rebar3], [], "hexpm", "dad79704ce5440f3d5a3681c8590b9dc25d1a561e8f5a9c995281012860901e3"}, @@ -134,7 +134,7 @@ "telemetry_metrics_prometheus_core": {:hex, :telemetry_metrics_prometheus_core, "1.2.0", "b583c3f18508f5c5561b674d16cf5d9afd2ea3c04505b7d92baaeac93c1b8260", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:telemetry_metrics, "~> 0.6", [hex: :telemetry_metrics, repo: "hexpm", optional: false]}], "hexpm", "9cba950e1c4733468efbe3f821841f34ac05d28e7af7798622f88ecdbbe63ea3"}, "telemetry_poller": {:hex, :telemetry_poller, "1.0.0", "db91bb424e07f2bb6e73926fcafbfcbcb295f0193e0a00e825e589a0a47e8453", [:rebar3], [{:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "b3a24eafd66c3f42da30fc3ca7dda1e9d546c12250a2d60d7b81d264fbec4f6e"}, "tesla": {:hex, :tesla, "1.8.0", "d511a4f5c5e42538d97eef7c40ec4f3e44effdc5068206f42ed859e09e51d1fd", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:exjsx, ">= 3.0.0", [hex: :exjsx, repo: "hexpm", optional: true]}, {:finch, "~> 0.13", [hex: :finch, repo: "hexpm", optional: true]}, {:fuse, "~> 2.4", [hex: :fuse, repo: "hexpm", optional: true]}, {:gun, ">= 1.0.0", [hex: :gun, repo: "hexpm", optional: true]}, {:hackney, "~> 1.6", [hex: :hackney, repo: "hexpm", optional: true]}, {:ibrowse, "4.4.2", [hex: :ibrowse, repo: "hexpm", optional: true]}, {:jason, ">= 1.0.0", [hex: :jason, repo: "hexpm", optional: true]}, {:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.0", [hex: :mint, repo: "hexpm", optional: true]}, 
{:msgpax, "~> 2.3", [hex: :msgpax, repo: "hexpm", optional: true]}, {:poison, ">= 1.0.0", [hex: :poison, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm", "10501f360cd926a309501287470372af1a6e1cbed0f43949203a4c13300bc79f"}, - "thousand_island": {:hex, :thousand_island, "1.3.2", "bc27f9afba6e1a676dd36507d42e429935a142cf5ee69b8e3f90bff1383943cd", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "0e085b93012cd1057b378fce40cbfbf381ff6d957a382bfdd5eca1a98eec2535"}, + "thousand_island": {:hex, :thousand_island, "1.3.5", "6022b6338f1635b3d32406ff98d68b843ba73b3aa95cfc27154223244f3a6ca5", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "2be6954916fdfe4756af3239fb6b6d75d0b8063b5df03ba76fd8a4c87849e180"}, "timex": {:hex, :timex, "3.7.7", "3ed093cae596a410759104d878ad7b38e78b7c2151c6190340835515d4a46b8a", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.10", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 1.0", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm", "0ec4b09f25fe311321f9fc04144a7e3affe48eb29481d7a5583849b6c4dfa0a7"}, "toml": {:hex, :toml, "0.7.0", "fbcd773caa937d0c7a02c301a1feea25612720ac3fa1ccb8bfd9d30d822911de", [:mix], [], "hexpm", "0690246a2478c1defd100b0c9b89b4ea280a22be9a7b313a8a058a2408a2fa70"}, "trailing_format_plug": {:hex, :trailing_format_plug, "0.0.7", "64b877f912cf7273bed03379936df39894149e35137ac9509117e59866e10e45", [:mix], [{:plug, "> 0.12.0", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "bd4fde4c15f3e993a999e019d64347489b91b7a9096af68b2bdadd192afa693f"}, @@ -145,6 +145,6 @@ "vix": {:hex, :vix, "0.26.0", "027f10b6969b759318be84bd0bd8c88af877445e4e41cf96a0460392cea5399c", [:make, :mix], [{:castore, "~> 1.0 or ~> 0.1", [hex: :castore, repo: "hexpm", optional: false]}, 
{:cc_precompiler, "~> 0.2 or ~> 0.1.4", [hex: :cc_precompiler, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.8 or ~> 0.7.3", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:kino, "~> 0.7", [hex: :kino, repo: "hexpm", optional: true]}], "hexpm", "71b0a79ae7f199cacfc8e679b0e4ba25ee47dc02e182c5b9097efb29fbe14efd"}, "web_push_encryption": {:hex, :web_push_encryption, "0.3.1", "76d0e7375142dfee67391e7690e89f92578889cbcf2879377900b5620ee4708d", [:mix], [{:httpoison, "~> 1.0", [hex: :httpoison, repo: "hexpm", optional: false]}, {:jose, "~> 1.11.1", [hex: :jose, repo: "hexpm", optional: false]}], "hexpm", "4f82b2e57622fb9337559058e8797cb0df7e7c9790793bdc4e40bc895f70e2a2"}, "websock": {:hex, :websock, "0.5.3", "2f69a6ebe810328555b6fe5c831a851f485e303a7c8ce6c5f675abeb20ebdadc", [:mix], [], "hexpm", "6105453d7fac22c712ad66fab1d45abdf049868f253cf719b625151460b8b453"}, - "websock_adapter": {:hex, :websock_adapter, "0.5.5", "9dfeee8269b27e958a65b3e235b7e447769f66b5b5925385f5a569269164a210", [:mix], [{:bandit, ">= 0.6.0", [hex: :bandit, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.6", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:websock, "~> 0.5", [hex: :websock, repo: "hexpm", optional: false]}], "hexpm", "4b977ba4a01918acbf77045ff88de7f6972c2a009213c515a445c48f224ffce9"}, + "websock_adapter": {:hex, :websock_adapter, "0.5.6", "0437fe56e093fd4ac422de33bf8fc89f7bc1416a3f2d732d8b2c8fd54792fe60", [:mix], [{:bandit, ">= 0.6.0", [hex: :bandit, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.6", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:websock, "~> 0.5", [hex: :websock, repo: "hexpm", optional: false]}], "hexpm", "e04378d26b0af627817ae84c92083b7e97aca3121196679b73c73b99d0d133ea"}, "websockex": {:hex, :websockex, "0.4.3", "92b7905769c79c6480c02daacaca2ddd49de936d912976a4d3c923723b647bf0", [:mix], [], 
"hexpm", "95f2e7072b85a3a4cc385602d42115b73ce0b74a9121d0d6dbbf557645ac53e4"}, } From b5fcb82bffd3f31cf1318c1504fcb97e56b892cd Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Thu, 30 May 2024 10:47:51 -0400 Subject: [PATCH 17/22] Test for missing FK indexes --- test/fixtures/unindexed_fk.sql | 27 +++++++++++++++++++++++++++ test/pleroma/schema_test.exs | 17 +++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 test/fixtures/unindexed_fk.sql create mode 100644 test/pleroma/schema_test.exs diff --git a/test/fixtures/unindexed_fk.sql b/test/fixtures/unindexed_fk.sql new file mode 100644 index 0000000000..3b71679cfa --- /dev/null +++ b/test/fixtures/unindexed_fk.sql @@ -0,0 +1,27 @@ +-- Unindexed FK -- Missing indexes - For CI + +WITH y AS ( +SELECT +pg_catalog.format('%I', c1.relname) AS referencing_tbl, +pg_catalog.quote_ident(a1.attname) AS referencing_column, +(SELECT pg_get_expr(indpred, indrelid) FROM pg_catalog.pg_index WHERE indrelid = t.conrelid AND indkey[0] = t.conkey[1] AND indpred IS NOT NULL LIMIT 1) partial_statement +FROM pg_catalog.pg_constraint t +JOIN pg_catalog.pg_attribute a1 ON a1.attrelid = t.conrelid AND a1.attnum = t.conkey[1] +JOIN pg_catalog.pg_class c1 ON c1.oid = t.conrelid +JOIN pg_catalog.pg_namespace n1 ON n1.oid = c1.relnamespace +JOIN pg_catalog.pg_class c2 ON c2.oid = t.confrelid +JOIN pg_catalog.pg_namespace n2 ON n2.oid = c2.relnamespace +JOIN pg_catalog.pg_attribute a2 ON a2.attrelid = t.confrelid AND a2.attnum = t.confkey[1] +WHERE t.contype = 'f' +AND NOT EXISTS ( +SELECT 1 +FROM pg_catalog.pg_index i +WHERE i.indrelid = t.conrelid +AND i.indkey[0] = t.conkey[1] +AND indpred IS NULL +) +) +SELECT referencing_tbl || '.' 
|| referencing_column as "column" +FROM y +WHERE (partial_statement IS NULL OR partial_statement <> ('(' || referencing_column || ' IS NOT NULL)')) +ORDER BY 1; \ No newline at end of file diff --git a/test/pleroma/schema_test.exs b/test/pleroma/schema_test.exs new file mode 100644 index 0000000000..9bddd2031d --- /dev/null +++ b/test/pleroma/schema_test.exs @@ -0,0 +1,17 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2022 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.SchemaTest do + use Pleroma.DataCase, async: true + + alias Pleroma.Repo + + test "No unindexed foreign keys" do + query = File.read!("test/fixtures/unindexed_fk.sql") + + {:ok, result} = Repo.query(query) + + assert Enum.empty?(result.rows) + end +end From c20ac6d1adc224232422640d8bc11a80f5eff350 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 29 May 2024 21:27:35 -0400 Subject: [PATCH 18/22] Add missing foreign key indexes --- ...0240530011739_add_missing_foreign_keys.exs | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 priv/repo/migrations/20240530011739_add_missing_foreign_keys.exs diff --git a/priv/repo/migrations/20240530011739_add_missing_foreign_keys.exs b/priv/repo/migrations/20240530011739_add_missing_foreign_keys.exs new file mode 100644 index 0000000000..158f9701b3 --- /dev/null +++ b/priv/repo/migrations/20240530011739_add_missing_foreign_keys.exs @@ -0,0 +1,20 @@ +defmodule Pleroma.Repo.Migrations.AddMissingForeignKeys do + use Ecto.Migration + + def change do + create_if_not_exists(index(:announcement_read_relationships, :announcement_id)) + create_if_not_exists(index(:bookmarks, :activity_id)) + create_if_not_exists(index(:bookmarks, :folder_id)) + create_if_not_exists(index(:chats, :recipient)) + create_if_not_exists(index(:mfa_tokens, :authorization_id)) + create_if_not_exists(index(:mfa_tokens, :user_id)) + create_if_not_exists(index(:notifications, :activity_id)) + 
create_if_not_exists(index(:oauth_authorizations, :app_id)) + create_if_not_exists(index(:oauth_authorizations, :user_id)) + create_if_not_exists(index(:password_reset_tokens, :user_id)) + create_if_not_exists(index(:push_subscriptions, :token_id)) + create_if_not_exists(index(:report_notes, :activity_id)) + create_if_not_exists(index(:report_notes, :user_id)) + create_if_not_exists(index(:user_notes, :target_id)) + end +end From 5f6e477ecaa941a79b22599aca169164b7241bcf Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Thu, 30 May 2024 10:51:50 -0400 Subject: [PATCH 19/22] Missing FKs changelog --- changelog.d/missing-fks.add | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/missing-fks.add diff --git a/changelog.d/missing-fks.add b/changelog.d/missing-fks.add new file mode 100644 index 0000000000..cf74de03bc --- /dev/null +++ b/changelog.d/missing-fks.add @@ -0,0 +1 @@ +Add missing indexes on foreign key relationships From f5065eaf99a76695df26966055f90368df7043f3 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Thu, 30 May 2024 11:09:42 -0400 Subject: [PATCH 20/22] Fix Logger.warn deprecation error on OTP25 --- changelog.d/mrf-nsfw-otp25.skip | 1 + lib/pleroma/web/activity_pub/mrf/nsfw_api_policy.ex | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/mrf-nsfw-otp25.skip diff --git a/changelog.d/mrf-nsfw-otp25.skip b/changelog.d/mrf-nsfw-otp25.skip new file mode 100644 index 0000000000..e804f19a00 --- /dev/null +++ b/changelog.d/mrf-nsfw-otp25.skip @@ -0,0 +1 @@ +noop diff --git a/lib/pleroma/web/activity_pub/mrf/nsfw_api_policy.ex b/lib/pleroma/web/activity_pub/mrf/nsfw_api_policy.ex index f7863039b3..3d1c273b9a 100644 --- a/lib/pleroma/web/activity_pub/mrf/nsfw_api_policy.ex +++ b/lib/pleroma/web/activity_pub/mrf/nsfw_api_policy.ex @@ -64,7 +64,7 @@ def parse_url(url) do Jason.decode(body) else error -> - Logger.warn(""" + Logger.warning(""" [NsfwApiPolicy]: The API server failed. Skipping. 
#{inspect(error)} """) From cfc8d7aade526b8f119683984977064cd3cd3d87 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 28 May 2024 13:14:34 -0400 Subject: [PATCH 21/22] IPFS uploader: dialyzer fixes lib/pleroma/uploaders/ipfs.ex:43:no_return Function put_file/1 has no local return. ________________________________________________________________________________ lib/pleroma/uploaders/ipfs.ex:49:call The function call will not succeed. Pleroma.HTTP.post( binary(), _mp :: %Tesla.Multipart{ :boundary => binary(), :content_type_params => [binary()], :parts => [ %Tesla.Multipart.Part{ :body => binary(), :dispositions => [any()], :headers => [any()] }, ... ] }, [], [{:params, [{:"cid-version", <<49>>}]}] ) will never return since the success typing is: (binary(), binary(), [{binary(), binary()}], Keyword.t()) :: {:error, _} | {:ok, %Tesla.Env{ :__client__ => %Tesla.Client{ :adapter => nil | {_, _} | {_, _, _}, :fun => _, :post => [any()], :pre => [any()] }, :__module__ => atom(), :body => _, :headers => [{_, _}], :method => :delete | :get | :head | :options | :patch | :post | :put | :trace, :opts => [{_, _}], :query => [{_, _}], :status => nil | integer(), :url => binary() }} and the contract is (Pleroma.HTTP.Request.url(), String.t(), Pleroma.HTTP.Request.headers(), :elixir.keyword()) :: {:ok, Tesla.Env.t()} | {:error, any()} --- changelog.d/ipfs-dialyzer.skip | 1 + config/config.exs | 4 +- lib/pleroma/http.ex | 4 +- lib/pleroma/uploaders/ipfs.ex | 57 +++++++++++++--------------- test/pleroma/uploaders/ipfs_test.exs | 51 ++++++++++++------------- 5 files changed, 55 insertions(+), 62 deletions(-) create mode 100644 changelog.d/ipfs-dialyzer.skip diff --git a/changelog.d/ipfs-dialyzer.skip b/changelog.d/ipfs-dialyzer.skip new file mode 100644 index 0000000000..b3e74cd19a --- /dev/null +++ b/changelog.d/ipfs-dialyzer.skip @@ -0,0 +1 @@ +no comment diff --git a/config/config.exs b/config/config.exs index c3b8ae0b72..a40ed28af1 100644 --- a/config/config.exs +++ 
b/config/config.exs @@ -83,8 +83,8 @@ scheme: "https://" config :pleroma, Pleroma.Uploaders.IPFS, - post_gateway_url: nil, - get_gateway_url: nil + post_gateway_url: "http://localhost:5001", + get_gateway_url: "http://localhost:8080" config :pleroma, :emoji, shortcode_globs: ["/emoji/custom/**/*.png"], diff --git a/lib/pleroma/http.ex b/lib/pleroma/http.ex index eec61cf141..ec837e5092 100644 --- a/lib/pleroma/http.ex +++ b/lib/pleroma/http.ex @@ -37,7 +37,7 @@ def head(url, headers \\ [], options \\ []), do: request(:head, url, "", headers See `Pleroma.HTTP.request/5` """ - @spec post(Request.url(), String.t(), Request.headers(), keyword()) :: + @spec post(Request.url(), Tesla.Env.body(), Request.headers(), keyword()) :: {:ok, Env.t()} | {:error, any()} def post(url, body, headers \\ [], options \\ []), do: request(:post, url, body, headers, options) @@ -56,7 +56,7 @@ def post(url, body, headers \\ [], options \\ []), `{:ok, %Tesla.Env{}}` or `{:error, error}` """ - @spec request(method(), Request.url(), String.t(), Request.headers(), keyword()) :: + @spec request(method(), Request.url(), Tesla.Env.body(), Request.headers(), keyword()) :: {:ok, Env.t()} | {:error, any()} def request(method, url, body, headers, options) when is_binary(url) do uri = URI.parse(url) diff --git a/lib/pleroma/uploaders/ipfs.ex b/lib/pleroma/uploaders/ipfs.ex index d171e46525..5930a129e2 100644 --- a/lib/pleroma/uploaders/ipfs.ex +++ b/lib/pleroma/uploaders/ipfs.ex @@ -8,23 +8,10 @@ defmodule Pleroma.Uploaders.IPFS do alias Tesla.Multipart + @api_add "/api/v0/add" + @api_delete "/api/v0/files/rm" @config_impl Application.compile_env(:pleroma, [__MODULE__, :config_impl], Pleroma.Config) - defp get_final_url(method) do - config = @config_impl.get([__MODULE__]) - post_base_url = Keyword.get(config, :post_gateway_url) - - Path.join([post_base_url, method]) - end - - def put_file_endpoint do - get_final_url("/api/v0/add") - end - - def delete_file_endpoint do - 
get_final_url("/api/v0/files/rm") - end - @placeholder "{CID}" def placeholder, do: @placeholder @@ -40,26 +27,26 @@ def get_file(file) do end @impl true - def put_file(%Pleroma.Upload{} = upload) do + def put_file(%Pleroma.Upload{tempfile: tempfile}) do mp = Multipart.new() |> Multipart.add_content_type_param("charset=utf-8") - |> Multipart.add_file(upload.tempfile) + |> Multipart.add_file(tempfile) - case Pleroma.HTTP.post(put_file_endpoint(), mp, [], params: ["cid-version": "1"]) do - {:ok, ret} -> - case Jason.decode(ret.body) do - {:ok, ret} -> - if Map.has_key?(ret, "Hash") do - {:ok, {:file, ret["Hash"]}} - else - {:error, "JSON doesn't contain Hash key"} - end + endpoint = ipfs_endpoint(@api_add) - error -> - Logger.error("#{__MODULE__}: #{inspect(error)}") - {:error, "JSON decode failed"} - end + with {:ok, %{body: body}} when is_binary(body) <- + Pleroma.HTTP.post(endpoint, mp, [], params: ["cid-version": "1"], pool: :upload), + {_, {:ok, decoded}} <- {:json, Jason.decode(body)}, + {_, true} <- {:hash, Map.has_key?(decoded, "Hash")} do + {:ok, {:file, decoded["Hash"]}} + else + {:hash, false} -> + {:error, "JSON doesn't contain Hash key"} + + {:json, error} -> + Logger.error("#{__MODULE__}: #{inspect(error)}") + {:error, "JSON decode failed"} error -> Logger.error("#{__MODULE__}: #{inspect(error)}") @@ -69,9 +56,17 @@ def put_file(%Pleroma.Upload{} = upload) do @impl true def delete_file(file) do - case Pleroma.HTTP.post(delete_file_endpoint(), "", [], params: [arg: file]) do + endpoint = ipfs_endpoint(@api_delete) + + case Pleroma.HTTP.post(endpoint, "", [], params: [arg: file]) do {:ok, %{status: 204}} -> :ok error -> {:error, inspect(error)} end end + + defp ipfs_endpoint(path) do + URI.parse(@config_impl.get([__MODULE__, :post_gateway_url])) + |> Map.put(:path, path) + |> URI.to_string() + end end diff --git a/test/pleroma/uploaders/ipfs_test.exs b/test/pleroma/uploaders/ipfs_test.exs index cf325b54f2..bdf2933ac5 100644 --- 
a/test/pleroma/uploaders/ipfs_test.exs +++ b/test/pleroma/uploaders/ipfs_test.exs @@ -14,25 +14,6 @@ defmodule Pleroma.Uploaders.IPFSTest do alias Pleroma.UnstubbedConfigMock, as: Config - describe "get_final_url" do - setup do - Config - |> expect(:get, fn [Pleroma.Uploaders.IPFS] -> - [post_gateway_url: "http://localhost:5001"] - end) - - :ok - end - - test "it returns the final url for put_file" do - assert IPFS.put_file_endpoint() == "http://localhost:5001/api/v0/add" - end - - test "it returns the final url for delete_file" do - assert IPFS.delete_file_endpoint() == "http://localhost:5001/api/v0/files/rm" - end - end - describe "get_file/1" do setup do Config @@ -71,8 +52,8 @@ test "it returns path to ipfs file with cid as path" do describe "put_file/1" do setup do Config - |> expect(:get, fn [Pleroma.Uploaders.IPFS] -> - [post_gateway_url: "http://localhost:5001"] + |> expect(:get, fn [Pleroma.Uploaders.IPFS, :post_gateway_url] -> + "http://localhost:5001" end) file_upload = %Pleroma.Upload{ @@ -92,7 +73,11 @@ test "it returns path to ipfs file with cid as path" do test "save file", %{file_upload: file_upload} do with_mock Pleroma.HTTP, - post: fn "http://localhost:5001/api/v0/add", _mp, [], params: ["cid-version": "1"] -> + post: fn "http://localhost:5001/api/v0/add", + _mp, + [], + params: ["cid-version": "1"], + pool: :upload -> {:ok, %Tesla.Env{ status: 200, @@ -107,7 +92,11 @@ test "save file", %{file_upload: file_upload} do test "returns error", %{file_upload: file_upload} do with_mock Pleroma.HTTP, - post: fn "http://localhost:5001/api/v0/add", _mp, [], params: ["cid-version": "1"] -> + post: fn "http://localhost:5001/api/v0/add", + _mp, + [], + params: ["cid-version": "1"], + pool: :upload -> {:error, "IPFS Gateway upload failed"} end do assert capture_log(fn -> @@ -118,7 +107,11 @@ test "returns error", %{file_upload: file_upload} do test "returns error if JSON decode fails", %{file_upload: file_upload} do with_mock Pleroma.HTTP, [], - post: fn 
"http://localhost:5001/api/v0/add", _mp, [], params: ["cid-version": "1"] -> + post: fn "http://localhost:5001/api/v0/add", + _mp, + [], + params: ["cid-version": "1"], + pool: :upload -> {:ok, %Tesla.Env{status: 200, body: "invalid"}} end do assert capture_log(fn -> @@ -130,7 +123,11 @@ test "returns error if JSON decode fails", %{file_upload: file_upload} do test "returns error if JSON body doesn't contain Hash key", %{file_upload: file_upload} do with_mock Pleroma.HTTP, [], - post: fn "http://localhost:5001/api/v0/add", _mp, [], params: ["cid-version": "1"] -> + post: fn "http://localhost:5001/api/v0/add", + _mp, + [], + params: ["cid-version": "1"], + pool: :upload -> {:ok, %Tesla.Env{status: 200, body: "{\"key\": \"value\"}"}} end do assert IPFS.put_file(file_upload) == {:error, "JSON doesn't contain Hash key"} @@ -141,8 +138,8 @@ test "returns error if JSON body doesn't contain Hash key", %{file_upload: file_ describe "delete_file/1" do setup do Config - |> expect(:get, fn [Pleroma.Uploaders.IPFS] -> - [post_gateway_url: "http://localhost:5001"] + |> expect(:get, fn [Pleroma.Uploaders.IPFS, :post_gateway_url] -> + "http://localhost:5001" end) :ok From 0302431888d457d254f152a502946e6ffe7935e4 Mon Sep 17 00:00:00 2001 From: Floatingghost Date: Fri, 31 May 2024 09:04:00 -0400 Subject: [PATCH 22/22] Use proper workers for fetching pins instead of an ad-hoc task BUG: https://git.pleroma.social/pleroma/pleroma/-/issues/3276 --- changelog.d/pinned-collection-fetch.security | 1 + lib/pleroma/web/activity_pub/activity_pub.ex | 25 ++++++++-------- .../web/activity_pub/activity_pub_test.exs | 30 +++++++++++++++---- 3 files changed, 38 insertions(+), 18 deletions(-) create mode 100644 changelog.d/pinned-collection-fetch.security diff --git a/changelog.d/pinned-collection-fetch.security b/changelog.d/pinned-collection-fetch.security new file mode 100644 index 0000000000..4e87469242 --- /dev/null +++ b/changelog.d/pinned-collection-fetch.security @@ -0,0 +1 @@ +Use proper 
workers for fetching pins instead of an ad-hoc task, fixing a potential fetch loop diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex index 5bb0fba6e6..1247ae7cea 100644 --- a/lib/pleroma/web/activity_pub/activity_pub.ex +++ b/lib/pleroma/web/activity_pub/activity_pub.ex @@ -1794,24 +1794,25 @@ def fetch_and_prepare_featured_from_ap_id(ap_id) do end end - def pinned_fetch_task(nil), do: nil - - def pinned_fetch_task(%{pinned_objects: pins}) do - if Enum.all?(pins, fn {ap_id, _} -> - Object.get_cached_by_ap_id(ap_id) || - match?({:ok, _object}, Fetcher.fetch_object_from_id(ap_id)) - end) do - :ok - else - :error - end + def enqueue_pin_fetches(%{pinned_objects: pins}) do + # enqueue a task to fetch all pinned objects + Enum.each(pins, fn {ap_id, _} -> + if is_nil(Object.get_cached_by_ap_id(ap_id)) do + Pleroma.Workers.RemoteFetcherWorker.enqueue("fetch_remote", %{ + "id" => ap_id, + "depth" => 1 + }) + end + end) end + def enqueue_pin_fetches(_), do: nil + def make_user_from_ap_id(ap_id, additional \\ []) do user = User.get_cached_by_ap_id(ap_id) with {:ok, data} <- fetch_and_prepare_user_from_ap_id(ap_id, additional) do - {:ok, _pid} = Task.start(fn -> pinned_fetch_task(data) end) + enqueue_pin_fetches(data) if user do user diff --git a/test/pleroma/web/activity_pub/activity_pub_test.exs b/test/pleroma/web/activity_pub/activity_pub_test.exs index 5242943855..d278125ee5 100644 --- a/test/pleroma/web/activity_pub/activity_pub_test.exs +++ b/test/pleroma/web/activity_pub/activity_pub_test.exs @@ -291,9 +291,7 @@ test "fetches user featured collection" do body: featured_data, headers: [{"content-type", "application/activity+json"}] } - end) - Tesla.Mock.mock_global(fn %{ method: :get, url: ^object_url @@ -306,7 +304,18 @@ test "fetches user featured collection" do end) {:ok, user} = ActivityPub.make_user_from_ap_id(ap_id) - Process.sleep(50) + + assert_enqueued( + worker: Pleroma.Workers.RemoteFetcherWorker, + args: 
%{ + "op" => "fetch_remote", + "id" => object_url, + "depth" => 1 + } + ) + + # wait for oban + Pleroma.Tests.ObanHelpers.perform_all() assert user.featured_address == featured_url assert Map.has_key?(user.pinned_objects, object_url) @@ -368,9 +377,7 @@ test "fetches user featured collection without embedded object" do body: featured_data, headers: [{"content-type", "application/activity+json"}] } - end) - Tesla.Mock.mock_global(fn %{ method: :get, url: ^object_url @@ -383,7 +390,18 @@ test "fetches user featured collection without embedded object" do end) {:ok, user} = ActivityPub.make_user_from_ap_id(ap_id) - Process.sleep(50) + + assert_enqueued( + worker: Pleroma.Workers.RemoteFetcherWorker, + args: %{ + "op" => "fetch_remote", + "id" => object_url, + "depth" => 1 + } + ) + + # wait for oban + Pleroma.Tests.ObanHelpers.perform_all() assert user.featured_address == featured_url assert Map.has_key?(user.pinned_objects, object_url)