diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ff70e6e51..b5c42d1fdb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Metadata: RelMe provider - OAuth: added support for refresh tokens - Emoji packs and emoji pack manager +- Object pruning (`mix pleroma.database prune_objects`) ### Changed - **Breaking:** Configuration: move from Pleroma.Mailer to Pleroma.Emails.Mailer diff --git a/config/config.exs b/config/config.exs index c3301b2edb..a05f8b1d24 100644 --- a/config/config.exs +++ b/config/config.exs @@ -239,7 +239,8 @@ welcome_message: nil, max_report_comment_size: 1000, safe_dm_mentions: false, - healthcheck: false + healthcheck: false, + remote_post_retention_days: 90 config :pleroma, :app_account_creation, enabled: true, max_requests: 25, interval: 1800 diff --git a/docs/config.md b/docs/config.md index 197326bbd7..a050068f4d 100644 --- a/docs/config.md +++ b/docs/config.md @@ -104,6 +104,7 @@ config :pleroma, Pleroma.Emails.Mailer, * `max_report_comment_size`: The maximum size of the report comment (Default: `1000`) * `safe_dm_mentions`: If set to true, only mentions at the beginning of a post will be used to address people in direct messages. This is to prevent accidental mentioning of people when talking about them (e.g. "@friend hey i really don't like @enemy"). (Default: `false`) * `healthcheck`: if set to true, system data will be shown on ``/api/pleroma/healthcheck``. 
+* `remote_post_retention_days`: The default number of days to retain remote posts when pruning the database (Default: `90`) ## :app_account_creation REST API for creating an account settings diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index f650b447dd..f9bafb2773 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -5,6 +5,7 @@ defmodule Mix.Tasks.Pleroma.Database do alias Mix.Tasks.Pleroma.Common alias Pleroma.Conversation + alias Pleroma.Object alias Pleroma.Repo alias Pleroma.User require Logger @@ -23,6 +24,10 @@ defmodule Mix.Tasks.Pleroma.Database do Options: - `--vacuum` - run `VACUUM FULL` after the embedded objects are replaced with their references + ## Prune old objects from the database + + mix pleroma.database prune_objects + ## Create a conversation for all existing DMs. Can be safely re-run. mix pleroma.database bump_all_conversations @@ -72,4 +77,46 @@ def run(["update_users_following_followers_counts"]) do Enum.each(users, &User.remove_duplicated_following/1) Enum.each(users, &User.update_follower_count/1) end + + def run(["prune_objects" | args]) do + import Ecto.Query + + {options, [], []} = + OptionParser.parse( + args, + strict: [ + vacuum: :boolean + ] + ) + + Common.start_pleroma() + + deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + + Logger.info("Pruning objects older than #{deadline} days") + + time_deadline = + NaiveDateTime.utc_now() + |> NaiveDateTime.add(-(deadline * 86_400)) + + public = "https://www.w3.org/ns/activitystreams#Public" + + from(o in Object, + where: fragment("?->'to' \\? ? OR ?->'cc' \\? ?", o.data, ^public, o.data, ^public), + where: o.inserted_at < ^time_deadline, + where: + fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host()) + ) + |> Repo.delete_all() + + if Keyword.get(options, :vacuum) do + Logger.info("Running VACUUM FULL") + + Repo.query!( + "vacuum full;", + [], + timeout: :infinity + ) + end + end end diff --git a/lib/pleroma/object.ex b/lib/pleroma/object.ex index 740d687a35..cc6fc9c5df 100644 --- a/lib/pleroma/object.ex +++ b/lib/pleroma/object.ex @@ -130,6 +130,13 @@ def delete(%Object{data: %{"id" => id}} = object) do end end + def prune(%Object{data: %{"id" => id}} = object) do + with {:ok, object} <- Repo.delete(object), + {:ok, true} <- Cachex.del(:object_cache, "object:#{id}") do + {:ok, object} + end + end + def set_cache(%Object{data: %{"id" => ap_id}} = object) do Cachex.put(:object_cache, "object:#{ap_id}", object) {:ok, object} diff --git a/lib/pleroma/object/fetcher.ex b/lib/pleroma/object/fetcher.ex index 8d4bcc95ef..bb9388d4f9 100644 --- a/lib/pleroma/object/fetcher.ex +++ b/lib/pleroma/object/fetcher.ex @@ -8,6 +8,19 @@ defmodule Pleroma.Object.Fetcher do @httpoison Application.get_env(:pleroma, :httpoison) + defp reinject_object(data) do + Logger.debug("Reinjecting object #{data["id"]}") + + with data <- Transmogrifier.fix_object(data), + {:ok, object} <- Object.create(data) do + {:ok, object} + else + e -> + Logger.error("Error while processing object: #{inspect(e)}") + {:error, e} + end + end + # TODO: # This will create a Create activity, which we need internally at the moment.
def fetch_object_from_id(id) do @@ -26,12 +39,17 @@ def fetch_object_from_id(id) do "object" => data }, :ok <- Containment.contain_origin(id, params), - {:ok, activity} <- Transmogrifier.handle_incoming(params) do - {:ok, Object.normalize(activity, false)} + {:ok, activity} <- Transmogrifier.handle_incoming(params), + {:object, _data, %Object{} = object} <- + {:object, data, Object.normalize(activity, false)} do + {:ok, object} else {:error, {:reject, nil}} -> {:reject, nil} + {:object, data, nil} -> + reinject_object(data) + object = %Object{} -> {:ok, object} diff --git a/test/object/fetcher_test.exs b/test/object/fetcher_test.exs index 72f6167829..d604fd5f59 100644 --- a/test/object/fetcher_test.exs +++ b/test/object/fetcher_test.exs @@ -87,4 +87,23 @@ test "all objects with fake directions are rejected by the object fetcher" do ) end end + + describe "pruning" do + test "it can refetch pruned objects" do + object_id = "http://mastodon.example.org/@admin/99541947525187367" + + {:ok, object} = Fetcher.fetch_object_from_id(object_id) + + assert object + + {:ok, _object} = Object.prune(object) + + refute Object.get_by_ap_id(object_id) + + {:ok, %Object{} = object_two} = Fetcher.fetch_object_from_id(object_id) + + assert object.data["id"] == object_two.data["id"] + assert object.id != object_two.id + end + end end