From 9a2523a09a2d9ccf5db12ef648444c51a98ab113 Mon Sep 17 00:00:00 2001 From: "Haelwenn (lanodan) Monnier" Date: Mon, 6 Mar 2023 21:16:34 +0100 Subject: [PATCH] instances: Store some metadata based on NodeInfo --- lib/pleroma/instances/instance.ex | 100 +++++++++++++++++- .../20230306112859_instances_add_metadata.exs | 14 +++ test/fixtures/mastodon-nodeinfo20.json | 1 + .../mastodon-well-known-nodeinfo.json | 1 + test/fixtures/wildebeest-nodeinfo21.json | 1 + .../wildebeest-well-known-nodeinfo.json | 1 + test/pleroma/instances/instance_test.exs | 60 +++++++++++ 7 files changed, 177 insertions(+), 1 deletion(-) create mode 100644 priv/repo/migrations/20230306112859_instances_add_metadata.exs create mode 100644 test/fixtures/mastodon-nodeinfo20.json create mode 100644 test/fixtures/mastodon-well-known-nodeinfo.json create mode 100644 test/fixtures/wildebeest-nodeinfo21.json create mode 100644 test/fixtures/wildebeest-well-known-nodeinfo.json diff --git a/lib/pleroma/instances/instance.ex b/lib/pleroma/instances/instance.ex index a5529ad44d..9756c66dc3 100644 --- a/lib/pleroma/instances/instance.ex +++ b/lib/pleroma/instances/instance.ex @@ -7,6 +7,7 @@ defmodule Pleroma.Instances.Instance do alias Pleroma.Instances alias Pleroma.Instances.Instance + alias Pleroma.Maps alias Pleroma.Repo alias Pleroma.User alias Pleroma.Workers.BackgroundWorker @@ -24,6 +25,14 @@ defmodule Pleroma.Instances.Instance do field(:favicon, :string) field(:favicon_updated_at, :naive_datetime) + embeds_one :metadata, Pleroma.Instances.Metadata, primary_key: false do + field(:software_name, :string) + field(:software_version, :string) + field(:software_repository, :string) + end + + field(:metadata_updated_at, :utc_datetime) + timestamps() end @@ -31,11 +40,17 @@ defmodule Pleroma.Instances.Instance do def changeset(struct, params \\ %{}) do struct - |> cast(params, [:host, :unreachable_since, :favicon, :favicon_updated_at]) + |> cast(params, __schema__(:fields) -- [:metadata]) + |> cast_embed(:metadata, with: &metadata_changeset/2) |> validate_required([:host]) |> unique_constraint(:host) end + def metadata_changeset(struct, params \\ %{}) do + struct + |> cast(params, [:software_name, :software_version, :software_repository]) + end + def filter_reachable([]), do: %{} def filter_reachable(urls_or_hosts) when is_list(urls_or_hosts) do @@ -198,6 +213,89 @@ defp scrape_favicon(%URI{} = instance_uri) do end end + def get_or_update_metadata(%URI{host: host} = instance_uri) do + existing_record = Repo.get_by(Instance, %{host: host}) + now = NaiveDateTime.utc_now() + + if existing_record && existing_record.metadata_updated_at && + NaiveDateTime.diff(now, existing_record.metadata_updated_at) < 86_400 do + existing_record.metadata + else + metadata = scrape_metadata(instance_uri) + + if existing_record do + existing_record + |> changeset(%{metadata: metadata, metadata_updated_at: now}) + |> Repo.update() + else + %Instance{} + |> changeset(%{host: host, metadata: metadata, metadata_updated_at: now}) + |> Repo.insert() + end + + metadata + end + end + + defp get_nodeinfo_uri(well_known) do + links = Map.get(well_known, "links", []) + + nodeinfo21 = + Enum.find(links, &(&1["rel"] == "http://nodeinfo.diaspora.software/ns/schema/2.1"))["href"] + + nodeinfo20 = + Enum.find(links, &(&1["rel"] == "http://nodeinfo.diaspora.software/ns/schema/2.0"))["href"] + + cond do + is_binary(nodeinfo21) -> {:ok, nodeinfo21} + is_binary(nodeinfo20) -> {:ok, nodeinfo20} + true -> {:error, :no_links} + end + end + + defp scrape_metadata(%URI{} = instance_uri) do + try do + with {_, true} <- {:reachable, reachable?(instance_uri.host)}, + {:ok, %Tesla.Env{body: well_known_body}} <- + instance_uri + |> URI.merge("/.well-known/nodeinfo") + |> to_string() + |> Pleroma.HTTP.get([{"accept", "application/json"}]), + {:ok, well_known_json} <- Jason.decode(well_known_body), + {:ok, nodeinfo_uri} <- get_nodeinfo_uri(well_known_json), + {:ok, %Tesla.Env{body: nodeinfo_body}} <- + Pleroma.HTTP.get(nodeinfo_uri, [{"accept", "application/json"}]), + {:ok, nodeinfo} <- Jason.decode(nodeinfo_body) do + # Can extract more metadata from NodeInfo but need to be careful about it's size, + # can't just dump the entire thing + software = Map.get(nodeinfo, "software", %{}) + + %{ + software_name: software["name"], + software_version: software["version"] + } + |> Maps.put_if_present(:software_repository, software["repository"]) + else + {:reachable, false} -> + Logger.debug( + "Instance.scrape_metadata(\"#{to_string(instance_uri)}\") ignored unreachable host" + ) + + nil + + _ -> + nil + end + rescue + e -> + Logger.warn( + "Instance.scrape_metadata(\"#{to_string(instance_uri)}\") error: #{inspect(e)}" + ) + + nil + end + end + @doc """ Deletes all users from an instance in a background task, thus also deleting all of those users' activities and notifications. diff --git a/priv/repo/migrations/20230306112859_instances_add_metadata.exs b/priv/repo/migrations/20230306112859_instances_add_metadata.exs new file mode 100644 index 0000000000..898f5220e0 --- /dev/null +++ b/priv/repo/migrations/20230306112859_instances_add_metadata.exs @@ -0,0 +1,14 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2023 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Repo.Migrations.InstancesAddMetadata do + use Ecto.Migration + + def change do + alter table(:instances) do + add(:metadata, :map) + add(:metadata_updated_at, :utc_datetime) + end + end +end diff --git a/test/fixtures/mastodon-nodeinfo20.json b/test/fixtures/mastodon-nodeinfo20.json new file mode 100644 index 0000000000..35010fdf01 --- /dev/null +++ b/test/fixtures/mastodon-nodeinfo20.json @@ -0,0 +1 @@ +{"version":"2.0","software":{"name":"mastodon","version":"4.1.0"},"protocols":["activitypub"],"services":{"outbound":[],"inbound":[]},"usage":{"users":{"total":971090,"activeMonth":167218,"activeHalfyear":384808},"localPosts":52071541},"openRegistrations":true,"metadata":{}} \ No newline at end of file diff --git a/test/fixtures/mastodon-well-known-nodeinfo.json b/test/fixtures/mastodon-well-known-nodeinfo.json new file mode 100644 index 0000000000..237d5462ab --- /dev/null +++ b/test/fixtures/mastodon-well-known-nodeinfo.json @@ -0,0 +1 @@ +{"links":[{"rel":"http://nodeinfo.diaspora.software/ns/schema/2.0","href":"https://mastodon.example.org/nodeinfo/2.0"}]} \ No newline at end of file diff --git a/test/fixtures/wildebeest-nodeinfo21.json b/test/fixtures/wildebeest-nodeinfo21.json new file mode 100644 index 0000000000..c6af474bff --- /dev/null +++ b/test/fixtures/wildebeest-nodeinfo21.json @@ -0,0 +1 @@ +{"version":"2.1","software":{"name":"wildebeest","version":"0.0.1","repository":"https://github.com/cloudflare/wildebeest"},"protocols":["activitypub"],"usage":{"users":{"total":1,"activeMonth":1,"activeHalfyear":1}},"openRegistrations":false,"metadata":{"upstream":{"name":"mastodon","version":"3.5.1"}}} \ No newline at end of file diff --git a/test/fixtures/wildebeest-well-known-nodeinfo.json b/test/fixtures/wildebeest-well-known-nodeinfo.json new file mode 100644 index 0000000000..c7ddb43aff --- /dev/null +++ b/test/fixtures/wildebeest-well-known-nodeinfo.json @@ -0,0 +1 @@ +{"links":[{"rel":"http://nodeinfo.diaspora.software/ns/schema/2.0","href":"https://wildebeest.example.org/nodeinfo/2.0"},{"rel":"http://nodeinfo.diaspora.software/ns/schema/2.1","href":"https://wildebeest.example.org/nodeinfo/2.1"}]} \ No newline at end of file diff --git a/test/pleroma/instances/instance_test.exs b/test/pleroma/instances/instance_test.exs index 861519bcea..a769f93629 100644 --- a/test/pleroma/instances/instance_test.exs +++ b/test/pleroma/instances/instance_test.exs @@ -161,6 +161,66 @@ test "Doesn't scrapes unreachable instances" do end end + describe "get_or_update_metadata/1" do + test "Scrapes Wildebeest NodeInfo" do + Tesla.Mock.mock(fn + %{url: "https://wildebeest.example.org/.well-known/nodeinfo"} -> + %Tesla.Env{ + status: 200, + body: File.read!("test/fixtures/wildebeest-well-known-nodeinfo.json") + } + + %{url: "https://wildebeest.example.org/nodeinfo/2.1"} -> + %Tesla.Env{ + status: 200, + body: File.read!("test/fixtures/wildebeest-nodeinfo21.json") + } + end) + + expected = %{ + software_name: "wildebeest", + software_repository: "https://github.com/cloudflare/wildebeest", + software_version: "0.0.1" + } + + assert expected == + Instance.get_or_update_metadata(URI.parse("https://wildebeest.example.org/")) + + expected = %Pleroma.Instances.Instance.Pleroma.Instances.Metadata{ + software_name: "wildebeest", + software_repository: "https://github.com/cloudflare/wildebeest", + software_version: "0.0.1" + } + + assert expected == + Repo.get_by(Pleroma.Instances.Instance, %{host: "wildebeest.example.org"}).metadata + end + + test "Scrapes Mastodon NodeInfo" do + Tesla.Mock.mock(fn + %{url: "https://mastodon.example.org/.well-known/nodeinfo"} -> + %Tesla.Env{ + status: 200, + body: File.read!("test/fixtures/mastodon-well-known-nodeinfo.json") + } + + %{url: "https://mastodon.example.org/nodeinfo/2.0"} -> + %Tesla.Env{ + status: 200, + body: File.read!("test/fixtures/mastodon-nodeinfo20.json") + } + end) + + expected = %{ + software_name: "mastodon", + software_version: "4.1.0" + } + + assert expected == + Instance.get_or_update_metadata(URI.parse("https://mastodon.example.org/")) + end + end + test "delete_users_and_activities/1 deletes remote instance users and activities" do [mario, luigi, _peach, wario] = users = [