diff --git a/changelog.d/rich_media.fix b/changelog.d/rich_media.fix
new file mode 100644
index 0000000000..08f1195501
--- /dev/null
+++ b/changelog.d/rich_media.fix
@@ -0,0 +1 @@
+Rich Media Preview cache eviction when the activity is updated.
diff --git a/lib/pleroma/activity/html.ex b/lib/pleroma/activity/html.ex
index 706b2d36c0..ba284b4d52 100644
--- a/lib/pleroma/activity/html.ex
+++ b/lib/pleroma/activity/html.ex
@@ -28,7 +28,7 @@ defp get_cache_keys_for(activity_id) do
end
end
- defp add_cache_key_for(activity_id, additional_key) do
+ def add_cache_key_for(activity_id, additional_key) do
current = get_cache_keys_for(activity_id)
unless additional_key in current do
diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex
index 5bf735c4f7..84ff2f1297 100644
--- a/lib/pleroma/html.ex
+++ b/lib/pleroma/html.ex
@@ -6,8 +6,6 @@ defmodule Pleroma.HTML do
# Scrubbers are compiled on boot so they can be configured in OTP releases
# @on_load :compile_scrubbers
- @cachex Pleroma.Config.get([:cachex, :provider], Cachex)
-
def compile_scrubbers do
dir = Path.join(:code.priv_dir(:pleroma), "scrubbers")
@@ -67,27 +65,20 @@ def ensure_scrubbed_html(
end
end
- def extract_first_external_url_from_object(%{data: %{"content" => content}} = object)
+ @spec extract_first_external_url_from_object(Pleroma.Object.t()) ::
+ {:ok, String.t()} | {:error, :no_content}
+ def extract_first_external_url_from_object(%{data: %{"content" => content}})
when is_binary(content) do
- unless object.data["fake"] do
- key = "URL|#{object.id}"
+ url =
+ content
+ |> Floki.parse_fragment!()
+ |> Floki.find("a:not(.mention,.hashtag,.attachment,[rel~=\"tag\"])")
+ |> Enum.take(1)
+ |> Floki.attribute("href")
+ |> Enum.at(0)
- @cachex.fetch!(:scrubber_cache, key, fn _key ->
- {:commit, {:ok, extract_first_external_url(content)}}
- end)
- else
- {:ok, extract_first_external_url(content)}
- end
+ {:ok, url}
end
def extract_first_external_url_from_object(_), do: {:error, :no_content}
-
- def extract_first_external_url(content) do
- content
- |> Floki.parse_fragment!()
- |> Floki.find("a:not(.mention,.hashtag,.attachment,[rel~=\"tag\"])")
- |> Enum.take(1)
- |> Floki.attribute("href")
- |> Enum.at(0)
- end
end
diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex
index 61000bb9bd..1501776d98 100644
--- a/lib/pleroma/web/rich_media/helpers.ex
+++ b/lib/pleroma/web/rich_media/helpers.ex
@@ -8,6 +8,8 @@ defmodule Pleroma.Web.RichMedia.Helpers do
alias Pleroma.Object
alias Pleroma.Web.RichMedia.Parser
+ @cachex Pleroma.Config.get([:cachex, :provider], Cachex)
+
@config_impl Application.compile_env(:pleroma, [__MODULE__, :config_impl], Pleroma.Config)
@options [
@@ -25,9 +27,11 @@ defp validate_page_url(page_url) when is_binary(page_url) do
|> parse_uri(page_url)
end
- defp validate_page_url(%URI{host: host, scheme: "https", authority: authority})
- when is_binary(authority) do
+ defp validate_page_url(%URI{host: host, scheme: "https"}) do
cond do
+ Linkify.Parser.ip?(host) ->
+ :error
+
host in @config_impl.get([:rich_media, :ignore_hosts], []) ->
:error
@@ -71,7 +75,24 @@ def fetch_data_for_object(object) do
def fetch_data_for_activity(%Activity{data: %{"type" => "Create"}} = activity) do
with true <- @config_impl.get([:rich_media, :enabled]),
%Object{} = object <- Object.normalize(activity, fetch: false) do
- fetch_data_for_object(object)
+ if object.data["fake"] do
+ fetch_data_for_object(object)
+ else
+ key = "URL|#{activity.id}"
+
+ @cachex.fetch!(:scrubber_cache, key, fn _ ->
+ result = fetch_data_for_object(object)
+
+ cond do
+ match?(%{page_url: _, rich_media: _}, result) ->
+ Activity.HTML.add_cache_key_for(activity.id, key)
+ {:commit, result}
+
+ true ->
+ {:ignore, %{}}
+ end
+ end)
+ end
else
_ -> %{}
end
diff --git a/test/fixtures/rich_media/google.html b/test/fixtures/rich_media/google.html
new file mode 100644
index 0000000000..c068397a5f
--- /dev/null
+++ b/test/fixtures/rich_media/google.html
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test/fixtures/rich_media/yahoo.html b/test/fixtures/rich_media/yahoo.html
new file mode 100644
index 0000000000..41d8c5cd96
--- /dev/null
+++ b/test/fixtures/rich_media/yahoo.html
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test/pleroma/web/rich_media/helpers_test.exs b/test/pleroma/web/rich_media/helpers_test.exs
index 3ef5705ce5..bf73724767 100644
--- a/test/pleroma/web/rich_media/helpers_test.exs
+++ b/test/pleroma/web/rich_media/helpers_test.exs
@@ -83,8 +83,34 @@ test "crawls valid, complete URLs" do
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
end
- # This does not seem to work. The urls are being fetched.
- @tag skip: true
+ test "recrawls URLs on updates" do
+ original_url = "https://google.com/"
+ updated_url = "https://yahoo.com/"
+
+ Pleroma.StaticStubbedConfigMock
+ |> stub(:get, fn
+ [:rich_media, :enabled] -> true
+ path -> Pleroma.Test.StaticConfig.get(path)
+ end)
+
+ user = insert(:user)
+ {:ok, activity} = CommonAPI.post(user, %{status: "I like this site #{original_url}"})
+
+ assert match?(
+ %{page_url: ^original_url, rich_media: _},
+ Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
+ )
+
+ {:ok, _} = CommonAPI.update(user, activity, %{status: "I like this site #{updated_url}"})
+
+ activity = Pleroma.Activity.get_by_id(activity.id)
+
+ assert match?(
+ %{page_url: ^updated_url, rich_media: _},
+ Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
+ )
+ end
+
test "refuses to crawl URLs of private network from posts" do
user = insert(:user)
@@ -102,10 +128,10 @@ test "refuses to crawl URLs of private network from posts" do
path -> Pleroma.Test.StaticConfig.get(path)
end)
- assert %{} = Helpers.fetch_data_for_activity(activity)
- assert %{} = Helpers.fetch_data_for_activity(activity2)
- assert %{} = Helpers.fetch_data_for_activity(activity3)
- assert %{} = Helpers.fetch_data_for_activity(activity4)
- assert %{} = Helpers.fetch_data_for_activity(activity5)
+ assert %{} == Helpers.fetch_data_for_activity(activity)
+ assert %{} == Helpers.fetch_data_for_activity(activity2)
+ assert %{} == Helpers.fetch_data_for_activity(activity3)
+ assert %{} == Helpers.fetch_data_for_activity(activity4)
+ assert %{} == Helpers.fetch_data_for_activity(activity5)
end
end
diff --git a/test/support/http_request_mock.ex b/test/support/http_request_mock.ex
index f76128312e..df3371a75f 100644
--- a/test/support/http_request_mock.ex
+++ b/test/support/http_request_mock.ex
@@ -1464,6 +1464,14 @@ def get("https://misskey.io/notes/8vs6wxufd0", _, _, _) do
}}
end
+ def get("https://google.com/", _, _, _) do
+ {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/google.html")}}
+ end
+
+ def get("https://yahoo.com/", _, _, _) do
+ {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/yahoo.html")}}
+ end
+
def get(url, query, body, headers) do
{:error,
"Mock response not implemented for GET #{inspect(url)}, #{query}, #{inspect(body)}, #{inspect(headers)}"}
@@ -1539,7 +1547,10 @@ def post(url, query, body, headers) do
@rich_media_mocks [
"https://example.com/ogp",
"https://example.com/ogp-missing-data",
- "https://example.com/twitter-card"
+ "https://example.com/twitter-card",
+ "https://google.com/",
+ "https://yahoo.com/",
+ "https://pleroma.local/notice/9kCP7V"
]
def head(url, _query, _body, _headers) when url in @rich_media_mocks do
{:ok, %Tesla.Env{status: 404, body: ""}}