From 6b7b443ff95587b33f4b666e68ed82dc6fb485a5 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 6 Feb 2024 14:34:59 -0500 Subject: [PATCH] Pleroma.Web.RichMedia.Parser: Remove test-specific codepaths Also consolidate Tesla mocks into the HttpRequestMock module. Tests were not exercising the real codepaths. The Rich Media Preview only works with https, but most of these tests were only mocking http. --- lib/pleroma/caching.ex | 3 + lib/pleroma/web/rich_media/parser.ex | 101 ++++++++--------- test/fixtures/rich_media/oembed.html | 2 +- .../controllers/status_controller_test.exs | 8 +- .../chat_message_reference_view_test.exs | 1 + test/pleroma/web/rich_media/helpers_test.exs | 4 +- test/pleroma/web/rich_media/parser_test.exs | 105 +++--------------- test/support/cachex_proxy.ex | 6 + test/support/http_request_mock.ex | 61 +++++++++- test/support/null_cache.ex | 6 + 10 files changed, 144 insertions(+), 153 deletions(-) diff --git a/lib/pleroma/caching.ex b/lib/pleroma/caching.ex index eb05887089..796a465af3 100644 --- a/lib/pleroma/caching.ex +++ b/lib/pleroma/caching.ex @@ -8,10 +8,13 @@ defmodule Pleroma.Caching do @callback put(Cachex.cache(), any(), any(), Keyword.t()) :: {Cachex.status(), boolean()} @callback put(Cachex.cache(), any(), any()) :: {Cachex.status(), boolean()} @callback fetch!(Cachex.cache(), any(), function() | nil) :: any() + @callback fetch(Cachex.cache(), any(), function() | nil) :: + {atom(), any()} | {atom(), any(), any()} # @callback del(Cachex.cache(), any(), Keyword.t()) :: {Cachex.status(), boolean()} @callback del(Cachex.cache(), any()) :: {Cachex.status(), boolean()} @callback stream!(Cachex.cache(), any()) :: Enumerable.t() @callback expire_at(Cachex.cache(), binary(), number()) :: {Cachex.status(), boolean()} + @callback expire(Cachex.cache(), binary(), number()) :: {Cachex.status(), boolean()} @callback exists?(Cachex.cache(), any()) :: {Cachex.status(), boolean()} @callback execute!(Cachex.cache(), function()) :: any() @callback get_and_update(Cachex.cache(), any(), function()) :: diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex index 837182d8ab..a791b2babb 100644 --- a/lib/pleroma/web/rich_media/parser.ex +++ b/lib/pleroma/web/rich_media/parser.ex @@ -13,70 +13,65 @@ defp parsers do def parse(nil), do: {:error, "No URL provided"} - if Pleroma.Config.get(:env) == :test do - @spec parse(String.t()) :: {:ok, map()} | {:error, any()} - def parse(url), do: parse_url(url) - else - @spec parse(String.t()) :: {:ok, map()} | {:error, any()} - def parse(url) do - with {:ok, data} <- get_cached_or_parse(url), - {:ok, _} <- set_ttl_based_on_image(data, url) do - {:ok, data} - end + @spec parse(String.t()) :: {:ok, map()} | {:error, any()} + def parse(url) do + with {:ok, data} <- get_cached_or_parse(url), + {:ok, _} <- set_ttl_based_on_image(data, url) do + {:ok, data} end + end - defp get_cached_or_parse(url) do - case @cachex.fetch(:rich_media_cache, url, fn -> - case parse_url(url) do - {:ok, _} = res -> - {:commit, res} + defp get_cached_or_parse(url) do + case @cachex.fetch(:rich_media_cache, url, fn -> + case parse_url(url) do + {:ok, _} = res -> + {:commit, res} - {:error, reason} = e -> - # Unfortunately we have to log errors here, instead of doing that - # along with ttl setting at the bottom. Otherwise we can get log spam - # if more than one process was waiting for the rich media card - # while it was generated. Ideally we would set ttl here as well, - # so we don't override it number_of_waiters_on_generation - # times, but one, obviously, can't set ttl for not-yet-created entry - # and Cachex doesn't support returning ttl from the fetch callback. - log_error(url, reason) - {:commit, e} - end - end) do - {action, res} when action in [:commit, :ok] -> - case res do - {:ok, _data} = res -> - res + {:error, reason} = e -> + # Unfortunately we have to log errors here, instead of doing that + # along with ttl setting at the bottom. Otherwise we can get log spam + # if more than one process was waiting for the rich media card + # while it was generated. Ideally we would set ttl here as well, + # so we don't override it number_of_waiters_on_generation + # times, but one, obviously, can't set ttl for not-yet-created entry + # and Cachex doesn't support returning ttl from the fetch callback. + log_error(url, reason) + {:commit, e} + end + end) do + {action, res} when action in [:commit, :ok] -> + case res do + {:ok, _data} = res -> + res - {:error, reason} = e -> - if action == :commit, do: set_error_ttl(url, reason) - e - end + {:error, reason} = e -> + if action == :commit, do: set_error_ttl(url, reason) + e + end - {:error, e} -> - {:error, {:cachex_error, e}} - end + {:error, e} -> + {:error, {:cachex_error, e}} end + end - defp set_error_ttl(_url, :body_too_large), do: :ok - defp set_error_ttl(_url, {:content_type, _}), do: :ok + defp set_error_ttl(_url, :body_too_large), do: :ok + defp set_error_ttl(_url, {:content_type, _}), do: :ok - # The TTL is not set for the errors above, since they are unlikely to change - # with time + # The TTL is not set for the errors above, since they are unlikely to change + # with time - defp set_error_ttl(url, _reason) do - ttl = Pleroma.Config.get([:rich_media, :failure_backoff], 60_000) - @cachex.expire(:rich_media_cache, url, ttl) - :ok - end + defp set_error_ttl(url, _reason) do + ttl = Pleroma.Config.get([:rich_media, :failure_backoff], 60_000) + @cachex.expire(:rich_media_cache, url, ttl) + :ok + end - defp log_error(url, {:invalid_metadata, data}) do - Logger.debug(fn -> "Incomplete or invalid metadata for #{url}: #{inspect(data)}" end) - end + defp log_error(url, {:invalid_metadata, data}) do + Logger.debug(fn -> "Incomplete or invalid metadata for #{url}: #{inspect(data)}" end) + end - defp log_error(url, reason) do - Logger.warning(fn -> "Rich media error for #{url}: #{inspect(reason)}" end) - end + defp log_error(url, reason) do + Logger.warning(fn -> "Rich media error for #{url}: #{inspect(reason)}" end) end @doc """ diff --git a/test/fixtures/rich_media/oembed.html b/test/fixtures/rich_media/oembed.html index 55f17004bc..5429630d0c 100644 --- a/test/fixtures/rich_media/oembed.html +++ b/test/fixtures/rich_media/oembed.html @@ -1,3 +1,3 @@ diff --git a/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs b/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs index 7bbe46cbe9..f95f15ec35 100644 --- a/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs +++ b/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs @@ -336,13 +336,7 @@ test "fake statuses' preview card is not cached", %{conn: conn} do path -> Pleroma.Test.StaticConfig.get(path) end) - Tesla.Mock.mock(fn - %{ - method: :get, - url: "https://example.com/twitter-card" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")} - + Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) diff --git a/test/pleroma/web/pleroma_api/views/chat_message_reference_view_test.exs b/test/pleroma/web/pleroma_api/views/chat_message_reference_view_test.exs index 44d40269cd..c8b3cb3910 100644 --- a/test/pleroma/web/pleroma_api/views/chat_message_reference_view_test.exs +++ b/test/pleroma/web/pleroma_api/views/chat_message_reference_view_test.exs @@ -49,6 +49,7 @@ test "it displays a chat message" do :chat_message_id_idempotency_key_cache, ^id -> {:ok, "123"} cache, key -> NullCache.get(cache, key) end) + |> stub(:fetch, fn :rich_media_cache, _, _ -> {:ok, {:ok, %{}}} end) chat_message = MessageReferenceView.render("show.json", chat_message_reference: cm_ref) diff --git a/test/pleroma/web/rich_media/helpers_test.exs b/test/pleroma/web/rich_media/helpers_test.exs index bf73724767..13d2341ad9 100644 --- a/test/pleroma/web/rich_media/helpers_test.exs +++ b/test/pleroma/web/rich_media/helpers_test.exs @@ -3,7 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Web.RichMedia.HelpersTest do - use Pleroma.DataCase, async: true + use Pleroma.DataCase, async: false alias Pleroma.StaticStubbedConfigMock, as: ConfigMock alias Pleroma.Web.CommonAPI @@ -14,7 +14,7 @@ defmodule Pleroma.Web.RichMedia.HelpersTest do import Tesla.Mock setup do - mock(fn env -> apply(HttpRequestMock, :request, [env]) end) + mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) ConfigMock |> stub(:get, fn diff --git a/test/pleroma/web/rich_media/parser_test.exs b/test/pleroma/web/rich_media/parser_test.exs index 9064138a64..a05b89a2bf 100644 --- a/test/pleroma/web/rich_media/parser_test.exs +++ b/test/pleroma/web/rich_media/parser_test.exs @@ -3,95 +3,26 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Web.RichMedia.ParserTest do - use ExUnit.Case, async: true + use Pleroma.DataCase, async: false alias Pleroma.Web.RichMedia.Parser + import Tesla.Mock + setup do - Tesla.Mock.mock(fn - %{ - method: :get, - url: "http://example.com/ogp" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")} - - %{ - method: :get, - url: "http://example.com/non-ogp" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/non_ogp_embed.html")} - - %{ - method: :get, - url: "http://example.com/ogp-missing-title" - } -> - %Tesla.Env{ - status: 200, - body: File.read!("test/fixtures/rich_media/ogp-missing-title.html") - } - - %{ - method: :get, - url: "http://example.com/twitter-card" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")} - - %{ - method: :get, - url: "http://example.com/oembed" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.html")} - - %{ - method: :get, - url: "http://example.com/oembed.json" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.json")} - - %{method: :get, url: "http://example.com/empty"} -> - %Tesla.Env{status: 200, body: "hello"} - - %{method: :get, url: "http://example.com/malformed"} -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")} - - %{method: :get, url: "http://example.com/error"} -> - {:error, :overload} - - %{ - method: :head, - url: "http://example.com/huge-page" - } -> - %Tesla.Env{ - status: 200, - headers: [{"content-length", "2000001"}, {"content-type", "text/html"}] - } - - %{ - method: :head, - url: "http://example.com/pdf-file" - } -> - %Tesla.Env{ - status: 200, - headers: [{"content-length", "1000000"}, {"content-type", "application/pdf"}] - } - - %{method: :head} -> - %Tesla.Env{status: 404, body: "", headers: []} - end) - - :ok + mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) end test "returns error when no metadata present" do - assert {:error, _} = Parser.parse("http://example.com/empty") + assert {:error, _} = Parser.parse("https://example.com/empty") end test "doesn't just add a title" do - assert {:error, {:invalid_metadata, _}} = Parser.parse("http://example.com/non-ogp") + assert {:error, {:invalid_metadata, _}} = Parser.parse("https://example.com/non-ogp") end test "parses ogp" do - assert Parser.parse("http://example.com/ogp") == + assert Parser.parse("https://example.com/ogp") == {:ok, %{ "image" => "http://ia.media-imdb.com/images/rock.jpg", @@ -99,12 +30,12 @@ test "parses ogp" do "description" => "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.", "type" => "video.movie", - "url" => "http://example.com/ogp" + "url" => "https://example.com/ogp" }} end test "falls back to when ogp:title is missing" do - assert Parser.parse("http://example.com/ogp-missing-title") == + assert Parser.parse("https://example.com/ogp-missing-title") == {:ok, %{ "image" => "http://ia.media-imdb.com/images/rock.jpg", @@ -112,12 +43,12 @@ test "falls back to <title> when ogp:title is missing" do "description" => "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.", "type" => "video.movie", - "url" => "http://example.com/ogp-missing-title" + "url" => "https://example.com/ogp-missing-title" }} end test "parses twitter card" do - assert Parser.parse("http://example.com/twitter-card") == + assert Parser.parse("https://example.com/twitter-card") == {:ok, %{ "card" => "summary", @@ -125,12 +56,12 @@ test "parses twitter card" do "image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg", "title" => "Small Island Developing States Photo Submission", "description" => "View the album on Flickr.", - "url" => "http://example.com/twitter-card" + "url" => "https://example.com/twitter-card" }} end test "parses OEmbed and filters HTML tags" do - assert Parser.parse("http://example.com/oembed") == + assert Parser.parse("https://example.com/oembed") == {:ok, %{ "author_name" => "\u202E\u202D\u202Cbees\u202C", @@ -150,7 +81,7 @@ test "parses OEmbed and filters HTML tags" do "thumbnail_width" => 150, "title" => "Bacon Lollys", "type" => "photo", - "url" => "http://example.com/oembed", + "url" => "https://example.com/oembed", "version" => "1.0", "web_page" => "https://www.flickr.com/photos/bees/2362225867/", "web_page_short_url" => "https://flic.kr/p/4AK2sc", @@ -159,18 +90,18 @@ test "parses OEmbed and filters HTML tags" do end test "rejects invalid OGP data" do - assert {:error, _} = Parser.parse("http://example.com/malformed") + assert {:error, _} = Parser.parse("https://example.com/malformed") end test "returns error if getting page was not successful" do - assert {:error, :overload} = Parser.parse("http://example.com/error") + assert {:error, :overload} = Parser.parse("https://example.com/error") end test "does a HEAD request to check if the body is too large" do - assert {:error, :body_too_large} = Parser.parse("http://example.com/huge-page") + assert {:error, :body_too_large} = Parser.parse("https://example.com/huge-page") end test "does a HEAD request to check if the body is html" do - assert {:error, {:content_type, _}} = Parser.parse("http://example.com/pdf-file") + assert {:error, {:content_type, _}} = Parser.parse("https://example.com/pdf-file") end end diff --git a/test/support/cachex_proxy.ex b/test/support/cachex_proxy.ex index 83ae5610f4..8f27986a9c 100644 --- a/test/support/cachex_proxy.ex +++ b/test/support/cachex_proxy.ex @@ -26,9 +26,15 @@ defmodule Pleroma.CachexProxy do @impl true defdelegate fetch!(cache, key, func), to: Cachex + @impl true + defdelegate fetch(cache, key, func), to: Cachex + @impl true defdelegate expire_at(cache, str, num), to: Cachex + @impl true + defdelegate expire(cache, str, num), to: Cachex + @impl true defdelegate exists?(cache, key), to: Cachex diff --git a/test/support/http_request_mock.ex b/test/support/http_request_mock.ex index df3371a75f..f4b6f1f9f6 100644 --- a/test/support/http_request_mock.ex +++ b/test/support/http_request_mock.ex @@ -1059,7 +1059,7 @@ def get("https://example.com/ogp-missing-data", _, _, _) do }} end - def get("http://example.com/malformed", _, _, _) do + def get("https://example.com/malformed", _, _, _) do {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")}} end @@ -1472,6 +1472,37 @@ def get("https://yahoo.com/", _, _, _) do {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/yahoo.html")}} end + def get("https://example.com/error", _, _, _), do: {:error, :overload} + + def get("https://example.com/ogp-missing-title", _, _, _) do + {:ok, + %Tesla.Env{ + status: 200, + body: File.read!("test/fixtures/rich_media/ogp-missing-title.html") + }} + end + + def get("https://example.com/oembed", _, _, _) do + {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.html")}} + end + + def get("https://example.com/oembed.json", _, _, _) do + {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.json")}} + end + + def get("https://example.com/twitter-card", _, _, _) do + {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")}} + end + + def get("https://example.com/non-ogp", _, _, _) do + {:ok, + %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/non_ogp_embed.html")}} + end + + def get("https://example.com/empty", _, _, _) do + {:ok, %Tesla.Env{status: 200, body: "hello"}} + end + def get(url, query, body, headers) do {:error, "Mock response not implemented for GET #{inspect(url)}, #{query}, #{inspect(body)}, #{inspect(headers)}"} @@ -1545,17 +1576,41 @@ def post(url, query, body, headers) do # Most of the rich media mocks are missing HEAD requests, so we just return 404. @rich_media_mocks [ + "https://example.com/empty", + "https://example.com/error", + "https://example.com/malformed", + "https://example.com/non-ogp", + "https://example.com/oembed", + "https://example.com/oembed.json", "https://example.com/ogp", "https://example.com/ogp-missing-data", + "https://example.com/ogp-missing-title", "https://example.com/twitter-card", "https://google.com/", - "https://yahoo.com/", - "https://pleroma.local/notice/9kCP7V" + "https://pleroma.local/notice/9kCP7V", + "https://yahoo.com/" ] + def head(url, _query, _body, _headers) when url in @rich_media_mocks do {:ok, %Tesla.Env{status: 404, body: ""}} end + def head("https://example.com/pdf-file", _, _, _) do + {:ok, + %Tesla.Env{ + status: 200, + headers: [{"content-length", "1000000"}, {"content-type", "application/pdf"}] + }} + end + + def head("https://example.com/huge-page", _, _, _) do + {:ok, + %Tesla.Env{ + status: 200, + headers: [{"content-length", "2000001"}, {"content-type", "text/html"}] + }} + end + def head(url, query, body, headers) do {:error, "Mock response not implemented for HEAD #{inspect(url)}, #{query}, #{inspect(body)}, #{inspect(headers)}"} diff --git a/test/support/null_cache.ex b/test/support/null_cache.ex index 9f1d45f1df..47c84174e8 100644 --- a/test/support/null_cache.ex +++ b/test/support/null_cache.ex @@ -28,6 +28,9 @@ def fetch!(_, key, func) do end end + @impl true + def fetch(_, key, func), do: func.(key) + @impl true def get_and_update(_, _, func) do func.(nil) @@ -36,6 +39,9 @@ def get_and_update(_, _, func) do @impl true def expire_at(_, _, _), do: {:ok, true} + @impl true + def expire(_, _, _), do: {:ok, true} + @impl true def exists?(_, _), do: {:ok, false}