Merge branch 'rich-media-tests' into 'develop'
Pleroma.Web.RichMedia.Parser: Remove test-specific codepaths See merge request pleroma/pleroma!4053
This commit is contained in:
commit
72480e7b2f
12 changed files with 188 additions and 196 deletions
0
changelog.d/rich_media_tests.skip
Normal file
0
changelog.d/rich_media_tests.skip
Normal file
|
@ -8,10 +8,13 @@ defmodule Pleroma.Caching do
|
|||
@callback put(Cachex.cache(), any(), any(), Keyword.t()) :: {Cachex.status(), boolean()}
|
||||
@callback put(Cachex.cache(), any(), any()) :: {Cachex.status(), boolean()}
|
||||
@callback fetch!(Cachex.cache(), any(), function() | nil) :: any()
|
||||
@callback fetch(Cachex.cache(), any(), function() | nil) ::
|
||||
{atom(), any()} | {atom(), any(), any()}
|
||||
# @callback del(Cachex.cache(), any(), Keyword.t()) :: {Cachex.status(), boolean()}
|
||||
@callback del(Cachex.cache(), any()) :: {Cachex.status(), boolean()}
|
||||
@callback stream!(Cachex.cache(), any()) :: Enumerable.t()
|
||||
@callback expire_at(Cachex.cache(), binary(), number()) :: {Cachex.status(), boolean()}
|
||||
@callback expire(Cachex.cache(), binary(), number()) :: {Cachex.status(), boolean()}
|
||||
@callback exists?(Cachex.cache(), any()) :: {Cachex.status(), boolean()}
|
||||
@callback execute!(Cachex.cache(), function()) :: any()
|
||||
@callback get_and_update(Cachex.cache(), any(), function()) ::
|
||||
|
|
|
@ -18,53 +18,10 @@ defmodule Pleroma.Web.RichMedia.Helpers do
|
|||
recv_timeout: 2_000
|
||||
]
|
||||
|
||||
@spec validate_page_url(URI.t() | binary()) :: :ok | :error
|
||||
defp validate_page_url(page_url) when is_binary(page_url) do
|
||||
validate_tld = @config_impl.get([Pleroma.Formatter, :validate_tld])
|
||||
|
||||
page_url
|
||||
|> Linkify.Parser.url?(validate_tld: validate_tld)
|
||||
|> parse_uri(page_url)
|
||||
end
|
||||
|
||||
defp validate_page_url(%URI{host: host, scheme: "https"}) do
|
||||
cond do
|
||||
Linkify.Parser.ip?(host) ->
|
||||
:error
|
||||
|
||||
host in @config_impl.get([:rich_media, :ignore_hosts], []) ->
|
||||
:error
|
||||
|
||||
get_tld(host) in @config_impl.get([:rich_media, :ignore_tld], []) ->
|
||||
:error
|
||||
|
||||
true ->
|
||||
:ok
|
||||
end
|
||||
end
|
||||
|
||||
defp validate_page_url(_), do: :error
|
||||
|
||||
defp parse_uri(true, url) do
|
||||
url
|
||||
|> URI.parse()
|
||||
|> validate_page_url
|
||||
end
|
||||
|
||||
defp parse_uri(_, _), do: :error
|
||||
|
||||
defp get_tld(host) do
|
||||
host
|
||||
|> String.split(".")
|
||||
|> Enum.reverse()
|
||||
|> hd
|
||||
end
|
||||
|
||||
def fetch_data_for_object(object) do
|
||||
with true <- @config_impl.get([:rich_media, :enabled]),
|
||||
{:ok, page_url} <-
|
||||
HTML.extract_first_external_url_from_object(object),
|
||||
:ok <- validate_page_url(page_url),
|
||||
{:ok, rich_media} <- Parser.parse(page_url) do
|
||||
%{page_url: page_url, rich_media: rich_media}
|
||||
else
|
||||
|
|
|
@ -6,6 +6,7 @@ defmodule Pleroma.Web.RichMedia.Parser do
|
|||
require Logger
|
||||
|
||||
@cachex Pleroma.Config.get([:cachex, :provider], Cachex)
|
||||
@config_impl Application.compile_env(:pleroma, [__MODULE__, :config_impl], Pleroma.Config)
|
||||
|
||||
defp parsers do
|
||||
Pleroma.Config.get([:rich_media, :parsers])
|
||||
|
@ -13,13 +14,10 @@ defp parsers do
|
|||
|
||||
def parse(nil), do: {:error, "No URL provided"}
|
||||
|
||||
if Pleroma.Config.get(:env) == :test do
|
||||
@spec parse(String.t()) :: {:ok, map()} | {:error, any()}
|
||||
def parse(url), do: parse_url(url)
|
||||
else
|
||||
@spec parse(String.t()) :: {:ok, map()} | {:error, any()}
|
||||
def parse(url) do
|
||||
with {:ok, data} <- get_cached_or_parse(url),
|
||||
with :ok <- validate_page_url(url),
|
||||
{:ok, data} <- get_cached_or_parse(url),
|
||||
{:ok, _} <- set_ttl_based_on_image(data, url) do
|
||||
{:ok, data}
|
||||
end
|
||||
|
@ -77,7 +75,6 @@ defp log_error(url, {:invalid_metadata, data}) do
|
|||
defp log_error(url, reason) do
|
||||
Logger.warning(fn -> "Rich media error for #{url}: #{inspect(reason)}" end)
|
||||
end
|
||||
end
|
||||
|
||||
@doc """
|
||||
Set the rich media cache based on the expiration time of image.
|
||||
|
@ -166,4 +163,46 @@ defp clean_parsed_data(data) do
|
|||
end)
|
||||
|> Map.new()
|
||||
end
|
||||
|
||||
@spec validate_page_url(URI.t() | binary()) :: :ok | :error
|
||||
defp validate_page_url(page_url) when is_binary(page_url) do
|
||||
validate_tld = @config_impl.get([Pleroma.Formatter, :validate_tld])
|
||||
|
||||
page_url
|
||||
|> Linkify.Parser.url?(validate_tld: validate_tld)
|
||||
|> parse_uri(page_url)
|
||||
end
|
||||
|
||||
defp validate_page_url(%URI{host: host, scheme: "https"}) do
|
||||
cond do
|
||||
Linkify.Parser.ip?(host) ->
|
||||
:error
|
||||
|
||||
host in @config_impl.get([:rich_media, :ignore_hosts], []) ->
|
||||
:error
|
||||
|
||||
get_tld(host) in @config_impl.get([:rich_media, :ignore_tld], []) ->
|
||||
:error
|
||||
|
||||
true ->
|
||||
:ok
|
||||
end
|
||||
end
|
||||
|
||||
defp validate_page_url(_), do: :error
|
||||
|
||||
defp parse_uri(true, url) do
|
||||
url
|
||||
|> URI.parse()
|
||||
|> validate_page_url
|
||||
end
|
||||
|
||||
defp parse_uri(_, _), do: :error
|
||||
|
||||
defp get_tld(host) do
|
||||
host
|
||||
|> String.split(".")
|
||||
|> Enum.reverse()
|
||||
|> hd
|
||||
end
|
||||
end
|
||||
|
|
2
test/fixtures/rich_media/oembed.html
vendored
2
test/fixtures/rich_media/oembed.html
vendored
|
@ -1,3 +1,3 @@
|
|||
<link rel="alternate" type="application/json+oembed"
|
||||
href="http://example.com/oembed.json"
|
||||
href="https://example.com/oembed.json"
|
||||
title="Bacon Lollys oEmbed Profile" />
|
||||
|
|
|
@ -336,13 +336,7 @@ test "fake statuses' preview card is not cached", %{conn: conn} do
|
|||
path -> Pleroma.Test.StaticConfig.get(path)
|
||||
end)
|
||||
|
||||
Tesla.Mock.mock(fn
|
||||
%{
|
||||
method: :get,
|
||||
url: "https://example.com/twitter-card"
|
||||
} ->
|
||||
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")}
|
||||
|
||||
Tesla.Mock.mock_global(fn
|
||||
env ->
|
||||
apply(HttpRequestMock, :request, [env])
|
||||
end)
|
||||
|
|
|
@ -49,6 +49,7 @@ test "it displays a chat message" do
|
|||
:chat_message_id_idempotency_key_cache, ^id -> {:ok, "123"}
|
||||
cache, key -> NullCache.get(cache, key)
|
||||
end)
|
||||
|> stub(:fetch, fn :rich_media_cache, _, _ -> {:ok, {:ok, %{}}} end)
|
||||
|
||||
chat_message = MessageReferenceView.render("show.json", chat_message_reference: cm_ref)
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-only
|
||||
|
||||
defmodule Pleroma.Web.RichMedia.HelpersTest do
|
||||
use Pleroma.DataCase, async: true
|
||||
use Pleroma.DataCase, async: false
|
||||
|
||||
alias Pleroma.StaticStubbedConfigMock, as: ConfigMock
|
||||
alias Pleroma.Web.CommonAPI
|
||||
|
@ -14,7 +14,7 @@ defmodule Pleroma.Web.RichMedia.HelpersTest do
|
|||
import Tesla.Mock
|
||||
|
||||
setup do
|
||||
mock(fn env -> apply(HttpRequestMock, :request, [env]) end)
|
||||
mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
|
||||
|
||||
ConfigMock
|
||||
|> stub(:get, fn
|
||||
|
|
|
@ -3,95 +3,26 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-only
|
||||
|
||||
defmodule Pleroma.Web.RichMedia.ParserTest do
|
||||
use ExUnit.Case, async: true
|
||||
use Pleroma.DataCase, async: false
|
||||
|
||||
alias Pleroma.Web.RichMedia.Parser
|
||||
|
||||
import Tesla.Mock
|
||||
|
||||
setup do
|
||||
Tesla.Mock.mock(fn
|
||||
%{
|
||||
method: :get,
|
||||
url: "http://example.com/ogp"
|
||||
} ->
|
||||
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}
|
||||
|
||||
%{
|
||||
method: :get,
|
||||
url: "http://example.com/non-ogp"
|
||||
} ->
|
||||
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/non_ogp_embed.html")}
|
||||
|
||||
%{
|
||||
method: :get,
|
||||
url: "http://example.com/ogp-missing-title"
|
||||
} ->
|
||||
%Tesla.Env{
|
||||
status: 200,
|
||||
body: File.read!("test/fixtures/rich_media/ogp-missing-title.html")
|
||||
}
|
||||
|
||||
%{
|
||||
method: :get,
|
||||
url: "http://example.com/twitter-card"
|
||||
} ->
|
||||
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")}
|
||||
|
||||
%{
|
||||
method: :get,
|
||||
url: "http://example.com/oembed"
|
||||
} ->
|
||||
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.html")}
|
||||
|
||||
%{
|
||||
method: :get,
|
||||
url: "http://example.com/oembed.json"
|
||||
} ->
|
||||
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.json")}
|
||||
|
||||
%{method: :get, url: "http://example.com/empty"} ->
|
||||
%Tesla.Env{status: 200, body: "hello"}
|
||||
|
||||
%{method: :get, url: "http://example.com/malformed"} ->
|
||||
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")}
|
||||
|
||||
%{method: :get, url: "http://example.com/error"} ->
|
||||
{:error, :overload}
|
||||
|
||||
%{
|
||||
method: :head,
|
||||
url: "http://example.com/huge-page"
|
||||
} ->
|
||||
%Tesla.Env{
|
||||
status: 200,
|
||||
headers: [{"content-length", "2000001"}, {"content-type", "text/html"}]
|
||||
}
|
||||
|
||||
%{
|
||||
method: :head,
|
||||
url: "http://example.com/pdf-file"
|
||||
} ->
|
||||
%Tesla.Env{
|
||||
status: 200,
|
||||
headers: [{"content-length", "1000000"}, {"content-type", "application/pdf"}]
|
||||
}
|
||||
|
||||
%{method: :head} ->
|
||||
%Tesla.Env{status: 404, body: "", headers: []}
|
||||
end)
|
||||
|
||||
:ok
|
||||
mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
|
||||
end
|
||||
|
||||
test "returns error when no metadata present" do
|
||||
assert {:error, _} = Parser.parse("http://example.com/empty")
|
||||
assert {:error, _} = Parser.parse("https://example.com/empty")
|
||||
end
|
||||
|
||||
test "doesn't just add a title" do
|
||||
assert {:error, {:invalid_metadata, _}} = Parser.parse("http://example.com/non-ogp")
|
||||
assert {:error, {:invalid_metadata, _}} = Parser.parse("https://example.com/non-ogp")
|
||||
end
|
||||
|
||||
test "parses ogp" do
|
||||
assert Parser.parse("http://example.com/ogp") ==
|
||||
assert Parser.parse("https://example.com/ogp") ==
|
||||
{:ok,
|
||||
%{
|
||||
"image" => "http://ia.media-imdb.com/images/rock.jpg",
|
||||
|
@ -99,12 +30,12 @@ test "parses ogp" do
|
|||
"description" =>
|
||||
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
|
||||
"type" => "video.movie",
|
||||
"url" => "http://example.com/ogp"
|
||||
"url" => "https://example.com/ogp"
|
||||
}}
|
||||
end
|
||||
|
||||
test "falls back to <title> when ogp:title is missing" do
|
||||
assert Parser.parse("http://example.com/ogp-missing-title") ==
|
||||
assert Parser.parse("https://example.com/ogp-missing-title") ==
|
||||
{:ok,
|
||||
%{
|
||||
"image" => "http://ia.media-imdb.com/images/rock.jpg",
|
||||
|
@ -112,12 +43,12 @@ test "falls back to <title> when ogp:title is missing" do
|
|||
"description" =>
|
||||
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
|
||||
"type" => "video.movie",
|
||||
"url" => "http://example.com/ogp-missing-title"
|
||||
"url" => "https://example.com/ogp-missing-title"
|
||||
}}
|
||||
end
|
||||
|
||||
test "parses twitter card" do
|
||||
assert Parser.parse("http://example.com/twitter-card") ==
|
||||
assert Parser.parse("https://example.com/twitter-card") ==
|
||||
{:ok,
|
||||
%{
|
||||
"card" => "summary",
|
||||
|
@ -125,12 +56,12 @@ test "parses twitter card" do
|
|||
"image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg",
|
||||
"title" => "Small Island Developing States Photo Submission",
|
||||
"description" => "View the album on Flickr.",
|
||||
"url" => "http://example.com/twitter-card"
|
||||
"url" => "https://example.com/twitter-card"
|
||||
}}
|
||||
end
|
||||
|
||||
test "parses OEmbed and filters HTML tags" do
|
||||
assert Parser.parse("http://example.com/oembed") ==
|
||||
assert Parser.parse("https://example.com/oembed") ==
|
||||
{:ok,
|
||||
%{
|
||||
"author_name" => "\u202E\u202D\u202Cbees\u202C",
|
||||
|
@ -150,7 +81,7 @@ test "parses OEmbed and filters HTML tags" do
|
|||
"thumbnail_width" => 150,
|
||||
"title" => "Bacon Lollys",
|
||||
"type" => "photo",
|
||||
"url" => "http://example.com/oembed",
|
||||
"url" => "https://example.com/oembed",
|
||||
"version" => "1.0",
|
||||
"web_page" => "https://www.flickr.com/photos/bees/2362225867/",
|
||||
"web_page_short_url" => "https://flic.kr/p/4AK2sc",
|
||||
|
@ -159,18 +90,18 @@ test "parses OEmbed and filters HTML tags" do
|
|||
end
|
||||
|
||||
test "rejects invalid OGP data" do
|
||||
assert {:error, _} = Parser.parse("http://example.com/malformed")
|
||||
assert {:error, _} = Parser.parse("https://example.com/malformed")
|
||||
end
|
||||
|
||||
test "returns error if getting page was not successful" do
|
||||
assert {:error, :overload} = Parser.parse("http://example.com/error")
|
||||
assert {:error, :overload} = Parser.parse("https://example.com/error")
|
||||
end
|
||||
|
||||
test "does a HEAD request to check if the body is too large" do
|
||||
assert {:error, :body_too_large} = Parser.parse("http://example.com/huge-page")
|
||||
assert {:error, :body_too_large} = Parser.parse("https://example.com/huge-page")
|
||||
end
|
||||
|
||||
test "does a HEAD request to check if the body is html" do
|
||||
assert {:error, {:content_type, _}} = Parser.parse("http://example.com/pdf-file")
|
||||
assert {:error, {:content_type, _}} = Parser.parse("https://example.com/pdf-file")
|
||||
end
|
||||
end
|
||||
|
|
|
@ -26,9 +26,15 @@ defmodule Pleroma.CachexProxy do
|
|||
@impl true
|
||||
defdelegate fetch!(cache, key, func), to: Cachex
|
||||
|
||||
@impl true
|
||||
defdelegate fetch(cache, key, func), to: Cachex
|
||||
|
||||
@impl true
|
||||
defdelegate expire_at(cache, str, num), to: Cachex
|
||||
|
||||
@impl true
|
||||
defdelegate expire(cache, str, num), to: Cachex
|
||||
|
||||
@impl true
|
||||
defdelegate exists?(cache, key), to: Cachex
|
||||
|
||||
|
|
|
@ -1059,7 +1059,7 @@ def get("https://example.com/ogp-missing-data", _, _, _) do
|
|||
}}
|
||||
end
|
||||
|
||||
def get("http://example.com/malformed", _, _, _) do
|
||||
def get("https://example.com/malformed", _, _, _) do
|
||||
{:ok,
|
||||
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")}}
|
||||
end
|
||||
|
@ -1472,6 +1472,37 @@ def get("https://yahoo.com/", _, _, _) do
|
|||
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/yahoo.html")}}
|
||||
end
|
||||
|
||||
def get("https://example.com/error", _, _, _), do: {:error, :overload}
|
||||
|
||||
def get("https://example.com/ogp-missing-title", _, _, _) do
|
||||
{:ok,
|
||||
%Tesla.Env{
|
||||
status: 200,
|
||||
body: File.read!("test/fixtures/rich_media/ogp-missing-title.html")
|
||||
}}
|
||||
end
|
||||
|
||||
def get("https://example.com/oembed", _, _, _) do
|
||||
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.html")}}
|
||||
end
|
||||
|
||||
def get("https://example.com/oembed.json", _, _, _) do
|
||||
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.json")}}
|
||||
end
|
||||
|
||||
def get("https://example.com/twitter-card", _, _, _) do
|
||||
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")}}
|
||||
end
|
||||
|
||||
def get("https://example.com/non-ogp", _, _, _) do
|
||||
{:ok,
|
||||
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/non_ogp_embed.html")}}
|
||||
end
|
||||
|
||||
def get("https://example.com/empty", _, _, _) do
|
||||
{:ok, %Tesla.Env{status: 200, body: "hello"}}
|
||||
end
|
||||
|
||||
def get(url, query, body, headers) do
|
||||
{:error,
|
||||
"Mock response not implemented for GET #{inspect(url)}, #{query}, #{inspect(body)}, #{inspect(headers)}"}
|
||||
|
@ -1545,17 +1576,41 @@ def post(url, query, body, headers) do
|
|||
|
||||
# Most of the rich media mocks are missing HEAD requests, so we just return 404.
|
||||
@rich_media_mocks [
|
||||
"https://example.com/empty",
|
||||
"https://example.com/error",
|
||||
"https://example.com/malformed",
|
||||
"https://example.com/non-ogp",
|
||||
"https://example.com/oembed",
|
||||
"https://example.com/oembed.json",
|
||||
"https://example.com/ogp",
|
||||
"https://example.com/ogp-missing-data",
|
||||
"https://example.com/ogp-missing-title",
|
||||
"https://example.com/twitter-card",
|
||||
"https://google.com/",
|
||||
"https://yahoo.com/",
|
||||
"https://pleroma.local/notice/9kCP7V"
|
||||
"https://pleroma.local/notice/9kCP7V",
|
||||
"https://yahoo.com/"
|
||||
]
|
||||
|
||||
def head(url, _query, _body, _headers) when url in @rich_media_mocks do
|
||||
{:ok, %Tesla.Env{status: 404, body: ""}}
|
||||
end
|
||||
|
||||
def head("https://example.com/pdf-file", _, _, _) do
|
||||
{:ok,
|
||||
%Tesla.Env{
|
||||
status: 200,
|
||||
headers: [{"content-length", "1000000"}, {"content-type", "application/pdf"}]
|
||||
}}
|
||||
end
|
||||
|
||||
def head("https://example.com/huge-page", _, _, _) do
|
||||
{:ok,
|
||||
%Tesla.Env{
|
||||
status: 200,
|
||||
headers: [{"content-length", "2000001"}, {"content-type", "text/html"}]
|
||||
}}
|
||||
end
|
||||
|
||||
def head(url, query, body, headers) do
|
||||
{:error,
|
||||
"Mock response not implemented for HEAD #{inspect(url)}, #{query}, #{inspect(body)}, #{inspect(headers)}"}
|
||||
|
|
|
@ -28,6 +28,9 @@ def fetch!(_, key, func) do
|
|||
end
|
||||
end
|
||||
|
||||
@impl true
|
||||
def fetch(_, key, func), do: func.(key)
|
||||
|
||||
@impl true
|
||||
def get_and_update(_, _, func) do
|
||||
func.(nil)
|
||||
|
@ -36,6 +39,9 @@ def get_and_update(_, _, func) do
|
|||
@impl true
|
||||
def expire_at(_, _, _), do: {:ok, true}
|
||||
|
||||
@impl true
|
||||
def expire(_, _, _), do: {:ok, true}
|
||||
|
||||
@impl true
|
||||
def exists?(_, _), do: {:ok, false}
|
||||
|
||||
|
|
Loading…
Reference in a new issue