Revert to Pleroma's card parser

Signed-off-by: marcin mikołajczak <git@mkljczk.pl>
This commit is contained in:
marcin mikołajczak 2024-05-14 17:18:22 +02:00
parent aceff8c128
commit 8c3f306c62
28 changed files with 336 additions and 762 deletions

View file

@ -448,7 +448,6 @@
Pleroma.Web.RichMedia.Parsers.TwitterCard, Pleroma.Web.RichMedia.Parsers.TwitterCard,
Pleroma.Web.RichMedia.Parsers.OEmbed Pleroma.Web.RichMedia.Parsers.OEmbed
], ],
oembed_providers_enabled: true,
failure_backoff: 60_000, failure_backoff: 60_000,
ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl] ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl]

View file

@ -2143,12 +2143,6 @@
type: :boolean, type: :boolean,
description: "Enables RichMedia parsing of URLs" description: "Enables RichMedia parsing of URLs"
}, },
%{
key: :oembed_providers_enabled,
type: :boolean,
description:
"Embed rich media from a list of known providers. This takes precedence over other parsers."
},
%{ %{
key: :ignore_hosts, key: :ignore_hosts,
type: {:list, :string}, type: {:list, :string},

View file

@ -433,7 +433,6 @@ config :pleroma, Pleroma.Web.MediaProxy.Invalidation.Http,
* `ignore_hosts`: list of hosts which will be ignored by the metadata parser. For example `["accounts.google.com", "xss.website"]`, defaults to `[]`. * `ignore_hosts`: list of hosts which will be ignored by the metadata parser. For example `["accounts.google.com", "xss.website"]`, defaults to `[]`.
* `ignore_tld`: list TLDs (top-level domains) which will ignore for parse metadata. default is ["local", "localdomain", "lan"]. * `ignore_tld`: list TLDs (top-level domains) which will ignore for parse metadata. default is ["local", "localdomain", "lan"].
* `parsers`: list of Rich Media parsers. * `parsers`: list of Rich Media parsers.
* `oembed_providers_enabled`: Embed rich media from a list of known providers. This takes precedence over other parsers.
* `failure_backoff`: Amount of milliseconds after request failure, during which the request will not be retried. * `failure_backoff`: Amount of milliseconds after request failure, during which the request will not be retried.
## HTTP server ## HTTP server

View file

@ -30,7 +30,7 @@ defmodule Pleroma.Web.ActivityPub.ObjectValidators.ArticleNotePageValidator do
def cast_and_apply(data) do def cast_and_apply(data) do
data data
|> cast_data() |> cast_data
|> apply_action(:insert) |> apply_action(:insert)
end end

View file

@ -150,8 +150,7 @@ def create(
) )
when not is_nil(scheduled_at) do when not is_nil(scheduled_at) do
params = params =
params Map.put(params, :in_reply_to_status_id, params[:in_reply_to_id])
|> Map.put(:in_reply_to_status_id, params[:in_reply_to_id])
|> Map.put(:generator, conn.assigns.application) |> Map.put(:generator, conn.assigns.application)
attrs = %{ attrs = %{
@ -211,8 +210,7 @@ defp do_create(
%{assigns: %{user: user}, private: %{open_api_spex: %{body_params: params}}} = conn %{assigns: %{user: user}, private: %{open_api_spex: %{body_params: params}}} = conn
) do ) do
params = params =
params Map.put(params, :in_reply_to_status_id, params[:in_reply_to_id])
|> Map.put(:in_reply_to_status_id, params[:in_reply_to_id])
|> Map.put(:generator, conn.assigns.application) |> Map.put(:generator, conn.assigns.application)
with {:ok, activity} <- CommonAPI.post(user, params) do with {:ok, activity} <- CommonAPI.post(user, params) do
@ -489,7 +487,7 @@ def card(
with %Activity{} = activity <- Activity.get_by_id(status_id), with %Activity{} = activity <- Activity.get_by_id(status_id),
true <- Visibility.visible_for_user?(activity, user) do true <- Visibility.visible_for_user?(activity, user) do
data = Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) data = Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
render(conn, "card.json", %{embed: data}) render(conn, "card.json", data)
else else
_ -> render_error(conn, :not_found, "Record not found") _ -> render_error(conn, :not_found, "Record not found")
end end

View file

@ -7,10 +7,8 @@ defmodule Pleroma.Web.MastodonAPI.InstanceView do
alias Pleroma.Config alias Pleroma.Config
alias Pleroma.Domain alias Pleroma.Domain
alias Pleroma.User
alias Pleroma.Web.ActivityPub.MRF alias Pleroma.Web.ActivityPub.MRF
alias Pleroma.Web.AdminAPI.DomainView alias Pleroma.Web.AdminAPI.DomainView
alias Pleroma.Web.MastodonAPI
@mastodon_api_level "2.7.2" @mastodon_api_level "2.7.2"
@ -37,8 +35,8 @@ def render("show.json", _) do
|> to_string, |> to_string,
registrations: Keyword.get(instance, :registrations_open), registrations: Keyword.get(instance, :registrations_open),
approval_required: Keyword.get(instance, :account_approval_required), approval_required: Keyword.get(instance, :account_approval_required),
configuration: configuration(),
contact_account: contact_account(Keyword.get(instance, :contact_username)), contact_account: contact_account(Keyword.get(instance, :contact_username)),
configuration: configuration(),
rules: render(__MODULE__, "rules.json"), rules: render(__MODULE__, "rules.json"),
# Extra (not present in Mastodon): # Extra (not present in Mastodon):
max_toot_chars: Keyword.get(instance, :limit), max_toot_chars: Keyword.get(instance, :limit),
@ -239,10 +237,10 @@ defp contact_account("@" <> username) do
end end
defp contact_account(username) do defp contact_account(username) do
user = User.get_cached_by_nickname(username) user = Pleroma.User.get_cached_by_nickname(username)
if user do if user do
MastodonAPI.AccountView.render("show.json", %{user: user, for: nil}) Pleroma.Web.MastodonAPI.AccountView.render("show.json", %{user: user, for: nil})
else else
nil nil
end end

View file

@ -21,8 +21,6 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
alias Pleroma.Web.MastodonAPI.StatusView alias Pleroma.Web.MastodonAPI.StatusView
alias Pleroma.Web.MediaProxy alias Pleroma.Web.MediaProxy
alias Pleroma.Web.PleromaAPI.EmojiReactionController alias Pleroma.Web.PleromaAPI.EmojiReactionController
alias Pleroma.Web.RichMedia.Parser.Card
alias Pleroma.Web.RichMedia.Parser.Embed
import Pleroma.Web.ActivityPub.Visibility, only: [get_visibility: 1, visible_for_user?: 2] import Pleroma.Web.ActivityPub.Visibility, only: [get_visibility: 1, visible_for_user?: 2]
@ -366,10 +364,7 @@ def render("show.json", %{activity: %{data: %{"object" => _object}} = activity}
summary = object.data["summary"] || "" summary = object.data["summary"] || ""
card = card = render("card.json", Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity))
render("card.json", %{
embed: Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
})
url = url =
if user.local do if user.local do
@ -480,14 +475,6 @@ def render("show.json", _) do
nil nil
end end
def render("card.json", %{embed: %Embed{} = embed}) do
with {:ok, %Card{} = card} <- Card.parse(embed) do
Card.to_map(card)
else
_ -> nil
end
end
def render("history.json", %{activity: %{data: %{"object" => _object}} = activity} = opts) do def render("history.json", %{activity: %{data: %{"object" => _object}} = activity} = opts) do
object = Object.normalize(activity, fetch: false) object = Object.normalize(activity, fetch: false)
@ -617,7 +604,6 @@ def render("card.json", %{rich_media: rich_media, page_url: page_url}) do
} }
end end
def render("card.json", %{embed: %Card{} = card}), do: Card.to_map(card)
def render("card.json", _), do: nil def render("card.json", _), do: nil
def render("attachment.json", %{attachment: attachment}) do def render("attachment.json", %{attachment: attachment}) do

View file

@ -37,7 +37,7 @@ def render(
card: card:
StatusView.render( StatusView.render(
"card.json", "card.json",
%{embed: Pleroma.Web.RichMedia.Helpers.fetch_data_for_object(object)} Pleroma.Web.RichMedia.Helpers.fetch_data_for_object(object)
) )
} }
|> put_idempotency_key() |> put_idempotency_key()

View file

@ -7,7 +7,6 @@ defmodule Pleroma.Web.RichMedia.Helpers do
alias Pleroma.HTML alias Pleroma.HTML
alias Pleroma.Object alias Pleroma.Object
alias Pleroma.Web.RichMedia.Parser alias Pleroma.Web.RichMedia.Parser
alias Pleroma.Web.RichMedia.Parser.Embed
@cachex Pleroma.Config.get([:cachex, :provider], Cachex) @cachex Pleroma.Config.get([:cachex, :provider], Cachex)
@ -36,10 +35,10 @@ def fetch_data_for_object(object) do
with true <- @config_impl.get([:rich_media, :enabled]), with true <- @config_impl.get([:rich_media, :enabled]),
{:ok, page_url} <- {:ok, page_url} <-
HTML.extract_first_external_url_from_object(object), HTML.extract_first_external_url_from_object(object),
{:ok, %Embed{} = embed} <- Parser.parse(page_url) do {:ok, rich_media} <- Parser.parse(page_url) do
embed %{page_url: page_url, rich_media: rich_media}
else else
_ -> nil _ -> %{}
end end
end end
@ -54,17 +53,18 @@ def fetch_data_for_activity(%Activity{data: %{"type" => "Create"}} = activity) d
@cachex.fetch!(:scrubber_cache, key, fn _ -> @cachex.fetch!(:scrubber_cache, key, fn _ ->
result = fetch_data_for_object(object) result = fetch_data_for_object(object)
with %Embed{} <- result do cond do
Activity.HTML.add_cache_key_for(activity.id, key) match?(%{page_url: _, rich_media: _}, result) ->
{:commit, result} Activity.HTML.add_cache_key_for(activity.id, key)
else {:commit, result}
_ ->
{:ignore, nil} true ->
{:ignore, %{}}
end end
end) end)
end end
else else
_ -> nil _ -> %{}
end end
end end

View file

@ -4,8 +4,6 @@
defmodule Pleroma.Web.RichMedia.Parser do defmodule Pleroma.Web.RichMedia.Parser do
require Logger require Logger
alias Pleroma.Web.RichMedia.Parser.Card
alias Pleroma.Web.RichMedia.Parser.Embed
@cachex Pleroma.Config.get([:cachex, :provider], Cachex) @cachex Pleroma.Config.get([:cachex, :provider], Cachex)
@config_impl Application.compile_env(:pleroma, [__MODULE__, :config_impl], Pleroma.Config) @config_impl Application.compile_env(:pleroma, [__MODULE__, :config_impl], Pleroma.Config)
@ -130,44 +128,42 @@ defp get_ttl_from_image(data, url) do
end end
def parse_url(url) do def parse_url(url) do
case maybe_fetch_oembed(url) do
{:ok, %Embed{} = embed} -> {:ok, embed}
_ -> fetch_document(url)
end
end
defp maybe_fetch_oembed(url) do
with true <- Pleroma.Config.get([:rich_media, :oembed_providers_enabled]),
{:ok, oembed_url} <- OEmbedProviders.oembed_url(url),
{:ok, %Tesla.Env{body: json}} <-
Pleroma.Web.RichMedia.Helpers.oembed_get(oembed_url),
{:ok, data} <- Jason.decode(json),
embed <- %Embed{url: url, oembed: data},
{:ok, %Card{}} <- Card.validate(embed) do
{:ok, embed}
else
{:error, error} -> {:error, error}
error -> {:error, error}
end
end
defp fetch_document(url) do
with {:ok, %Tesla.Env{body: html}} <- Pleroma.Web.RichMedia.Helpers.rich_media_get(url), with {:ok, %Tesla.Env{body: html}} <- Pleroma.Web.RichMedia.Helpers.rich_media_get(url),
{:ok, html} <- Floki.parse_document(html), {:ok, html} <- Floki.parse_document(html) do
%Embed{} = embed <- parse_embed(html, url) do html
{:ok, embed} |> maybe_parse()
else |> Map.put("url", url)
{:error, error} -> {:error, error} |> clean_parsed_data()
error -> {:error, error} |> check_parsed_data()
end end
end end
defp parse_embed(html, url) do defp maybe_parse(html) do
Enum.reduce(parsers(), %Embed{url: url}, fn parser, acc -> Enum.reduce_while(parsers(), %{}, fn parser, acc ->
parser.parse(html, acc) case parser.parse(html, acc) do
data when data != %{} -> {:halt, data}
_ -> {:cont, acc}
end
end) end)
end end
# Accepts the parsed metadata only when it is a map carrying a non-empty
# binary under the "title" key; anything else is reported as invalid
# metadata, with the rejected value attached to the error.
defp check_parsed_data(data) do
  case data do
    %{"title" => title} when is_binary(title) and title != "" ->
      {:ok, data}

    _ ->
      {:error, {:invalid_metadata, data}}
  end
end
# Keeps only the entries that Jason can encode. Each pair is test-encoded on
# its own, so a single unencodable value does not discard the whole map.
defp clean_parsed_data(data) do
  encodable_entries =
    Enum.filter(data, fn {key, val} ->
      match?({:ok, _}, Jason.encode(%{key => val}))
    end)

  Map.new(encodable_entries)
end
@spec validate_page_url(URI.t() | binary()) :: :ok | :error @spec validate_page_url(URI.t() | binary()) :: :ok | :error
defp validate_page_url(page_url) when is_binary(page_url) do defp validate_page_url(page_url) when is_binary(page_url) do
validate_tld = @config_impl.get([Pleroma.Formatter, :validate_tld]) validate_tld = @config_impl.get([Pleroma.Formatter, :validate_tld])

View file

@ -1,148 +0,0 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
# Builds and validates rich-media "card" structs from an `Embed`.
# The struct fields follow the Mastodon card entity linked below.
defmodule Pleroma.Web.RichMedia.Parser.Card do
alias Pleroma.Web.RichMedia.Parser.Card
alias Pleroma.Web.RichMedia.Parser.Embed
# The only card types accepted by `parse/1` and `validate/1`.
@types ["link", "photo", "video", "rich"]
# https://docs.joinmastodon.org/entities/card/
defstruct url: nil,
title: nil,
description: "",
type: "link",
author_name: "",
author_url: "",
provider_name: "",
provider_url: "",
html: "",
width: 0,
height: 0,
image: nil,
embed_url: "",
blurhash: nil
# Embed with oEmbed data that has a known "type" and a "title" key: the card is
# built from the oEmbed fields. Provider name/URL fall back to the host and
# scheme://host of the page URL. The result is passed through `validate/1`.
def parse(%Embed{url: url, oembed: %{"type" => type, "title" => title} = oembed} = embed)
when type in @types and is_binary(url) do
uri = URI.parse(url)
%Card{
url: url,
title: title,
description: get_description(embed),
type: oembed["type"],
author_name: oembed["author_name"],
author_url: oembed["author_url"],
provider_name: oembed["provider_name"] || uri.host,
provider_url: oembed["provider_url"] || "#{uri.scheme}://#{uri.host}",
html: sanitize_html(oembed["html"]),
width: oembed["width"],
height: oembed["height"],
# Image and embed URLs are made absolute against the page URL, then proxied.
image: get_image(oembed) |> fix_uri(url) |> proxy(),
embed_url: oembed["url"] |> fix_uri(url) |> proxy()
}
|> validate()
end
# Any other embed with a binary URL: a plain "link" card built from the page
# title and meta tags. The result is passed through `validate/1`.
def parse(%Embed{url: url} = embed) when is_binary(url) do
uri = URI.parse(url)
%Card{
url: url,
title: get_title(embed),
description: get_description(embed),
type: "link",
provider_name: uri.host,
provider_url: "#{uri.scheme}://#{uri.host}",
image: get_image(embed) |> fix_uri(url) |> proxy()
}
|> validate()
end
# Everything else is rejected, with the offending value attached to the error.
def parse(card), do: {:error, {:invalid_metadata, card}}
# Title preference: "twitter:title", then "og:title", then the embed's own
# `title`; only non-empty binaries count. Returns nil when none is usable.
defp get_title(embed) do
case embed do
%{meta: %{"twitter:title" => title}} when is_binary(title) and title != "" -> title
%{meta: %{"og:title" => title}} when is_binary(title) and title != "" -> title
%{title: title} when is_binary(title) and title != "" -> title
_ -> nil
end
end
# Description preference: "twitter:description", "og:description", then
# "description"; falls back to "" (also when `meta` is not a map).
defp get_description(%{meta: meta}) do
case meta do
%{"twitter:description" => desc} when is_binary(desc) and desc != "" -> desc
%{"og:description" => desc} when is_binary(desc) and desc != "" -> desc
%{"description" => desc} when is_binary(desc) and desc != "" -> desc
_ -> ""
end
end
# Clause order matters here. Values with a `:meta` key are handled first and
# read the image from the meta tags ("twitter:image", then "og:image").
defp get_image(%{meta: meta}) do
case meta do
%{"twitter:image" => image} when is_binary(image) and image != "" -> image
%{"og:image" => image} when is_binary(image) and image != "" -> image
_ -> ""
end
end
# String-keyed maps (as passed from the oEmbed branch of `parse/1`): prefer the
# thumbnail, then the "url" of a "photo" response; otherwise "".
defp get_image(%{"thumbnail_url" => image}) when is_binary(image) and image != "", do: image
defp get_image(%{"type" => "photo", "url" => image}), do: image
defp get_image(_), do: ""
# Scrubs the oEmbed HTML with the OEmbed scrubber and keeps it only when the
# result parses to exactly one top-level <iframe> node; otherwise returns "".
# NOTE(review): called with `oembed["html"]`, which may be nil - confirm the
# scrubber's behaviour for nil input.
defp sanitize_html(html) do
with {:ok, html} <- FastSanitize.Sanitizer.scrub(html, Pleroma.HTML.Scrubber.OEmbed),
{:ok, [{"iframe", _, _}]} <- Floki.parse_fragment(html) do
html
else
_ -> ""
end
end
# Converts a card struct (or any atom-keyed map) into a string-keyed map.
def to_map(%Card{} = card) do
card
|> Map.from_struct()
|> stringify_keys()
end
def to_map(%{} = card), do: stringify_keys(card)
# Keys must be atoms: `Atom.to_string/1` is applied to every key.
defp stringify_keys(%{} = map), do: Map.new(map, fn {k, v} -> {Atom.to_string(k), v} end)
# Makes `uri` absolute relative to `base_uri`:
#   * http:// and https:// URIs are returned unchanged;
#   * URIs starting with "/" are merged against the base;
#   * "" and non-binary values become nil;
#   * any other binary is prefixed with "/" and merged against the base.
def fix_uri("http://" <> _ = uri, _base_uri), do: uri
def fix_uri("https://" <> _ = uri, _base_uri), do: uri
def fix_uri("/" <> _ = uri, base_uri), do: URI.merge(base_uri, uri) |> URI.to_string()
def fix_uri("", _base_uri), do: nil
def fix_uri(uri, base_uri) when is_binary(uri),
do: URI.merge(base_uri, "/#{uri}") |> URI.to_string()
def fix_uri(_uri, _base_uri), do: nil
# Passes binary URLs through the media proxy; anything else becomes nil.
defp proxy(url) when is_binary(url), do: Pleroma.Web.MediaProxy.url(url)
defp proxy(_), do: nil
# "video" and "rich" cards without non-empty binary HTML are downgraded to
# "link" cards and validated again.
def validate(%Card{type: type, html: html} = card)
when type in ["video", "rich"] and (is_binary(html) == false or html == "") do
card
|> Map.put(:type, "link")
|> validate()
end
# A card is valid when its type is known and its title is a non-empty binary.
def validate(%Card{type: type, title: title} = card)
when type in @types and is_binary(title) and title != "" do
{:ok, card}
end
# An embed is valid when it parses into a valid card.
# NOTE(review): `parse/1` already ends in `validate/1`, so a successful card is
# validated twice, and a parse error tuple is wrapped in a second error tuple.
def validate(%Embed{} = embed) do
case Card.parse(embed) do
{:ok, %Card{} = card} -> validate(card)
card -> {:error, {:invalid_metadata, card}}
end
end
# Anything else is invalid metadata.
def validate(card), do: {:error, {:invalid_metadata, card}}
end

View file

@ -1,10 +0,0 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parser.Embed do
  @moduledoc """
  Container for everything collected about an embedded page.

  Holds the page `url`, its `title`, the scraped `meta` data and the `oembed`
  data. Every field defaults to `nil`.
  """

  defstruct [:url, :title, :meta, :oembed]
end

View file

@ -1,39 +0,0 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parser.MetaTags do
  @moduledoc """
  Collects the raw values of the `<meta>` tags found in a parsed HTML document.
  """

  @doc """
  Returns a map of `<meta>` tag values taken from a `Floki.html_tree/0`.

  A tag contributes an entry when it has a `content` attribute together with
  either a `name` or a `property` attribute (`name` is preferred when both are
  present). When several tags share a key the last one wins. Entries that
  cannot be encoded as JSON are dropped from the result.
  """
  @spec parse(html_tree :: Floki.html_tree()) :: map()
  def parse(html_tree) do
    raw_tags =
      html_tree
      |> Floki.find("meta")
      |> Enum.flat_map(&name_and_content/1)
      |> Map.new()

    # Duplicates are resolved first (above) and only then checked for
    # encodability, so an unencodable later tag still shadows an earlier one.
    raw_tags
    |> Enum.filter(fn {key, val} -> match?({:ok, _}, Jason.encode(%{key => val})) end)
    |> Map.new()
  end

  # Yields a one-element list with the {key, content} pair of a usable <meta>
  # node, or an empty list for anything that is not one.
  defp name_and_content({_tag, attrs, _children}) when is_list(attrs) do
    case Map.new(attrs) do
      %{"name" => name, "content" => content} -> [{name, content}]
      %{"property" => property, "content" => content} -> [{property, content}]
      _ -> []
    end
  end

  defp name_and_content(_), do: []
end

View file

@ -0,0 +1,46 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
  @moduledoc """
  Extracts prefixed `<meta>` tags (for example `og:*` or `twitter:*`) from a
  parsed HTML document into a flat, string-keyed map.
  """

  @doc """
  Merges the matching `<meta>` tags of `html` into `data`.

  A tag matches when its `key_name` attribute (e.g. `"property"` or `"name"`)
  starts with `"\#{prefix}:"`. The prefix is stripped from the key, and the
  value is read verbatim from the `value_name` attribute (`"content"` by
  default). Later tags override earlier ones with the same key.

  When the result is non-empty and has no `"title"` entry, the document's
  `<title>` text is used as the title, provided it is not empty.
  """
  @spec parse(map(), Floki.html_tree(), String.t(), String.t(), String.t()) :: map()
  def parse(data, html, prefix, key_name, value_name \\ "content") do
    html
    |> get_elements(key_name, prefix)
    |> Enum.reduce(data, fn el, acc ->
      Map.merge(acc, normalize_attributes(el, prefix, key_name, value_name))
    end)
    |> maybe_put_title(html)
  end

  # Selects the <meta> elements whose `key_name` attribute starts with "prefix:".
  defp get_elements(html, key_name, prefix) do
    Floki.find(html, "meta[#{key_name}^='#{prefix}:']")
  end

  # Turns one <meta> node into a single-entry map of stripped key => value.
  # Only the key attribute has the prefix removed; the value is left untouched
  # so that content which happens to start with "prefix:" is not truncated.
  defp normalize_attributes({_tag, attributes, _children}, prefix, key_name, value_name) do
    attrs = Map.new(attributes)
    key = String.trim_leading(attrs[key_name], "#{prefix}:")

    %{key => attrs[value_name]}
  end

  # An explicit title from the meta tags always wins.
  defp maybe_put_title(%{"title" => _} = meta, _), do: meta

  # Fall back to the page <title>, but only when some metadata was found.
  defp maybe_put_title(meta, html) when meta != %{} do
    case get_page_title(html) do
      "" -> meta
      title -> Map.put_new(meta, "title", title)
    end
  end

  defp maybe_put_title(meta, _), do: meta

  # Returns the text of the first <title> in <head>, or "" when there is none.
  # Matching on the result avoids handing nil to Floki.text/1 for title-less
  # documents.
  defp get_page_title(html) do
    case Floki.find(html, "html head title") do
      [title_node | _] -> Floki.text(title_node)
      _ -> ""
    end
  end
end

View file

@ -3,18 +3,13 @@
# SPDX-License-Identifier: AGPL-3.0-only # SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
def parse(html, data) do def parse(html, _data) do
with elements = [_ | _] <- get_discovery_data(html), with elements = [_ | _] <- get_discovery_data(html),
oembed_url when is_binary(oembed_url) <- get_oembed_url(elements), oembed_url when is_binary(oembed_url) <- get_oembed_url(elements),
{:ok, oembed_data = %{"html" => html}} <- get_oembed_data(oembed_url) do {:ok, oembed_data = %{"html" => html}} <- get_oembed_data(oembed_url) do
data %{oembed_data | "html" => Pleroma.HTML.filter_tags(html)}
|> Map.put(
:oembed,
oembed_data
|> Map.put("html", Pleroma.HTML.filter_tags(html))
)
else else
_e -> data _e -> %{}
end end
end end
@ -27,7 +22,7 @@ defp get_oembed_url([{"link", attributes, _children} | _]) do
end end
defp get_oembed_data(url) do defp get_oembed_data(url) do
with {:ok, %Tesla.Env{body: json}} <- Pleroma.Web.RichMedia.Helpers.oembed_get(url) do with {:ok, %Tesla.Env{body: json}} <- Pleroma.Web.RichMedia.Helpers.rich_media_get(url) do
Jason.decode(json) Jason.decode(json)
end end
end end

View file

@ -4,5 +4,7 @@
defmodule Pleroma.Web.RichMedia.Parsers.OGP do defmodule Pleroma.Web.RichMedia.Parsers.OGP do
@deprecated "OGP parser is deprecated. Use TwitterCard instead." @deprecated "OGP parser is deprecated. Use TwitterCard instead."
def parse(_html, data), do: data def parse(_html, _data) do
%{}
end
end end

View file

@ -3,22 +3,13 @@
# SPDX-License-Identifier: AGPL-3.0-only # SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parsers.TwitterCard do defmodule Pleroma.Web.RichMedia.Parsers.TwitterCard do
alias Pleroma.Web.RichMedia.Parser.MetaTags alias Pleroma.Web.RichMedia.Parsers.MetaTagsParser
@spec parse(Floki.html_tree(), map()) :: map() @spec parse(list(), map()) :: map()
def parse(html, data) do def parse(html, data) do
data data
|> Map.put(:title, get_page_title(html)) |> MetaTagsParser.parse(html, "og", "property")
|> Map.put(:meta, MetaTags.parse(html)) |> MetaTagsParser.parse(html, "twitter", "name")
end |> MetaTagsParser.parse(html, "twitter", "property")
def get_page_title(html) do
with [node | _] <- Floki.find(html, "html head title"),
title when is_binary(title) and title != "" <- Floki.text(node),
true <- String.valid?(title) do
title
else
_ -> nil
end
end end
end end

View file

@ -183,7 +183,6 @@ defp deps do
ref: "e0f16822d578866e186a0974d65ad58cddc1e2ab"}, ref: "e0f16822d578866e186a0974d65ad58cddc1e2ab"},
{:restarter, path: "./restarter"}, {:restarter, path: "./restarter"},
{:majic, "~> 1.0"}, {:majic, "~> 1.0"},
{:oembed_providers, "~> 0.1.0"},
{:open_api_spex, "~> 3.16"}, {:open_api_spex, "~> 3.16"},
{:ecto_psql_extras, "~> 0.6"}, {:ecto_psql_extras, "~> 0.6"},
{:vix, "~> 0.26.0"}, {:vix, "~> 0.26.0"},

View file

@ -99,7 +99,6 @@
"oauther": {:hex, :oauther, "1.3.0", "82b399607f0ca9d01c640438b34d74ebd9e4acd716508f868e864537ecdb1f76", [:mix], [], "hexpm", "78eb888ea875c72ca27b0864a6f550bc6ee84f2eeca37b093d3d833fbcaec04e"}, "oauther": {:hex, :oauther, "1.3.0", "82b399607f0ca9d01c640438b34d74ebd9e4acd716508f868e864537ecdb1f76", [:mix], [], "hexpm", "78eb888ea875c72ca27b0864a6f550bc6ee84f2eeca37b093d3d833fbcaec04e"},
"oban": {:hex, :oban, "2.13.6", "a0cb1bce3bd393770512231fb5a3695fa19fd3af10d7575bf73f837aee7abf43", [:mix], [{:ecto_sql, "~> 3.6", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:postgrex, "~> 0.16", [hex: :postgrex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "3c1c5eb16f377b3cbbf2ea14be24d20e3d91285af9d1ac86260b7c2af5464887"}, "oban": {:hex, :oban, "2.13.6", "a0cb1bce3bd393770512231fb5a3695fa19fd3af10d7575bf73f837aee7abf43", [:mix], [{:ecto_sql, "~> 3.6", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:postgrex, "~> 0.16", [hex: :postgrex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "3c1c5eb16f377b3cbbf2ea14be24d20e3d91285af9d1ac86260b7c2af5464887"},
"octo_fetch": {:hex, :octo_fetch, "0.4.0", "074b5ecbc08be10b05b27e9db08bc20a3060142769436242702931c418695b19", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~> 1.1", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "cf8be6f40cd519d7000bb4e84adcf661c32e59369ca2827c4e20042eda7a7fc6"}, "octo_fetch": {:hex, :octo_fetch, "0.4.0", "074b5ecbc08be10b05b27e9db08bc20a3060142769436242702931c418695b19", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~> 1.1", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "cf8be6f40cd519d7000bb4e84adcf661c32e59369ca2827c4e20042eda7a7fc6"},
"oembed_providers": {:hex, :oembed_providers, "0.1.0", "9b336ee5f3ca20ee4ed005383c74b154d30d0abeb98e95828855c0e2841ae46b", [:mix], [{:glob, "~> 1.0", [hex: :glob, repo: "hexpm", optional: false]}, {:jason, "~> 1.2", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "ac1dda0f743aa6fdead3eef59decfefc9de91d550bf0805b8fce16ed10d421ba"},
"open_api_spex": {:hex, :open_api_spex, "3.18.2", "8c855e83bfe8bf81603d919d6e892541eafece3720f34d1700b58024dadde247", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}, {:poison, "~> 3.0 or ~> 4.0 or ~> 5.0", [hex: :poison, repo: "hexpm", optional: true]}, {:ymlr, "~> 2.0 or ~> 3.0 or ~> 4.0", [hex: :ymlr, repo: "hexpm", optional: true]}], "hexpm", "aa3e6dcfc0ad6a02596b2172662da21c9dd848dac145ea9e603f54e3d81b8d2b"}, "open_api_spex": {:hex, :open_api_spex, "3.18.2", "8c855e83bfe8bf81603d919d6e892541eafece3720f34d1700b58024dadde247", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}, {:poison, "~> 3.0 or ~> 4.0 or ~> 5.0", [hex: :poison, repo: "hexpm", optional: true]}, {:ymlr, "~> 2.0 or ~> 3.0 or ~> 4.0", [hex: :ymlr, repo: "hexpm", optional: true]}], "hexpm", "aa3e6dcfc0ad6a02596b2172662da21c9dd848dac145ea9e603f54e3d81b8d2b"},
"parallel_stream": {:hex, :parallel_stream, "1.0.6", "b967be2b23f0f6787fab7ed681b4c45a215a81481fb62b01a5b750fa8f30f76c", [:mix], [], "hexpm", "639b2e8749e11b87b9eb42f2ad325d161c170b39b288ac8d04c4f31f8f0823eb"}, "parallel_stream": {:hex, :parallel_stream, "1.0.6", "b967be2b23f0f6787fab7ed681b4c45a215a81481fb62b01a5b750fa8f30f76c", [:mix], [], "hexpm", "639b2e8749e11b87b9eb42f2ad325d161c170b39b288ac8d04c4f31f8f0823eb"},
"parse_trans": {:hex, :parse_trans, "3.4.1", "6e6aa8167cb44cc8f39441d05193be6e6f4e7c2946cb2759f015f8c56b76e5ff", [:rebar3], [], "hexpm", "620a406ce75dada827b82e453c19cf06776be266f5a67cff34e1ef2cbb60e49a"}, "parse_trans": {:hex, :parse_trans, "3.4.1", "6e6aa8167cb44cc8f39441d05193be6e6f4e7c2946cb2759f015f8c56b76e5ff", [:rebar3], [], "hexpm", "620a406ce75dada827b82e453c19cf06776be266f5a67cff34e1ef2cbb60e49a"},

View file

@ -197,7 +197,7 @@ test "it detects language from JSON-LD context" do
"actor" => user.ap_id "actor" => user.ap_id
} }
{:ok, _create_activity, meta} = ObjectValidator.validate(note_activity, []) |> IO.inspect() {:ok, _create_activity, meta} = ObjectValidator.validate(note_activity, [])
assert meta[:object_data]["language"] == "pl" assert meta[:object_data]["language"] == "pl"
end end

View file

@ -1717,6 +1717,7 @@ test "returns rich-media card", %{conn: conn, user: user} do
card_data = %{ card_data = %{
"image" => "http://ia.media-imdb.com/images/rock.jpg", "image" => "http://ia.media-imdb.com/images/rock.jpg",
"image_description" => "",
"provider_name" => "example.com", "provider_name" => "example.com",
"provider_url" => "https://example.com", "provider_url" => "https://example.com",
"title" => "The Rock", "title" => "The Rock",
@ -1724,13 +1725,16 @@ test "returns rich-media card", %{conn: conn, user: user} do
"url" => "https://example.com/ogp", "url" => "https://example.com/ogp",
"description" => "description" =>
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.", "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
"author_name" => "", "pleroma" => %{
"author_url" => "", "opengraph" => %{
"blurhash" => nil, "image" => "http://ia.media-imdb.com/images/rock.jpg",
"embed_url" => "", "title" => "The Rock",
"height" => 0, "type" => "video.movie",
"html" => "", "url" => "https://example.com/ogp",
"width" => 0 "description" =>
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer."
}
}
} }
response = response =
@ -1767,16 +1771,17 @@ test "replaces missing description with an empty string", %{conn: conn, user: us
"title" => "Pleroma", "title" => "Pleroma",
"description" => "", "description" => "",
"image" => nil, "image" => nil,
"image_description" => "",
"provider_name" => "example.com", "provider_name" => "example.com",
"provider_url" => "https://example.com", "provider_url" => "https://example.com",
"url" => "https://example.com/ogp-missing-data", "url" => "https://example.com/ogp-missing-data",
"author_name" => "", "pleroma" => %{
"author_url" => "", "opengraph" => %{
"blurhash" => nil, "title" => "Pleroma",
"embed_url" => "", "type" => "website",
"height" => 0, "url" => "https://example.com/ogp-missing-data"
"html" => "", }
"width" => 0 }
} }
end end
end end

View file

@ -17,7 +17,6 @@ defmodule Pleroma.Web.MastodonAPI.StatusViewTest do
alias Pleroma.Web.CommonAPI alias Pleroma.Web.CommonAPI
alias Pleroma.Web.MastodonAPI.AccountView alias Pleroma.Web.MastodonAPI.AccountView
alias Pleroma.Web.MastodonAPI.StatusView alias Pleroma.Web.MastodonAPI.StatusView
alias Pleroma.Web.RichMedia.Parser.Embed
require Bitwise require Bitwise
@ -448,7 +447,7 @@ test "a quote post" do
assert status.pleroma.quote_url == Object.normalize(quote_post).data["id"] assert status.pleroma.quote_url == Object.normalize(quote_post).data["id"]
assert status.pleroma.quote_visible assert status.pleroma.quote_visible
# Quotes don't go more than one level deep\ # Quotes don't go more than one level deep
refute status.pleroma.quote.pleroma.quote refute status.pleroma.quote.pleroma.quote
assert status.pleroma.quote.pleroma.quote_id == to_string(post.id) assert status.pleroma.quote.pleroma.quote_id == to_string(post.id)
assert status.pleroma.quote.pleroma.quote_url == Object.normalize(post).data["id"] assert status.pleroma.quote.pleroma.quote_url == Object.normalize(post).data["id"]
@ -756,45 +755,57 @@ test "it returns a a dictionary tags" do
describe "rich media cards" do describe "rich media cards" do
test "a rich media card without a site name renders correctly" do test "a rich media card without a site name renders correctly" do
embed = %Embed{ page_url = "http://example.com"
url: "http://example.com",
title: "Example website",
meta: %{"twitter:image" => "http://example.com/example.jpg"}
}
%{"provider_name" => "example.com"} = StatusView.render("card.json", %{embed: embed}) card = %{
end url: page_url,
image: page_url <> "/example.jpg",
test "a rich media card without a site name or image renders correctly" do
embed = %Embed{
url: "http://example.com",
title: "Example website" title: "Example website"
} }
%{"provider_name" => "example.com"} = StatusView.render("card.json", %{embed: embed}) %{provider_name: "example.com"} =
StatusView.render("card.json", %{page_url: page_url, rich_media: card})
end
test "a rich media card without a site name or image renders correctly" do
page_url = "http://example.com"
card = %{
url: page_url,
title: "Example website"
}
%{provider_name: "example.com"} =
StatusView.render("card.json", %{page_url: page_url, rich_media: card})
end end
test "a rich media card without an image renders correctly" do test "a rich media card without an image renders correctly" do
embed = %Embed{ page_url = "http://example.com"
url: "http://example.com",
title: "Example website", card = %{
meta: %{"twitter:title" => "Example site name"} url: page_url,
site_name: "Example site name",
title: "Example website"
} }
%{"provider_name" => "example.com"} = StatusView.render("card.json", %{embed: embed}) %{provider_name: "example.com"} =
StatusView.render("card.json", %{page_url: page_url, rich_media: card})
end end
test "a rich media card with all relevant data renders correctly" do test "a rich media card with all relevant data renders correctly" do
embed = %Embed{ page_url = "http://example.com"
url: "http://example.com",
card = %{
"image:alt" => "Example image description",
url: page_url,
site_name: "Example site name",
title: "Example website", title: "Example website",
meta: %{ image: page_url <> "/example.jpg",
"twitter:title" => "Example site name", description: "Example description"
"twitter:image" => "http://example.com/example.jpg"
}
} }
%{"provider_name" => "example.com"} = StatusView.render("card.json", %{embed: embed}) %{provider_name: "example.com", image_description: "Example image description"} =
StatusView.render("card.json", %{page_url: page_url, rich_media: card})
end end
test "a rich media card has all media proxied" do test "a rich media card has all media proxied" do

View file

@ -3,38 +3,28 @@
# SPDX-License-Identifier: AGPL-3.0-only # SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.PleromaAPI.ChatMessageReferenceViewTest do defmodule Pleroma.Web.PleromaAPI.ChatMessageReferenceViewTest do
use Pleroma.DataCase, async: false alias Pleroma.NullCache
use Pleroma.DataCase, async: true
alias Pleroma.Chat alias Pleroma.Chat
alias Pleroma.Chat.MessageReference alias Pleroma.Chat.MessageReference
alias Pleroma.Object alias Pleroma.Object
alias Pleroma.StaticStubbedConfigMock, as: ConfigMock alias Pleroma.StaticStubbedConfigMock
alias Pleroma.UnstubbedConfigMock, as: ConfigMock
alias Pleroma.Web.ActivityPub.ActivityPub alias Pleroma.Web.ActivityPub.ActivityPub
alias Pleroma.Web.CommonAPI alias Pleroma.Web.CommonAPI
alias Pleroma.Web.PleromaAPI.Chat.MessageReferenceView alias Pleroma.Web.PleromaAPI.Chat.MessageReferenceView
import Mox import Mox
import Pleroma.Factory import Pleroma.Factory
import Tesla.Mock
test "crawls valid, complete URLs" do
mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
ConfigMock
|> stub(:get, fn
[:rich_media, :enabled] -> true
path -> Pleroma.Test.StaticConfig.get(path)
end)
Pleroma.UnstubbedConfigMock
|> stub(:get, fn
[:rich_media, :enabled] -> true
path -> Pleroma.Test.StaticConfig.get(path)
end)
test "it displays a chat message" do
user = insert(:user) user = insert(:user)
recipient = insert(:user) recipient = insert(:user)
ConfigMock
|> stub_with(Pleroma.Test.StaticConfig)
file = %Plug.Upload{ file = %Plug.Upload{
content_type: "image/jpeg", content_type: "image/jpeg",
path: Path.absname("test/fixtures/image.jpg"), path: Path.absname("test/fixtures/image.jpg"),
@ -52,14 +42,14 @@ test "crawls valid, complete URLs" do
cm_ref = MessageReference.for_chat_and_object(chat, object) cm_ref = MessageReference.for_chat_and_object(chat, object)
{:ok, activity} = id = cm_ref.id
CommonAPI.post(user, %{
status: "[test](https://example.com/ogp)",
content_type: "text/markdown"
})
assert %{url: "https://example.com/ogp", meta: %{} = _} = Pleroma.CachexMock
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) |> stub(:get, fn
:chat_message_id_idempotency_key_cache, ^id -> {:ok, "123"}
cache, key -> NullCache.get(cache, key)
end)
|> stub(:fetch, fn :rich_media_cache, _, _ -> {:ok, {:ok, %{}}} end)
chat_message = MessageReferenceView.render("show.json", chat_message_reference: cm_ref) chat_message = MessageReferenceView.render("show.json", chat_message_reference: cm_ref)
@ -70,6 +60,18 @@ test "crawls valid, complete URLs" do
assert chat_message[:created_at] assert chat_message[:created_at]
assert chat_message[:unread] == false assert chat_message[:unread] == false
assert match?([%{shortcode: "firefox"}], chat_message[:emojis]) assert match?([%{shortcode: "firefox"}], chat_message[:emojis])
assert chat_message[:idempotency_key] == "123"
StaticStubbedConfigMock
|> stub(:get, fn
[:rich_media, :enabled] -> true
path -> Pleroma.Test.StaticConfig.get(path)
end)
Tesla.Mock.mock_global(fn
%{url: "https://example.com/ogp"} ->
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}
end)
{:ok, activity} = {:ok, activity} =
CommonAPI.post_chat_message(recipient, user, "gkgkgk https://example.com/ogp", CommonAPI.post_chat_message(recipient, user, "gkgkgk https://example.com/ogp",

View file

@ -43,7 +43,7 @@ test "refuses to crawl incomplete URLs" do
path -> Pleroma.Test.StaticConfig.get(path) path -> Pleroma.Test.StaticConfig.get(path)
end) end)
assert Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) == nil assert %{} == Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
end end
test "refuses to crawl malformed URLs" do test "refuses to crawl malformed URLs" do
@ -61,7 +61,7 @@ test "refuses to crawl malformed URLs" do
path -> Pleroma.Test.StaticConfig.get(path) path -> Pleroma.Test.StaticConfig.get(path)
end) end)
assert Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) == nil assert %{} == Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
end end
test "crawls valid, complete URLs" do test "crawls valid, complete URLs" do
@ -79,7 +79,7 @@ test "crawls valid, complete URLs" do
path -> Pleroma.Test.StaticConfig.get(path) path -> Pleroma.Test.StaticConfig.get(path)
end) end)
assert %{url: "https://example.com/ogp", meta: %{} = _} = assert %{page_url: "https://example.com/ogp", rich_media: _} =
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
end end
@ -97,7 +97,7 @@ test "recrawls URLs on updates" do
{:ok, activity} = CommonAPI.post(user, %{status: "I like this site #{original_url}"}) {:ok, activity} = CommonAPI.post(user, %{status: "I like this site #{original_url}"})
assert match?( assert match?(
%{url: ^original_url, meta: _}, %{page_url: ^original_url, rich_media: _},
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
) )
@ -106,7 +106,7 @@ test "recrawls URLs on updates" do
activity = Pleroma.Activity.get_by_id(activity.id) activity = Pleroma.Activity.get_by_id(activity.id)
assert match?( assert match?(
%{url: ^updated_url, meta: _}, %{page_url: ^updated_url, rich_media: _},
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
) )
end end
@ -128,10 +128,10 @@ test "refuses to crawl URLs of private network from posts" do
path -> Pleroma.Test.StaticConfig.get(path) path -> Pleroma.Test.StaticConfig.get(path)
end) end)
assert Helpers.fetch_data_for_activity(activity) == nil assert %{} == Helpers.fetch_data_for_activity(activity)
assert Helpers.fetch_data_for_activity(activity2) == nil assert %{} == Helpers.fetch_data_for_activity(activity2)
assert Helpers.fetch_data_for_activity(activity3) == nil assert %{} == Helpers.fetch_data_for_activity(activity3)
assert Helpers.fetch_data_for_activity(activity4) == nil assert %{} == Helpers.fetch_data_for_activity(activity4)
assert Helpers.fetch_data_for_activity(activity5) == nil assert %{} == Helpers.fetch_data_for_activity(activity5)
end end
end end

View file

@ -1,129 +0,0 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parser.CardTest do
  # Exercises Card.parse/1, Card.validate/1 and Card.fix_uri/2.
  use ExUnit.Case, async: true

  alias Pleroma.Web.RichMedia.Parser.Card
  alias Pleroma.Web.RichMedia.Parser.Embed
  alias Pleroma.Web.RichMedia.Parsers.TwitterCard

  describe "parse/1" do
    test "converts an %Embed{} into a %Card{}" do
      page_url =
        "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"

      # Parse the fixture page, then run it through TwitterCard.parse/2
      # seeded with an embed that only carries the page URL.
      document =
        "test/fixtures/nypd-facial-recognition-children-teenagers.html"
        |> File.read!()
        |> Floki.parse_document!()

      parsed_embed = TwitterCard.parse(document, %Embed{url: page_url})

      expected_card = %Card{
        description:
          "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
        image:
          "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
        title: "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
        type: "link",
        provider_name: "www.nytimes.com",
        provider_url: "https://www.nytimes.com",
        url: page_url
      }

      assert Card.parse(parsed_embed) == {:ok, expected_card}
    end

    test "converts URL paths into absolute URLs" do
      relative_image_embed = %Embed{
        url: "https://spam.com/luigi",
        title: "Watch Luigi not doing anything",
        meta: %{
          "og:image" => "/uploads/weegee.jpeg"
        }
      }

      {:ok, parsed_card} = Card.parse(relative_image_embed)

      assert parsed_card.image == "https://spam.com/uploads/weegee.jpeg"
    end

    test "falls back to Link with invalid Rich/Video" do
      page_url = "https://ishothim.com/our-work/mexican-drug-cartels/"

      oembed_payload =
        "test/fixtures/rich_media/wordpress_embed.json"
        |> File.read!()
        |> Jason.decode!()

      document =
        "test/fixtures/rich_media/wordpress.html"
        |> File.read!()
        |> Floki.parse_document!()

      parsed_embed = TwitterCard.parse(document, %Embed{url: page_url, oembed: oembed_payload})

      expected_card = %Card{
        author_name: "Michael Jeter",
        author_url: "https://ishothim.com/author/mike/",
        blurhash: nil,
        description:
          "I Shot Him collaborated with the folks at Visual.ly on this informative animation about the violence from drug cartels happening right across our border. We researched, wrote, illustrated, and animated this piece to inform people about the connections of our drug and gun laws to the death of innocence in Mexico.",
        embed_url: nil,
        height: 338,
        html: "",
        image: "https://ishothim.com/wp-content/uploads/2013/01/Cartel_feature.jpg",
        provider_name: "I Shot Him",
        provider_url: "https://ishothim.com",
        title: "Mexican Drug Cartels",
        type: "link",
        url: "https://ishothim.com/our-work/mexican-drug-cartels/",
        width: 600
      }

      assert Card.parse(parsed_embed) == {:ok, expected_card}
    end
  end

  describe "validate/1" do
    test "returns {:ok, card} with a valid %Card{}" do
      valid_card = %Card{
        title: "Moms can't believe this one trick",
        url: "http://spam.com",
        type: "link"
      }

      assert {:ok, ^valid_card} = Card.validate(valid_card)
    end
  end

  describe "fix_uri/2" do
    # Every test in this group resolves its input against the same base URI.
    setup do
      %{base_uri: "https://benis.xyz/hello/fam"}
    end

    test "two full URLs", %{base_uri: base} do
      absolute = "https://benis.xyz/images/pic.jpeg"

      assert Card.fix_uri(absolute, base) == absolute
    end

    test "URI with leading slash", %{base_uri: base} do
      rooted_path = "/images/pic.jpeg"
      resolved = "https://benis.xyz/images/pic.jpeg"

      assert Card.fix_uri(rooted_path, base) == resolved
    end

    test "URI without leading slash", %{base_uri: base} do
      bare_path = "images/pic.jpeg"
      resolved = "https://benis.xyz/images/pic.jpeg"

      assert Card.fix_uri(bare_path, base) == resolved
    end

    test "empty URI", %{base_uri: base} do
      assert Card.fix_uri("", base) == nil
    end

    test "nil URI", %{base_uri: base} do
      assert Card.fix_uri(nil, base) == nil
    end

    # https://github.com/elixir-lang/elixir/issues/10771
    test "Elixir #10771", _context do
      nested_url =
        "https://images.macrumors.com/t/4riJyi1XC906qyJ41nAfOgpvo1I=/1600x/https://images.macrumors.com/article-new/2020/09/spatialaudiofeature.jpg"

      base = "https://www.macrumors.com/guide/apps-support-apples-spatial-audio-feature/"

      assert Card.fix_uri(nested_url, base) == nested_url
    end
  end
end

View file

@ -1,81 +0,0 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parser.MetaTagsTest do
  # Checks the full map MetaTags.parse/1 returns for a stored article page.
  use ExUnit.Case, async: true

  alias Pleroma.Web.RichMedia.Parser.MetaTags

  @fixture "test/fixtures/nypd-facial-recognition-children-teenagers.html"

  test "returns a map of <meta> values" do
    document =
      @fixture
      |> File.read!()
      |> Floki.parse_document!()

    # Values that appear under several keys in the expected map are bound once.
    article_url =
      "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"

    app_deep_link =
      "nytimes://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"

    headline = "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database."

    summary =
      "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers."

    facebook_image =
      "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg"

    expected_tags = %{
      "CG" => "nyregion",
      "CN" => "experience-tech-and-society",
      "CT" => "spotlight",
      "PST" => "News",
      "PT" => "article",
      "SCG" => "",
      "al:android:app_name" => "NYTimes",
      "al:android:package" => "com.nytimes.android",
      "al:android:url" => "nytimes://reader/id/100000006583622",
      "al:ipad:app_name" => "NYTimes",
      "al:ipad:app_store_id" => "357066198",
      "al:ipad:url" => app_deep_link,
      "al:iphone:app_name" => "NYTimes",
      "al:iphone:app_store_id" => "284862083",
      "al:iphone:url" => app_deep_link,
      "article:modified" => "2019-08-02T09:30:23.000Z",
      "article:published" => "2019-08-01T17:15:31.000Z",
      "article:section" => "New York",
      "article:tag" => "New York City",
      "articleid" => "100000006583622",
      "byl" => "By Joseph Goldstein and Ali Watkins",
      "description" => summary,
      "fb:app_id" => "9869919170",
      "image" => facebook_image,
      "msapplication-starturl" => "https://www.nytimes.com",
      "news_keywords" =>
        "NYPD,Juvenile delinquency,Facial Recognition,Privacy,Government Surveillance,Police,Civil Rights,NYC",
      "nyt_uri" => "nyt://article/9da58246-2495-505f-9abd-b5fda8e67b56",
      "og:description" => summary,
      "og:image" => facebook_image,
      "og:title" => headline,
      "og:type" => "article",
      "og:url" => article_url,
      "pdate" => "20190801",
      "pubp_event_id" => "pubp://event/47a657bafa8a476bb36832f90ee5ac6e",
      "robots" => "noarchive",
      "thumbnail" =>
        "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-thumbStandard.jpg",
      "twitter:app:id:googleplay" => "com.nytimes.android",
      "twitter:app:name:googleplay" => "NYTimes",
      "twitter:app:url:googleplay" => "nytimes://reader/id/100000006583622",
      "twitter:card" => "summary_large_image",
      "twitter:description" => summary,
      "twitter:image" =>
        "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
      "twitter:image:alt" => "",
      "twitter:title" => headline,
      "twitter:url" => article_url,
      "url" => article_url,
      "viewport" => "width=device-width, initial-scale=1, maximum-scale=1"
    }

    assert MetaTags.parse(document) == expected_tags
  end
end

View file

@ -6,7 +6,6 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
use Pleroma.DataCase, async: false use Pleroma.DataCase, async: false
alias Pleroma.Web.RichMedia.Parser alias Pleroma.Web.RichMedia.Parser
alias Pleroma.Web.RichMedia.Parser.Embed
import Tesla.Mock import Tesla.Mock
@ -14,123 +13,84 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
end end
test "returns empty embed when no metadata present" do test "returns error when no metadata present" do
expected = %Embed{ assert {:error, _} = Parser.parse("https://example.com/empty")
meta: %{}, end
oembed: nil,
title: nil,
url: "https://example.com/empty"
}
assert Parser.parse("https://example.com/empty") == {:ok, expected} test "doesn't just add a title" do
assert {:error, {:invalid_metadata, _}} = Parser.parse("https://example.com/non-ogp")
end end
test "parses ogp" do test "parses ogp" do
url = "https://example.com/ogp" assert Parser.parse("https://example.com/ogp") ==
{:ok,
expected = %Embed{ %{
meta: %{ "image" => "http://ia.media-imdb.com/images/rock.jpg",
"og:image" => "http://ia.media-imdb.com/images/rock.jpg", "title" => "The Rock",
"og:title" => "The Rock", "description" =>
"og:description" => "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.", "type" => "video.movie",
"og:type" => "video.movie", "url" => "https://example.com/ogp"
"og:url" => "http://www.imdb.com/title/tt0117500/" }}
},
oembed: nil,
title: "The Rock (1996)",
url: "https://example.com/ogp"
}
assert Parser.parse(url) == {:ok, expected}
end end
test "gets <title> tag" do test "falls back to <title> when ogp:title is missing" do
url = "https://example.com/ogp-missing-title" assert Parser.parse("https://example.com/ogp-missing-title") ==
expected = "The Rock (1996)" {:ok,
assert {:ok, %Embed{title: ^expected}} = Parser.parse(url) %{
"image" => "http://ia.media-imdb.com/images/rock.jpg",
"title" => "The Rock (1996)",
"description" =>
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
"type" => "video.movie",
"url" => "https://example.com/ogp-missing-title"
}}
end end
test "parses twitter card" do test "parses twitter card" do
url = "https://example.com/twitter-card" assert Parser.parse("https://example.com/twitter-card") ==
{:ok,
expected = %Embed{ %{
meta: %{ "card" => "summary",
"twitter:card" => "summary", "site" => "@flickr",
"twitter:description" => "View the album on Flickr.", "image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg",
"twitter:image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg", "title" => "Small Island Developing States Photo Submission",
"twitter:site" => "@flickr", "description" => "View the album on Flickr.",
"twitter:title" => "Small Island Developing States Photo Submission" "url" => "https://example.com/twitter-card"
}, }}
oembed: nil,
title: nil,
url: "https://example.com/twitter-card"
}
assert Parser.parse(url) == {:ok, expected}
end end
test "parses OEmbed" do test "parses OEmbed and filters HTML tags" do
url = "https://example.com/oembed" assert Parser.parse("https://example.com/oembed") ==
{:ok,
expected = %Embed{ %{
meta: %{}, "author_name" => "\u202E\u202D\u202Cbees\u202C",
oembed: %{ "author_url" => "https://www.flickr.com/photos/bees/",
"author_name" => "\u202E\u202D\u202Cbees\u202C", "cache_age" => 3600,
"author_url" => "https://www.flickr.com/photos/bees/", "flickr_type" => "photo",
"cache_age" => 3600, "height" => "768",
"flickr_type" => "photo", "html" =>
"height" => "768", "<a href=\"https://www.flickr.com/photos/bees/2362225867/\" title=\"Bacon Lollys by \u202E\u202D\u202Cbees\u202C, on Flickr\"><img src=\"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg\" width=\"1024\" height=\"768\" alt=\"Bacon Lollys\"/></a>",
"html" => "license" => "All Rights Reserved",
"<a href=\"https://www.flickr.com/photos/bees/2362225867/\" title=\"Bacon Lollys by \u202E\u202D\u202Cbees\u202C, on Flickr\"><img src=\"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg\" width=\"1024\" height=\"768\" alt=\"Bacon Lollys\"/></a>", "license_id" => 0,
"license" => "All Rights Reserved", "provider_name" => "Flickr",
"license_id" => 0, "provider_url" => "https://www.flickr.com/",
"provider_name" => "Flickr", "thumbnail_height" => 150,
"provider_url" => "https://www.flickr.com/", "thumbnail_url" =>
"thumbnail_height" => 150, "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_q.jpg",
"thumbnail_url" => "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_q.jpg", "thumbnail_width" => 150,
"thumbnail_width" => 150, "title" => "Bacon Lollys",
"title" => "Bacon Lollys", "type" => "photo",
"type" => "photo", "url" => "https://example.com/oembed",
"url" => "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg", "version" => "1.0",
"version" => "1.0", "web_page" => "https://www.flickr.com/photos/bees/2362225867/",
"web_page" => "https://www.flickr.com/photos/bees/2362225867/", "web_page_short_url" => "https://flic.kr/p/4AK2sc",
"web_page_short_url" => "https://flic.kr/p/4AK2sc", "width" => "1024"
"width" => "1024" }}
},
url: "https://example.com/oembed"
}
assert Parser.parse(url) == {:ok, expected}
end end
test "cleans corrupted meta data" do test "rejects invalid OGP data" do
expected = %Embed{ assert {:error, _} = Parser.parse("https://example.com/malformed")
meta: %{
"Keywords" => "Konsument i zakupy",
"ROBOTS" => "NOARCHIVE",
"fb:app_id" => "515714931781741",
"fb:pages" => "288018984602680",
"google-site-verification" => "3P4BE3hLw82QWqtseIE60qQcOtrpMxMnCNkcv62pjTA",
"news_keywords" => "Konsument i zakupy",
"og:image" =>
"https://bi.im-g.pl/im/f7/49/17/z24418295FBW,Prace-nad-projektem-chusty-antysmogowej-rozpoczely.jpg",
"og:locale" => "pl_PL",
"og:site_name" => "wyborcza.biz",
"og:type" => "article",
"og:url" =>
"http://wyborcza.biz/biznes/7,147743,24417936,pomysl-na-biznes-chusta-ktora-chroni-przed-smogiem.html",
"twitter:card" => "summary_large_image",
"twitter:image" =>
"https://bi.im-g.pl/im/f7/49/17/z24418295FBW,Prace-nad-projektem-chusty-antysmogowej-rozpoczely.jpg",
"viewport" => "width=device-width, user-scalable=yes"
},
oembed: nil,
title: nil,
url: "https://example.com/malformed"
}
assert Parser.parse("https://example.com/malformed") == {:ok, expected}
end end
test "returns error if getting page was not successful" do test "returns error if getting page was not successful" do

View file

@ -6,10 +6,8 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
use ExUnit.Case, async: true use ExUnit.Case, async: true
alias Pleroma.Web.RichMedia.Parsers.TwitterCard alias Pleroma.Web.RichMedia.Parsers.TwitterCard
test "fails gracefully with barebones HTML" do test "returns error when html not contains twitter card" do
html = [{"html", [], [{"head", [], []}, {"body", [], []}]}] assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) == %{}
expected = %{meta: %{}, title: nil}
assert TwitterCard.parse(html, %{}) == expected
end end
test "parses twitter card with only name attributes" do test "parses twitter card with only name attributes" do
@ -17,24 +15,22 @@ test "parses twitter card with only name attributes" do
File.read!("test/fixtures/nypd-facial-recognition-children-teenagers3.html") File.read!("test/fixtures/nypd-facial-recognition-children-teenagers3.html")
|> Floki.parse_document!() |> Floki.parse_document!()
assert %{ assert TwitterCard.parse(html, %{}) ==
title: %{
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times", "app:id:googleplay" => "com.nytimes.android",
meta: %{ "app:name:googleplay" => "NYTimes",
"twitter:app:id:googleplay" => "com.nytimes.android", "app:url:googleplay" => "nytimes://reader/id/100000006583622",
"twitter:app:name:googleplay" => "NYTimes", "site" => nil,
"twitter:app:url:googleplay" => "nytimes://reader/id/100000006583622", "description" =>
"og:description" =>
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.", "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
"og:image" => "image" =>
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg", "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
"og:title" => "type" => "article",
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.", "url" =>
"og:type" => "article", "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
"og:url" => "title" =>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html" "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database."
} }
} = TwitterCard.parse(html, %{})
end end
test "parses twitter card with only property attributes" do test "parses twitter card with only property attributes" do
@ -42,31 +38,20 @@ test "parses twitter card with only property attributes" do
File.read!("test/fixtures/nypd-facial-recognition-children-teenagers2.html") File.read!("test/fixtures/nypd-facial-recognition-children-teenagers2.html")
|> Floki.parse_document!() |> Floki.parse_document!()
assert %{ assert TwitterCard.parse(html, %{}) ==
title: %{
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times", "card" => "summary_large_image",
meta: %{ "description" =>
"twitter:card" => "summary_large_image",
"twitter:description" =>
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.", "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
"twitter:image" => "image" =>
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg", "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
"twitter:image:alt" => "", "image:alt" => "",
"twitter:title" => "title" =>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.", "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
"twitter:url" => "url" =>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html", "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
"og:description" => "type" => "article"
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
"og:image" =>
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
"og:title" =>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
"og:url" =>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
"og:type" => "article"
} }
} = TwitterCard.parse(html, %{})
end end
test "parses twitter card with name & property attributes" do test "parses twitter card with name & property attributes" do
@ -74,43 +59,47 @@ test "parses twitter card with name & property attributes" do
File.read!("test/fixtures/nypd-facial-recognition-children-teenagers.html") File.read!("test/fixtures/nypd-facial-recognition-children-teenagers.html")
|> Floki.parse_document!() |> Floki.parse_document!()
assert %{ assert TwitterCard.parse(html, %{}) ==
title: %{
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times", "app:id:googleplay" => "com.nytimes.android",
meta: %{ "app:name:googleplay" => "NYTimes",
"twitter:app:id:googleplay" => "com.nytimes.android", "app:url:googleplay" => "nytimes://reader/id/100000006583622",
"twitter:app:name:googleplay" => "NYTimes", "card" => "summary_large_image",
"twitter:app:url:googleplay" => "nytimes://reader/id/100000006583622", "description" =>
"twitter:card" => "summary_large_image",
"twitter:description" =>
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.", "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
"twitter:image" => "image" =>
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg", "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
"twitter:image:alt" => "", "image:alt" => "",
"twitter:title" => "site" => nil,
"title" =>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.", "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
"twitter:url" => "url" =>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html", "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
"og:description" => "type" => "article"
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
"og:image" =>
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
"og:title" =>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
"og:url" =>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
"og:type" => "article"
} }
} = TwitterCard.parse(html, %{})
end end
test "respect only first title tag on the page" do test "respect only first title tag on the page" do
image_path =
"https://assets.atlasobscura.com/media/W1siZiIsInVwbG9hZHMvYXNzZXRzLzkwYzgyMzI4LThlMDUtNGRiNS05MDg3LTUzMGUxZTM5N2RmMmVkOTM5ZDM4MGM4OTIx" <>
"YTQ5MF9EQVIgZXhodW1hdGlvbiBvZiBNYXJnYXJldCBDb3JiaW4gZ3JhdmUgMTkyNi5qcGciXSxbInAiLCJjb252ZXJ0IiwiIl0sWyJwIiwiY29udmVydCIsIi1xdWFsaXR5IDgxIC1hdXRvLW9" <>
"yaWVudCJdLFsicCIsInRodW1iIiwiNjAweD4iXV0/DAR%20exhumation%20of%20Margaret%20Corbin%20grave%201926.jpg"
html = html =
File.read!("test/fixtures/margaret-corbin-grave-west-point.html") |> Floki.parse_document!() File.read!("test/fixtures/margaret-corbin-grave-west-point.html") |> Floki.parse_document!()
expected = "The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura" assert TwitterCard.parse(html, %{}) ==
%{
assert %{title: ^expected} = TwitterCard.parse(html, %{}) "site" => "@atlasobscura",
"title" => "The Missing Grave of Margaret Corbin, Revolutionary War Veteran",
"card" => "summary_large_image",
"image" => image_path,
"description" =>
"She's the only woman veteran honored with a monument at West Point. But where was she buried?",
"site_name" => "Atlas Obscura",
"type" => "article",
"url" => "http://www.atlasobscura.com/articles/margaret-corbin-grave-west-point"
}
end end
test "takes first title found in html head if there is an html markup error" do test "takes first title found in html head if there is an html markup error" do
@ -118,9 +107,21 @@ test "takes first title found in html head if there is an html markup error" do
File.read!("test/fixtures/nypd-facial-recognition-children-teenagers4.html") File.read!("test/fixtures/nypd-facial-recognition-children-teenagers4.html")
|> Floki.parse_document!() |> Floki.parse_document!()
expected = assert TwitterCard.parse(html, %{}) ==
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times" %{
"site" => nil,
assert %{title: ^expected} = TwitterCard.parse(html, %{}) "title" =>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
"app:id:googleplay" => "com.nytimes.android",
"app:name:googleplay" => "NYTimes",
"app:url:googleplay" => "nytimes://reader/id/100000006583622",
"description" =>
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
"image" =>
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
"type" => "article",
"url" =>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
}
end end
end end