Revert to Pleroma's card parser

Signed-off-by: marcin mikołajczak <git@mkljczk.pl>
This commit is contained in:
marcin mikołajczak 2024-05-14 17:18:22 +02:00
parent aceff8c128
commit 8c3f306c62
28 changed files with 336 additions and 762 deletions

View file

@ -448,7 +448,6 @@
Pleroma.Web.RichMedia.Parsers.TwitterCard,
Pleroma.Web.RichMedia.Parsers.OEmbed
],
oembed_providers_enabled: true,
failure_backoff: 60_000,
ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl]

View file

@ -2143,12 +2143,6 @@
type: :boolean,
description: "Enables RichMedia parsing of URLs"
},
%{
key: :oembed_providers_enabled,
type: :boolean,
description:
"Embed rich media from a list of known providers. This takes precedence over other parsers."
},
%{
key: :ignore_hosts,
type: {:list, :string},

View file

@ -433,7 +433,6 @@ config :pleroma, Pleroma.Web.MediaProxy.Invalidation.Http,
* `ignore_hosts`: list of hosts which will be ignored by the metadata parser. For example `["accounts.google.com", "xss.website"]`, defaults to `[]`.
* `ignore_tld`: list of TLDs (top-level domains) which will be ignored when parsing metadata. Defaults to `["local", "localdomain", "lan"]`.
* `parsers`: list of Rich Media parsers.
* `oembed_providers_enabled`: Embed rich media from a list of known providers. This takes precedence over other parsers.
* `failure_backoff`: Amount of milliseconds after request failure, during which the request will not be retried.
## HTTP server

View file

@ -30,7 +30,7 @@ defmodule Pleroma.Web.ActivityPub.ObjectValidators.ArticleNotePageValidator do
def cast_and_apply(data) do
data
|> cast_data()
|> cast_data
|> apply_action(:insert)
end

View file

@ -150,8 +150,7 @@ def create(
)
when not is_nil(scheduled_at) do
params =
params
|> Map.put(:in_reply_to_status_id, params[:in_reply_to_id])
Map.put(params, :in_reply_to_status_id, params[:in_reply_to_id])
|> Map.put(:generator, conn.assigns.application)
attrs = %{
@ -211,8 +210,7 @@ defp do_create(
%{assigns: %{user: user}, private: %{open_api_spex: %{body_params: params}}} = conn
) do
params =
params
|> Map.put(:in_reply_to_status_id, params[:in_reply_to_id])
Map.put(params, :in_reply_to_status_id, params[:in_reply_to_id])
|> Map.put(:generator, conn.assigns.application)
with {:ok, activity} <- CommonAPI.post(user, params) do
@ -489,7 +487,7 @@ def card(
with %Activity{} = activity <- Activity.get_by_id(status_id),
true <- Visibility.visible_for_user?(activity, user) do
data = Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
render(conn, "card.json", %{embed: data})
render(conn, "card.json", data)
else
_ -> render_error(conn, :not_found, "Record not found")
end

View file

@ -7,10 +7,8 @@ defmodule Pleroma.Web.MastodonAPI.InstanceView do
alias Pleroma.Config
alias Pleroma.Domain
alias Pleroma.User
alias Pleroma.Web.ActivityPub.MRF
alias Pleroma.Web.AdminAPI.DomainView
alias Pleroma.Web.MastodonAPI
@mastodon_api_level "2.7.2"
@ -37,8 +35,8 @@ def render("show.json", _) do
|> to_string,
registrations: Keyword.get(instance, :registrations_open),
approval_required: Keyword.get(instance, :account_approval_required),
configuration: configuration(),
contact_account: contact_account(Keyword.get(instance, :contact_username)),
configuration: configuration(),
rules: render(__MODULE__, "rules.json"),
# Extra (not present in Mastodon):
max_toot_chars: Keyword.get(instance, :limit),
@ -239,10 +237,10 @@ defp contact_account("@" <> username) do
end
defp contact_account(username) do
user = User.get_cached_by_nickname(username)
user = Pleroma.User.get_cached_by_nickname(username)
if user do
MastodonAPI.AccountView.render("show.json", %{user: user, for: nil})
Pleroma.Web.MastodonAPI.AccountView.render("show.json", %{user: user, for: nil})
else
nil
end

View file

@ -21,8 +21,6 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
alias Pleroma.Web.MastodonAPI.StatusView
alias Pleroma.Web.MediaProxy
alias Pleroma.Web.PleromaAPI.EmojiReactionController
alias Pleroma.Web.RichMedia.Parser.Card
alias Pleroma.Web.RichMedia.Parser.Embed
import Pleroma.Web.ActivityPub.Visibility, only: [get_visibility: 1, visible_for_user?: 2]
@ -366,10 +364,7 @@ def render("show.json", %{activity: %{data: %{"object" => _object}} = activity}
summary = object.data["summary"] || ""
card =
render("card.json", %{
embed: Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
})
card = render("card.json", Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity))
url =
if user.local do
@ -480,14 +475,6 @@ def render("show.json", _) do
nil
end
def render("card.json", %{embed: %Embed{} = embed}) do
with {:ok, %Card{} = card} <- Card.parse(embed) do
Card.to_map(card)
else
_ -> nil
end
end
def render("history.json", %{activity: %{data: %{"object" => _object}} = activity} = opts) do
object = Object.normalize(activity, fetch: false)
@ -617,7 +604,6 @@ def render("card.json", %{rich_media: rich_media, page_url: page_url}) do
}
end
def render("card.json", %{embed: %Card{} = card}), do: Card.to_map(card)
def render("card.json", _), do: nil
def render("attachment.json", %{attachment: attachment}) do

View file

@ -37,7 +37,7 @@ def render(
card:
StatusView.render(
"card.json",
%{embed: Pleroma.Web.RichMedia.Helpers.fetch_data_for_object(object)}
Pleroma.Web.RichMedia.Helpers.fetch_data_for_object(object)
)
}
|> put_idempotency_key()

View file

@ -7,7 +7,6 @@ defmodule Pleroma.Web.RichMedia.Helpers do
alias Pleroma.HTML
alias Pleroma.Object
alias Pleroma.Web.RichMedia.Parser
alias Pleroma.Web.RichMedia.Parser.Embed
@cachex Pleroma.Config.get([:cachex, :provider], Cachex)
@ -36,10 +35,10 @@ def fetch_data_for_object(object) do
with true <- @config_impl.get([:rich_media, :enabled]),
{:ok, page_url} <-
HTML.extract_first_external_url_from_object(object),
{:ok, %Embed{} = embed} <- Parser.parse(page_url) do
embed
{:ok, rich_media} <- Parser.parse(page_url) do
%{page_url: page_url, rich_media: rich_media}
else
_ -> nil
_ -> %{}
end
end
@ -54,17 +53,18 @@ def fetch_data_for_activity(%Activity{data: %{"type" => "Create"}} = activity) d
@cachex.fetch!(:scrubber_cache, key, fn _ ->
result = fetch_data_for_object(object)
with %Embed{} <- result do
Activity.HTML.add_cache_key_for(activity.id, key)
{:commit, result}
else
_ ->
{:ignore, nil}
cond do
match?(%{page_url: _, rich_media: _}, result) ->
Activity.HTML.add_cache_key_for(activity.id, key)
{:commit, result}
true ->
{:ignore, %{}}
end
end)
end
else
_ -> nil
_ -> %{}
end
end

View file

@ -4,8 +4,6 @@
defmodule Pleroma.Web.RichMedia.Parser do
require Logger
alias Pleroma.Web.RichMedia.Parser.Card
alias Pleroma.Web.RichMedia.Parser.Embed
@cachex Pleroma.Config.get([:cachex, :provider], Cachex)
@config_impl Application.compile_env(:pleroma, [__MODULE__, :config_impl], Pleroma.Config)
@ -130,44 +128,42 @@ defp get_ttl_from_image(data, url) do
end
def parse_url(url) do
case maybe_fetch_oembed(url) do
{:ok, %Embed{} = embed} -> {:ok, embed}
_ -> fetch_document(url)
end
end
defp maybe_fetch_oembed(url) do
with true <- Pleroma.Config.get([:rich_media, :oembed_providers_enabled]),
{:ok, oembed_url} <- OEmbedProviders.oembed_url(url),
{:ok, %Tesla.Env{body: json}} <-
Pleroma.Web.RichMedia.Helpers.oembed_get(oembed_url),
{:ok, data} <- Jason.decode(json),
embed <- %Embed{url: url, oembed: data},
{:ok, %Card{}} <- Card.validate(embed) do
{:ok, embed}
else
{:error, error} -> {:error, error}
error -> {:error, error}
end
end
defp fetch_document(url) do
with {:ok, %Tesla.Env{body: html}} <- Pleroma.Web.RichMedia.Helpers.rich_media_get(url),
{:ok, html} <- Floki.parse_document(html),
%Embed{} = embed <- parse_embed(html, url) do
{:ok, embed}
else
{:error, error} -> {:error, error}
error -> {:error, error}
{:ok, html} <- Floki.parse_document(html) do
html
|> maybe_parse()
|> Map.put("url", url)
|> clean_parsed_data()
|> check_parsed_data()
end
end
defp parse_embed(html, url) do
Enum.reduce(parsers(), %Embed{url: url}, fn parser, acc ->
parser.parse(html, acc)
defp maybe_parse(html) do
Enum.reduce_while(parsers(), %{}, fn parser, acc ->
case parser.parse(html, acc) do
data when data != %{} -> {:halt, data}
_ -> {:cont, acc}
end
end)
end
defp check_parsed_data(%{"title" => title} = data)
when is_binary(title) and title != "" do
{:ok, data}
end
defp check_parsed_data(data) do
{:error, {:invalid_metadata, data}}
end
defp clean_parsed_data(data) do
data
|> Enum.reject(fn {key, val} ->
not match?({:ok, _}, Jason.encode(%{key => val}))
end)
|> Map.new()
end
@spec validate_page_url(URI.t() | binary()) :: :ok | :error
defp validate_page_url(page_url) when is_binary(page_url) do
validate_tld = @config_impl.get([Pleroma.Formatter, :validate_tld])

View file

@ -1,148 +0,0 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parser.Card do
  @moduledoc """
  Builds preview cards out of `Pleroma.Web.RichMedia.Parser.Embed` data.

  A card is produced either from an OEmbed response (when it carries a
  supported `"type"` and a `"title"`) or from the page title and the scraped
  `<meta>` values. The struct fields follow the Mastodon card entity:
  https://docs.joinmastodon.org/entities/card/
  """

  alias Pleroma.Web.RichMedia.Parser.Card
  alias Pleroma.Web.RichMedia.Parser.Embed

  @types ["link", "photo", "video", "rich"]

  # https://docs.joinmastodon.org/entities/card/
  defstruct url: nil,
            title: nil,
            description: "",
            type: "link",
            author_name: "",
            author_url: "",
            provider_name: "",
            provider_url: "",
            html: "",
            width: 0,
            height: 0,
            image: nil,
            embed_url: "",
            blurhash: nil

  @doc """
  Converts an `%Embed{}` into a validated `%Card{}`.

  Returns `{:ok, card}` when the resulting card passes `validate/1`, otherwise
  `{:error, {:invalid_metadata, term}}`.

  OEmbed data wins when it has a supported `"type"` and a `"title"`; otherwise a
  plain `"link"` card is assembled from the page title and meta tags. Anything
  that is not an `%Embed{}` with a binary `url` is rejected.
  """
  def parse(%Embed{url: url, oembed: %{"type" => type, "title" => title} = oembed} = embed)
      when type in @types and is_binary(url) do
    uri = URI.parse(url)

    %Card{
      url: url,
      title: title,
      description: get_description(embed),
      type: oembed["type"],
      author_name: oembed["author_name"],
      author_url: oembed["author_url"],
      # Fall back to the page's own host when the provider does not name itself.
      provider_name: oembed["provider_name"] || uri.host,
      provider_url: oembed["provider_url"] || "#{uri.scheme}://#{uri.host}",
      html: sanitize_html(oembed["html"]),
      width: oembed["width"],
      height: oembed["height"],
      image: get_image(oembed) |> fix_uri(url) |> proxy(),
      embed_url: oembed["url"] |> fix_uri(url) |> proxy()
    }
    |> validate()
  end

  def parse(%Embed{url: url} = embed) when is_binary(url) do
    uri = URI.parse(url)

    %Card{
      url: url,
      title: get_title(embed),
      description: get_description(embed),
      type: "link",
      provider_name: uri.host,
      provider_url: "#{uri.scheme}://#{uri.host}",
      image: get_image(embed) |> fix_uri(url) |> proxy()
    }
    |> validate()
  end

  def parse(card), do: {:error, {:invalid_metadata, card}}

  # Title preference: Twitter card, then OpenGraph, then the page <title>.
  defp get_title(embed) do
    case embed do
      %{meta: %{"twitter:title" => title}} when is_binary(title) and title != "" -> title
      %{meta: %{"og:title" => title}} when is_binary(title) and title != "" -> title
      %{title: title} when is_binary(title) and title != "" -> title
      _ -> nil
    end
  end

  # Description preference: Twitter card, OpenGraph, plain "description" meta tag.
  defp get_description(%{meta: meta}) do
    case meta do
      %{"twitter:description" => desc} when is_binary(desc) and desc != "" -> desc
      %{"og:description" => desc} when is_binary(desc) and desc != "" -> desc
      %{"description" => desc} when is_binary(desc) and desc != "" -> desc
      _ -> ""
    end
  end

  # Accepts either an embed (atom `:meta` key) or a raw OEmbed map (string keys).
  defp get_image(%{meta: meta}) do
    case meta do
      %{"twitter:image" => image} when is_binary(image) and image != "" -> image
      %{"og:image" => image} when is_binary(image) and image != "" -> image
      _ -> ""
    end
  end

  defp get_image(%{"thumbnail_url" => image}) when is_binary(image) and image != "", do: image
  defp get_image(%{"type" => "photo", "url" => image}), do: image
  defp get_image(_), do: ""

  # Keeps the OEmbed markup only when, after scrubbing, it is exactly one <iframe>.
  defp sanitize_html(html) when is_binary(html) do
    with {:ok, html} <- FastSanitize.Sanitizer.scrub(html, Pleroma.HTML.Scrubber.OEmbed),
         {:ok, [{"iframe", _, _}]} <- Floki.parse_fragment(html) do
      html
    else
      _ -> ""
    end
  end

  # A response without an "html" field yields nil here; never hand a
  # non-binary to the sanitizer.
  defp sanitize_html(_), do: ""

  @doc """
  Converts a card (struct or plain map) into a map with string keys.
  """
  def to_map(%Card{} = card) do
    card
    |> Map.from_struct()
    |> stringify_keys()
  end

  def to_map(%{} = card), do: stringify_keys(card)

  # `to_string/1` rather than `Atom.to_string/1` so that keys which are already
  # strings are passed through instead of raising.
  defp stringify_keys(%{} = map), do: Map.new(map, fn {k, v} -> {to_string(k), v} end)

  @doc """
  Turns `uri` into an absolute URL using `base_uri`, or returns `nil`.

  Values that already start with `http://` or `https://` are returned untouched
  (they are deliberately not passed through `URI.merge/2`). A leading-slash path
  is merged with the base. Any other non-empty binary is treated as a path from
  the host root, not relative to the page's directory. The empty string and
  non-binaries give `nil`.
  """
  def fix_uri("http://" <> _ = uri, _base_uri), do: uri
  def fix_uri("https://" <> _ = uri, _base_uri), do: uri
  def fix_uri("/" <> _ = uri, base_uri), do: URI.merge(base_uri, uri) |> URI.to_string()
  def fix_uri("", _base_uri), do: nil

  def fix_uri(uri, base_uri) when is_binary(uri),
    do: URI.merge(base_uri, "/#{uri}") |> URI.to_string()

  def fix_uri(_uri, _base_uri), do: nil

  defp proxy(url) when is_binary(url), do: Pleroma.Web.MediaProxy.url(url)
  defp proxy(_), do: nil

  @doc """
  Validates a card, or parses and validates an embed.

  Returns `{:ok, card}` for a card with a supported type and a non-empty binary
  title. A `"video"`/`"rich"` card without usable `html` is downgraded to
  `"link"` before being checked. Everything else gives
  `{:error, {:invalid_metadata, term}}`.
  """
  def validate(%Card{type: type, html: html} = card)
      when type in ["video", "rich"] and (not is_binary(html) or html == "") do
    card
    |> Map.put(:type, "link")
    |> validate()
  end

  def validate(%Card{type: type, title: title} = card)
      when type in @types and is_binary(title) and title != "" do
    {:ok, card}
  end

  # `parse/1` already validates its result and already returns a tagged error,
  # so delegate directly: no second validation pass and no error tuple nested
  # inside another `:invalid_metadata` wrapper.
  def validate(%Embed{} = embed), do: parse(embed)

  def validate(card), do: {:error, {:invalid_metadata, card}}
end

View file

@ -1,10 +0,0 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parser.Embed do
  @moduledoc """
  Holds what was collected about a linked page: scraped markup data and OEmbed.

  Every field defaults to `nil`:

    * `:url` - address of the page
    * `:title` - page title
    * `:meta` - scraped `<meta>` values
    * `:oembed` - decoded OEmbed response
  """

  defstruct [:url, :title, :meta, :oembed]
end

View file

@ -1,39 +0,0 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parser.MetaTags do
  @doc """
  Collects the raw values of all `<meta>` tags found in a `Floki.html_tree/0`.

  The result maps each tag's `name` (or, failing that, `property`) attribute to
  its `content` attribute. Tags lacking those attributes are ignored, a later
  tag overrides an earlier one with the same key, and entries that cannot be
  encoded as JSON are dropped.
  """
  @spec parse(html_tree :: Floki.html_tree()) :: map()
  def parse(html_tree) do
    pairs =
      for meta_node <- Floki.find(html_tree, "meta"),
          {:ok, pair} <- [extract_pair(meta_node)],
          do: pair

    pairs
    |> Map.new()
    |> drop_unencodable()
  end

  # Reads the key/value pair out of a single parsed node.
  defp extract_pair({_tag, attrs, _children}) when is_list(attrs) do
    attrs
    |> Map.new()
    |> pair_from_attributes()
  end

  defp extract_pair(_), do: {:error, :invalid_meta_tag}

  # `name` is checked before `property`, matching clause order.
  defp pair_from_attributes(%{"name" => name, "content" => content}), do: {:ok, {name, content}}

  defp pair_from_attributes(%{"property" => name, "content" => content}),
    do: {:ok, {name, content}}

  defp pair_from_attributes(_), do: {:error, :invalid_meta_tag}

  # Keeps only the entries Jason is able to encode.
  defp drop_unencodable(data) do
    for {key, val} = entry <- data,
        match?({:ok, _}, Jason.encode(%{key => val})),
        into: %{},
        do: entry
  end
end

View file

@ -0,0 +1,46 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
  @moduledoc """
  Extracts prefixed `<meta>` tags (for example `og:*` or `twitter:*`) from a
  parsed HTML document into a flat map keyed by the un-prefixed tag name.
  """

  @doc """
  Merges the matching `<meta>` tags of `html` into `data`.

    * `data` - map to merge the extracted values into
    * `html` - parsed document, as accepted by `Floki.find/2`
    * `prefix` - tag namespace without the colon, e.g. `"og"`
    * `key_name` - attribute holding the tag name, e.g. `"property"` or `"name"`
    * `value_name` - attribute holding the value, `"content"` by default

  Only tags whose `key_name` attribute starts with `"\#{prefix}:"` are read. The
  prefix is removed from the key once; the value is stored untouched. Tags that
  lack the `value_name` attribute are skipped. When the result is non-empty and
  has no `"title"`, the document's `<title>` text is used, if there is any.
  """
  @spec parse(map(), Floki.html_tree(), String.t(), String.t(), String.t()) :: map()
  def parse(data, html, prefix, key_name, value_name \\ "content") do
    html
    |> get_elements(key_name, prefix)
    |> Enum.reduce(data, fn el, acc ->
      Map.merge(acc, normalize_attributes(el, prefix, key_name, value_name))
    end)
    |> maybe_put_title(html)
  end

  defp get_elements(html, key_name, prefix) do
    Floki.find(html, "meta[#{key_name}^='#{prefix}:']")
  end

  # Returns a one-entry map for a usable tag and an empty map otherwise.
  defp normalize_attributes({_tag, attributes, _children}, prefix, key_name, value_name) do
    case Map.new(attributes) do
      %{^key_name => key, ^value_name => value} ->
        # Strip the namespace from the key only, and only once. The value is
        # page content and may legitimately begin with the same characters.
        %{String.replace_prefix(key, "#{prefix}:", "") => value}

      _ ->
        # No value attribute: contributing `key => nil` would overwrite data
        # gathered from earlier tags and block the <title> fallback.
        %{}
    end
  end

  defp maybe_put_title(%{"title" => _} = meta, _), do: meta

  defp maybe_put_title(meta, html) when meta != %{} do
    case get_page_title(html) do
      "" -> meta
      title -> Map.put_new(meta, "title", title)
    end
  end

  defp maybe_put_title(meta, _), do: meta

  # Text of the first <title> in the document head, or "" when there is none.
  defp get_page_title(html) do
    case Floki.find(html, "html head title") do
      [title_node | _] -> Floki.text(title_node)
      _ -> ""
    end
  end
end

View file

@ -3,18 +3,13 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
def parse(html, data) do
def parse(html, _data) do
with elements = [_ | _] <- get_discovery_data(html),
oembed_url when is_binary(oembed_url) <- get_oembed_url(elements),
{:ok, oembed_data = %{"html" => html}} <- get_oembed_data(oembed_url) do
data
|> Map.put(
:oembed,
oembed_data
|> Map.put("html", Pleroma.HTML.filter_tags(html))
)
%{oembed_data | "html" => Pleroma.HTML.filter_tags(html)}
else
_e -> data
_e -> %{}
end
end
@ -27,7 +22,7 @@ defp get_oembed_url([{"link", attributes, _children} | _]) do
end
defp get_oembed_data(url) do
with {:ok, %Tesla.Env{body: json}} <- Pleroma.Web.RichMedia.Helpers.oembed_get(url) do
with {:ok, %Tesla.Env{body: json}} <- Pleroma.Web.RichMedia.Helpers.rich_media_get(url) do
Jason.decode(json)
end
end

View file

@ -4,5 +4,7 @@
defmodule Pleroma.Web.RichMedia.Parsers.OGP do
@deprecated "OGP parser is deprecated. Use TwitterCard instead."
def parse(_html, data), do: data
def parse(_html, _data) do
%{}
end
end

View file

@ -3,22 +3,13 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parsers.TwitterCard do
alias Pleroma.Web.RichMedia.Parser.MetaTags
alias Pleroma.Web.RichMedia.Parsers.MetaTagsParser
@spec parse(Floki.html_tree(), map()) :: map()
@spec parse(list(), map()) :: map()
def parse(html, data) do
data
|> Map.put(:title, get_page_title(html))
|> Map.put(:meta, MetaTags.parse(html))
end
def get_page_title(html) do
with [node | _] <- Floki.find(html, "html head title"),
title when is_binary(title) and title != "" <- Floki.text(node),
true <- String.valid?(title) do
title
else
_ -> nil
end
|> MetaTagsParser.parse(html, "og", "property")
|> MetaTagsParser.parse(html, "twitter", "name")
|> MetaTagsParser.parse(html, "twitter", "property")
end
end

View file

@ -183,7 +183,6 @@ defp deps do
ref: "e0f16822d578866e186a0974d65ad58cddc1e2ab"},
{:restarter, path: "./restarter"},
{:majic, "~> 1.0"},
{:oembed_providers, "~> 0.1.0"},
{:open_api_spex, "~> 3.16"},
{:ecto_psql_extras, "~> 0.6"},
{:vix, "~> 0.26.0"},

View file

@ -99,7 +99,6 @@
"oauther": {:hex, :oauther, "1.3.0", "82b399607f0ca9d01c640438b34d74ebd9e4acd716508f868e864537ecdb1f76", [:mix], [], "hexpm", "78eb888ea875c72ca27b0864a6f550bc6ee84f2eeca37b093d3d833fbcaec04e"},
"oban": {:hex, :oban, "2.13.6", "a0cb1bce3bd393770512231fb5a3695fa19fd3af10d7575bf73f837aee7abf43", [:mix], [{:ecto_sql, "~> 3.6", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:postgrex, "~> 0.16", [hex: :postgrex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "3c1c5eb16f377b3cbbf2ea14be24d20e3d91285af9d1ac86260b7c2af5464887"},
"octo_fetch": {:hex, :octo_fetch, "0.4.0", "074b5ecbc08be10b05b27e9db08bc20a3060142769436242702931c418695b19", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~> 1.1", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "cf8be6f40cd519d7000bb4e84adcf661c32e59369ca2827c4e20042eda7a7fc6"},
"oembed_providers": {:hex, :oembed_providers, "0.1.0", "9b336ee5f3ca20ee4ed005383c74b154d30d0abeb98e95828855c0e2841ae46b", [:mix], [{:glob, "~> 1.0", [hex: :glob, repo: "hexpm", optional: false]}, {:jason, "~> 1.2", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "ac1dda0f743aa6fdead3eef59decfefc9de91d550bf0805b8fce16ed10d421ba"},
"open_api_spex": {:hex, :open_api_spex, "3.18.2", "8c855e83bfe8bf81603d919d6e892541eafece3720f34d1700b58024dadde247", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}, {:poison, "~> 3.0 or ~> 4.0 or ~> 5.0", [hex: :poison, repo: "hexpm", optional: true]}, {:ymlr, "~> 2.0 or ~> 3.0 or ~> 4.0", [hex: :ymlr, repo: "hexpm", optional: true]}], "hexpm", "aa3e6dcfc0ad6a02596b2172662da21c9dd848dac145ea9e603f54e3d81b8d2b"},
"parallel_stream": {:hex, :parallel_stream, "1.0.6", "b967be2b23f0f6787fab7ed681b4c45a215a81481fb62b01a5b750fa8f30f76c", [:mix], [], "hexpm", "639b2e8749e11b87b9eb42f2ad325d161c170b39b288ac8d04c4f31f8f0823eb"},
"parse_trans": {:hex, :parse_trans, "3.4.1", "6e6aa8167cb44cc8f39441d05193be6e6f4e7c2946cb2759f015f8c56b76e5ff", [:rebar3], [], "hexpm", "620a406ce75dada827b82e453c19cf06776be266f5a67cff34e1ef2cbb60e49a"},

View file

@ -197,7 +197,7 @@ test "it detects language from JSON-LD context" do
"actor" => user.ap_id
}
{:ok, _create_activity, meta} = ObjectValidator.validate(note_activity, []) |> IO.inspect()
{:ok, _create_activity, meta} = ObjectValidator.validate(note_activity, [])
assert meta[:object_data]["language"] == "pl"
end

View file

@ -1717,6 +1717,7 @@ test "returns rich-media card", %{conn: conn, user: user} do
card_data = %{
"image" => "http://ia.media-imdb.com/images/rock.jpg",
"image_description" => "",
"provider_name" => "example.com",
"provider_url" => "https://example.com",
"title" => "The Rock",
@ -1724,13 +1725,16 @@ test "returns rich-media card", %{conn: conn, user: user} do
"url" => "https://example.com/ogp",
"description" =>
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
"author_name" => "",
"author_url" => "",
"blurhash" => nil,
"embed_url" => "",
"height" => 0,
"html" => "",
"width" => 0
"pleroma" => %{
"opengraph" => %{
"image" => "http://ia.media-imdb.com/images/rock.jpg",
"title" => "The Rock",
"type" => "video.movie",
"url" => "https://example.com/ogp",
"description" =>
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer."
}
}
}
response =
@ -1767,16 +1771,17 @@ test "replaces missing description with an empty string", %{conn: conn, user: us
"title" => "Pleroma",
"description" => "",
"image" => nil,
"image_description" => "",
"provider_name" => "example.com",
"provider_url" => "https://example.com",
"url" => "https://example.com/ogp-missing-data",
"author_name" => "",
"author_url" => "",
"blurhash" => nil,
"embed_url" => "",
"height" => 0,
"html" => "",
"width" => 0
"pleroma" => %{
"opengraph" => %{
"title" => "Pleroma",
"type" => "website",
"url" => "https://example.com/ogp-missing-data"
}
}
}
end
end

View file

@ -17,7 +17,6 @@ defmodule Pleroma.Web.MastodonAPI.StatusViewTest do
alias Pleroma.Web.CommonAPI
alias Pleroma.Web.MastodonAPI.AccountView
alias Pleroma.Web.MastodonAPI.StatusView
alias Pleroma.Web.RichMedia.Parser.Embed
require Bitwise
@ -448,7 +447,7 @@ test "a quote post" do
assert status.pleroma.quote_url == Object.normalize(quote_post).data["id"]
assert status.pleroma.quote_visible
# Quotes don't go more than one level deep\
# Quotes don't go more than one level deep
refute status.pleroma.quote.pleroma.quote
assert status.pleroma.quote.pleroma.quote_id == to_string(post.id)
assert status.pleroma.quote.pleroma.quote_url == Object.normalize(post).data["id"]
@ -756,45 +755,57 @@ test "it returns a a dictionary tags" do
describe "rich media cards" do
test "a rich media card without a site name renders correctly" do
embed = %Embed{
url: "http://example.com",
title: "Example website",
meta: %{"twitter:image" => "http://example.com/example.jpg"}
}
page_url = "http://example.com"
%{"provider_name" => "example.com"} = StatusView.render("card.json", %{embed: embed})
end
test "a rich media card without a site name or image renders correctly" do
embed = %Embed{
url: "http://example.com",
card = %{
url: page_url,
image: page_url <> "/example.jpg",
title: "Example website"
}
%{"provider_name" => "example.com"} = StatusView.render("card.json", %{embed: embed})
%{provider_name: "example.com"} =
StatusView.render("card.json", %{page_url: page_url, rich_media: card})
end
test "a rich media card without a site name or image renders correctly" do
page_url = "http://example.com"
card = %{
url: page_url,
title: "Example website"
}
%{provider_name: "example.com"} =
StatusView.render("card.json", %{page_url: page_url, rich_media: card})
end
test "a rich media card without an image renders correctly" do
embed = %Embed{
url: "http://example.com",
title: "Example website",
meta: %{"twitter:title" => "Example site name"}
page_url = "http://example.com"
card = %{
url: page_url,
site_name: "Example site name",
title: "Example website"
}
%{"provider_name" => "example.com"} = StatusView.render("card.json", %{embed: embed})
%{provider_name: "example.com"} =
StatusView.render("card.json", %{page_url: page_url, rich_media: card})
end
test "a rich media card with all relevant data renders correctly" do
embed = %Embed{
url: "http://example.com",
page_url = "http://example.com"
card = %{
"image:alt" => "Example image description",
url: page_url,
site_name: "Example site name",
title: "Example website",
meta: %{
"twitter:title" => "Example site name",
"twitter:image" => "http://example.com/example.jpg"
}
image: page_url <> "/example.jpg",
description: "Example description"
}
%{"provider_name" => "example.com"} = StatusView.render("card.json", %{embed: embed})
%{provider_name: "example.com", image_description: "Example image description"} =
StatusView.render("card.json", %{page_url: page_url, rich_media: card})
end
test "a rich media card has all media proxied" do

View file

@ -3,38 +3,28 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.PleromaAPI.ChatMessageReferenceViewTest do
use Pleroma.DataCase, async: false
alias Pleroma.NullCache
use Pleroma.DataCase, async: true
alias Pleroma.Chat
alias Pleroma.Chat.MessageReference
alias Pleroma.Object
alias Pleroma.StaticStubbedConfigMock, as: ConfigMock
alias Pleroma.StaticStubbedConfigMock
alias Pleroma.UnstubbedConfigMock, as: ConfigMock
alias Pleroma.Web.ActivityPub.ActivityPub
alias Pleroma.Web.CommonAPI
alias Pleroma.Web.PleromaAPI.Chat.MessageReferenceView
import Mox
import Pleroma.Factory
import Tesla.Mock
test "crawls valid, complete URLs" do
mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
ConfigMock
|> stub(:get, fn
[:rich_media, :enabled] -> true
path -> Pleroma.Test.StaticConfig.get(path)
end)
Pleroma.UnstubbedConfigMock
|> stub(:get, fn
[:rich_media, :enabled] -> true
path -> Pleroma.Test.StaticConfig.get(path)
end)
test "it displays a chat message" do
user = insert(:user)
recipient = insert(:user)
ConfigMock
|> stub_with(Pleroma.Test.StaticConfig)
file = %Plug.Upload{
content_type: "image/jpeg",
path: Path.absname("test/fixtures/image.jpg"),
@ -52,14 +42,14 @@ test "crawls valid, complete URLs" do
cm_ref = MessageReference.for_chat_and_object(chat, object)
{:ok, activity} =
CommonAPI.post(user, %{
status: "[test](https://example.com/ogp)",
content_type: "text/markdown"
})
id = cm_ref.id
assert %{url: "https://example.com/ogp", meta: %{} = _} =
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
Pleroma.CachexMock
|> stub(:get, fn
:chat_message_id_idempotency_key_cache, ^id -> {:ok, "123"}
cache, key -> NullCache.get(cache, key)
end)
|> stub(:fetch, fn :rich_media_cache, _, _ -> {:ok, {:ok, %{}}} end)
chat_message = MessageReferenceView.render("show.json", chat_message_reference: cm_ref)
@ -70,6 +60,18 @@ test "crawls valid, complete URLs" do
assert chat_message[:created_at]
assert chat_message[:unread] == false
assert match?([%{shortcode: "firefox"}], chat_message[:emojis])
assert chat_message[:idempotency_key] == "123"
StaticStubbedConfigMock
|> stub(:get, fn
[:rich_media, :enabled] -> true
path -> Pleroma.Test.StaticConfig.get(path)
end)
Tesla.Mock.mock_global(fn
%{url: "https://example.com/ogp"} ->
%Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}
end)
{:ok, activity} =
CommonAPI.post_chat_message(recipient, user, "gkgkgk https://example.com/ogp",

View file

@ -43,7 +43,7 @@ test "refuses to crawl incomplete URLs" do
path -> Pleroma.Test.StaticConfig.get(path)
end)
assert Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) == nil
assert %{} == Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
end
test "refuses to crawl malformed URLs" do
@ -61,7 +61,7 @@ test "refuses to crawl malformed URLs" do
path -> Pleroma.Test.StaticConfig.get(path)
end)
assert Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) == nil
assert %{} == Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
end
test "crawls valid, complete URLs" do
@ -79,7 +79,7 @@ test "crawls valid, complete URLs" do
path -> Pleroma.Test.StaticConfig.get(path)
end)
assert %{url: "https://example.com/ogp", meta: %{} = _} =
assert %{page_url: "https://example.com/ogp", rich_media: _} =
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
end
@ -97,7 +97,7 @@ test "recrawls URLs on updates" do
{:ok, activity} = CommonAPI.post(user, %{status: "I like this site #{original_url}"})
assert match?(
%{url: ^original_url, meta: _},
%{page_url: ^original_url, rich_media: _},
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
)
@ -106,7 +106,7 @@ test "recrawls URLs on updates" do
activity = Pleroma.Activity.get_by_id(activity.id)
assert match?(
%{url: ^updated_url, meta: _},
%{page_url: ^updated_url, rich_media: _},
Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity)
)
end
@ -128,10 +128,10 @@ test "refuses to crawl URLs of private network from posts" do
path -> Pleroma.Test.StaticConfig.get(path)
end)
assert Helpers.fetch_data_for_activity(activity) == nil
assert Helpers.fetch_data_for_activity(activity2) == nil
assert Helpers.fetch_data_for_activity(activity3) == nil
assert Helpers.fetch_data_for_activity(activity4) == nil
assert Helpers.fetch_data_for_activity(activity5) == nil
assert %{} == Helpers.fetch_data_for_activity(activity)
assert %{} == Helpers.fetch_data_for_activity(activity2)
assert %{} == Helpers.fetch_data_for_activity(activity3)
assert %{} == Helpers.fetch_data_for_activity(activity4)
assert %{} == Helpers.fetch_data_for_activity(activity5)
end
end

View file

@ -1,129 +0,0 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
# Unit tests for Pleroma.Web.RichMedia.Parser.Card: building a card from an
# embed (parse/1), checking a ready-made card (validate/1) and resolving
# image/embed URIs against the page URL (fix_uri/2).
defmodule Pleroma.Web.RichMedia.Parser.CardTest do
use ExUnit.Case, async: true
alias Pleroma.Web.RichMedia.Parser.Card
alias Pleroma.Web.RichMedia.Parser.Embed
alias Pleroma.Web.RichMedia.Parsers.TwitterCard
describe "parse/1" do
# The embed is produced by running TwitterCard.parse/2 over an HTML fixture;
# no OEmbed data is attached, so a plain "link" card is expected.
test "converts an %Embed{} into a %Card{}" do
url =
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
embed =
File.read!("test/fixtures/nypd-facial-recognition-children-teenagers.html")
|> Floki.parse_document!()
|> TwitterCard.parse(%Embed{url: url})
expected = %Card{
description:
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
image:
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
title: "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
type: "link",
provider_name: "www.nytimes.com",
provider_url: "https://www.nytimes.com",
url: url
}
assert Card.parse(embed) == {:ok, expected}
end
# An og:image given as a path must come back as an absolute URL on the page's host.
test "converts URL paths into absolute URLs" do
embed = %Embed{
url: "https://spam.com/luigi",
title: "Watch Luigi not doing anything",
meta: %{
"og:image" => "/uploads/weegee.jpeg"
}
}
{:ok, card} = Card.parse(embed)
assert card.image == "https://spam.com/uploads/weegee.jpeg"
end
# The OEmbed fixture is attached to the embed, yet the expected card has
# type "link" and an empty html field.
test "falls back to Link with invalid Rich/Video" do
url = "https://ishothim.com/our-work/mexican-drug-cartels/"
oembed = File.read!("test/fixtures/rich_media/wordpress_embed.json") |> Jason.decode!()
embed =
File.read!("test/fixtures/rich_media/wordpress.html")
|> Floki.parse_document!()
|> TwitterCard.parse(%Embed{url: url, oembed: oembed})
expected = %Card{
author_name: "Michael Jeter",
author_url: "https://ishothim.com/author/mike/",
blurhash: nil,
description:
"I Shot Him collaborated with the folks at Visual.ly on this informative animation about the violence from drug cartels happening right across our border. We researched, wrote, illustrated, and animated this piece to inform people about the connections of our drug and gun laws to the death of innocence in Mexico.",
embed_url: nil,
height: 338,
html: "",
image: "https://ishothim.com/wp-content/uploads/2013/01/Cartel_feature.jpg",
provider_name: "I Shot Him",
provider_url: "https://ishothim.com",
title: "Mexican Drug Cartels",
type: "link",
url: "https://ishothim.com/our-work/mexican-drug-cartels/",
width: 600
}
assert Card.parse(embed) == {:ok, expected}
end
end
describe "validate/1" do
# A card with a title and the "link" type is returned unchanged.
test "returns {:ok, card} with a valid %Card{}" do
card = %Card{
title: "Moms can't believe this one trick",
url: "http://spam.com",
type: "link"
}
assert {:ok, ^card} = Card.validate(card)
end
end
describe "fix_uri/2" do
# Every test in this group resolves against the same page URL unless it sets its own.
setup do: %{base_uri: "https://benis.xyz/hello/fam"}
test "two full URLs", %{base_uri: base_uri} do
uri = "https://benis.xyz/images/pic.jpeg"
assert Card.fix_uri(uri, base_uri) == uri
end
test "URI with leading slash", %{base_uri: base_uri} do
uri = "/images/pic.jpeg"
expected = "https://benis.xyz/images/pic.jpeg"
assert Card.fix_uri(uri, base_uri) == expected
end
# Note the expected result is rooted at the host, not at the page's "/hello/" directory.
test "URI without leading slash", %{base_uri: base_uri} do
uri = "images/pic.jpeg"
expected = "https://benis.xyz/images/pic.jpeg"
assert Card.fix_uri(uri, base_uri) == expected
end
test "empty URI", %{base_uri: base_uri} do
assert Card.fix_uri("", base_uri) == nil
end
test "nil URI", %{base_uri: base_uri} do
assert Card.fix_uri(nil, base_uri) == nil
end
# An absolute URL that carries a second "https://" inside its path must be returned untouched.
# https://github.com/elixir-lang/elixir/issues/10771
test "Elixir #10771", _ do
uri =
"https://images.macrumors.com/t/4riJyi1XC906qyJ41nAfOgpvo1I=/1600x/https://images.macrumors.com/article-new/2020/09/spatialaudiofeature.jpg"
base_uri = "https://www.macrumors.com/guide/apps-support-apples-spatial-audio-feature/"
assert Card.fix_uri(uri, base_uri) == uri
end
end
end

View file

@ -1,81 +0,0 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parser.MetaTagsTest do
  use ExUnit.Case, async: true

  alias Pleroma.Web.RichMedia.Parser.MetaTags

  # Saved HTML page used as parser input.
  @fixture "test/fixtures/nypd-facial-recognition-children-teenagers.html"

  test "returns a map of <meta> values" do
    document =
      @fixture
      |> File.read!()
      |> Floki.parse_document!()

    # The parse result must equal this map exactly, key for key.
    assert MetaTags.parse(document) == %{
             "CG" => "nyregion",
             "CN" => "experience-tech-and-society",
             "CT" => "spotlight",
             "PST" => "News",
             "PT" => "article",
             "SCG" => "",
             "al:android:app_name" => "NYTimes",
             "al:android:package" => "com.nytimes.android",
             "al:android:url" => "nytimes://reader/id/100000006583622",
             "al:ipad:app_name" => "NYTimes",
             "al:ipad:app_store_id" => "357066198",
             "al:ipad:url" =>
               "nytimes://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
             "al:iphone:app_name" => "NYTimes",
             "al:iphone:app_store_id" => "284862083",
             "al:iphone:url" =>
               "nytimes://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
             "article:modified" => "2019-08-02T09:30:23.000Z",
             "article:published" => "2019-08-01T17:15:31.000Z",
             "article:section" => "New York",
             "article:tag" => "New York City",
             "articleid" => "100000006583622",
             "byl" => "By Joseph Goldstein and Ali Watkins",
             "description" =>
               "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
             "fb:app_id" => "9869919170",
             "image" =>
               "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
             "msapplication-starturl" => "https://www.nytimes.com",
             "news_keywords" =>
               "NYPD,Juvenile delinquency,Facial Recognition,Privacy,Government Surveillance,Police,Civil Rights,NYC",
             "nyt_uri" => "nyt://article/9da58246-2495-505f-9abd-b5fda8e67b56",
             "og:description" =>
               "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
             "og:image" =>
               "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
             "og:title" =>
               "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
             "og:type" => "article",
             "og:url" =>
               "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
             "pdate" => "20190801",
             "pubp_event_id" => "pubp://event/47a657bafa8a476bb36832f90ee5ac6e",
             "robots" => "noarchive",
             "thumbnail" =>
               "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-thumbStandard.jpg",
             "twitter:app:id:googleplay" => "com.nytimes.android",
             "twitter:app:name:googleplay" => "NYTimes",
             "twitter:app:url:googleplay" => "nytimes://reader/id/100000006583622",
             "twitter:card" => "summary_large_image",
             "twitter:description" =>
               "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
             "twitter:image" =>
               "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
             "twitter:image:alt" => "",
             "twitter:title" =>
               "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
             "twitter:url" =>
               "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
             "url" =>
               "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
             "viewport" => "width=device-width, initial-scale=1, maximum-scale=1"
           }
  end
end

View file

@ -6,7 +6,6 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
use Pleroma.DataCase, async: false
alias Pleroma.Web.RichMedia.Parser
alias Pleroma.Web.RichMedia.Parser.Embed
import Tesla.Mock
@ -14,123 +13,84 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
end
test "returns empty embed when no metadata present" do
expected = %Embed{
meta: %{},
oembed: nil,
title: nil,
url: "https://example.com/empty"
}
test "returns error when no metadata present" do
assert {:error, _} = Parser.parse("https://example.com/empty")
end
assert Parser.parse("https://example.com/empty") == {:ok, expected}
test "doesn't just add a title" do
assert {:error, {:invalid_metadata, _}} = Parser.parse("https://example.com/non-ogp")
end
test "parses ogp" do
url = "https://example.com/ogp"
expected = %Embed{
meta: %{
"og:image" => "http://ia.media-imdb.com/images/rock.jpg",
"og:title" => "The Rock",
"og:description" =>
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
"og:type" => "video.movie",
"og:url" => "http://www.imdb.com/title/tt0117500/"
},
oembed: nil,
title: "The Rock (1996)",
url: "https://example.com/ogp"
}
assert Parser.parse(url) == {:ok, expected}
assert Parser.parse("https://example.com/ogp") ==
{:ok,
%{
"image" => "http://ia.media-imdb.com/images/rock.jpg",
"title" => "The Rock",
"description" =>
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
"type" => "video.movie",
"url" => "https://example.com/ogp"
}}
end
test "gets <title> tag" do
url = "https://example.com/ogp-missing-title"
expected = "The Rock (1996)"
assert {:ok, %Embed{title: ^expected}} = Parser.parse(url)
test "falls back to <title> when ogp:title is missing" do
assert Parser.parse("https://example.com/ogp-missing-title") ==
{:ok,
%{
"image" => "http://ia.media-imdb.com/images/rock.jpg",
"title" => "The Rock (1996)",
"description" =>
"Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
"type" => "video.movie",
"url" => "https://example.com/ogp-missing-title"
}}
end
test "parses twitter card" do
url = "https://example.com/twitter-card"
expected = %Embed{
meta: %{
"twitter:card" => "summary",
"twitter:description" => "View the album on Flickr.",
"twitter:image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg",
"twitter:site" => "@flickr",
"twitter:title" => "Small Island Developing States Photo Submission"
},
oembed: nil,
title: nil,
url: "https://example.com/twitter-card"
}
assert Parser.parse(url) == {:ok, expected}
assert Parser.parse("https://example.com/twitter-card") ==
{:ok,
%{
"card" => "summary",
"site" => "@flickr",
"image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg",
"title" => "Small Island Developing States Photo Submission",
"description" => "View the album on Flickr.",
"url" => "https://example.com/twitter-card"
}}
end
test "parses OEmbed" do
url = "https://example.com/oembed"
expected = %Embed{
meta: %{},
oembed: %{
"author_name" => "\u202E\u202D\u202Cbees\u202C",
"author_url" => "https://www.flickr.com/photos/bees/",
"cache_age" => 3600,
"flickr_type" => "photo",
"height" => "768",
"html" =>
"<a href=\"https://www.flickr.com/photos/bees/2362225867/\" title=\"Bacon Lollys by \u202E\u202D\u202Cbees\u202C, on Flickr\"><img src=\"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg\" width=\"1024\" height=\"768\" alt=\"Bacon Lollys\"/></a>",
"license" => "All Rights Reserved",
"license_id" => 0,
"provider_name" => "Flickr",
"provider_url" => "https://www.flickr.com/",
"thumbnail_height" => 150,
"thumbnail_url" => "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_q.jpg",
"thumbnail_width" => 150,
"title" => "Bacon Lollys",
"type" => "photo",
"url" => "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg",
"version" => "1.0",
"web_page" => "https://www.flickr.com/photos/bees/2362225867/",
"web_page_short_url" => "https://flic.kr/p/4AK2sc",
"width" => "1024"
},
url: "https://example.com/oembed"
}
assert Parser.parse(url) == {:ok, expected}
test "parses OEmbed and filters HTML tags" do
assert Parser.parse("https://example.com/oembed") ==
{:ok,
%{
"author_name" => "\u202E\u202D\u202Cbees\u202C",
"author_url" => "https://www.flickr.com/photos/bees/",
"cache_age" => 3600,
"flickr_type" => "photo",
"height" => "768",
"html" =>
"<a href=\"https://www.flickr.com/photos/bees/2362225867/\" title=\"Bacon Lollys by \u202E\u202D\u202Cbees\u202C, on Flickr\"><img src=\"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg\" width=\"1024\" height=\"768\" alt=\"Bacon Lollys\"/></a>",
"license" => "All Rights Reserved",
"license_id" => 0,
"provider_name" => "Flickr",
"provider_url" => "https://www.flickr.com/",
"thumbnail_height" => 150,
"thumbnail_url" =>
"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_q.jpg",
"thumbnail_width" => 150,
"title" => "Bacon Lollys",
"type" => "photo",
"url" => "https://example.com/oembed",
"version" => "1.0",
"web_page" => "https://www.flickr.com/photos/bees/2362225867/",
"web_page_short_url" => "https://flic.kr/p/4AK2sc",
"width" => "1024"
}}
end
test "cleans corrupted meta data" do
expected = %Embed{
meta: %{
"Keywords" => "Konsument i zakupy",
"ROBOTS" => "NOARCHIVE",
"fb:app_id" => "515714931781741",
"fb:pages" => "288018984602680",
"google-site-verification" => "3P4BE3hLw82QWqtseIE60qQcOtrpMxMnCNkcv62pjTA",
"news_keywords" => "Konsument i zakupy",
"og:image" =>
"https://bi.im-g.pl/im/f7/49/17/z24418295FBW,Prace-nad-projektem-chusty-antysmogowej-rozpoczely.jpg",
"og:locale" => "pl_PL",
"og:site_name" => "wyborcza.biz",
"og:type" => "article",
"og:url" =>
"http://wyborcza.biz/biznes/7,147743,24417936,pomysl-na-biznes-chusta-ktora-chroni-przed-smogiem.html",
"twitter:card" => "summary_large_image",
"twitter:image" =>
"https://bi.im-g.pl/im/f7/49/17/z24418295FBW,Prace-nad-projektem-chusty-antysmogowej-rozpoczely.jpg",
"viewport" => "width=device-width, user-scalable=yes"
},
oembed: nil,
title: nil,
url: "https://example.com/malformed"
}
assert Parser.parse("https://example.com/malformed") == {:ok, expected}
test "rejects invalid OGP data" do
assert {:error, _} = Parser.parse("https://example.com/malformed")
end
test "returns error if getting page was not successful" do

View file

@ -6,10 +6,8 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
use ExUnit.Case, async: true
alias Pleroma.Web.RichMedia.Parsers.TwitterCard
test "fails gracefully with barebones HTML" do
html = [{"html", [], [{"head", [], []}, {"body", [], []}]}]
expected = %{meta: %{}, title: nil}
assert TwitterCard.parse(html, %{}) == expected
test "returns error when html not contains twitter card" do
assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) == %{}
end
test "parses twitter card with only name attributes" do
@ -17,24 +15,22 @@ test "parses twitter card with only name attributes" do
File.read!("test/fixtures/nypd-facial-recognition-children-teenagers3.html")
|> Floki.parse_document!()
assert %{
title:
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
meta: %{
"twitter:app:id:googleplay" => "com.nytimes.android",
"twitter:app:name:googleplay" => "NYTimes",
"twitter:app:url:googleplay" => "nytimes://reader/id/100000006583622",
"og:description" =>
assert TwitterCard.parse(html, %{}) ==
%{
"app:id:googleplay" => "com.nytimes.android",
"app:name:googleplay" => "NYTimes",
"app:url:googleplay" => "nytimes://reader/id/100000006583622",
"site" => nil,
"description" =>
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
"og:image" =>
"image" =>
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
"og:title" =>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
"og:type" => "article",
"og:url" =>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
"type" => "article",
"url" =>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
"title" =>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database."
}
} = TwitterCard.parse(html, %{})
end
test "parses twitter card with only property attributes" do
@ -42,31 +38,20 @@ test "parses twitter card with only property attributes" do
File.read!("test/fixtures/nypd-facial-recognition-children-teenagers2.html")
|> Floki.parse_document!()
assert %{
title:
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
meta: %{
"twitter:card" => "summary_large_image",
"twitter:description" =>
assert TwitterCard.parse(html, %{}) ==
%{
"card" => "summary_large_image",
"description" =>
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
"twitter:image" =>
"image" =>
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
"twitter:image:alt" => "",
"twitter:title" =>
"image:alt" => "",
"title" =>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
"twitter:url" =>
"url" =>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
"og:description" =>
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
"og:image" =>
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
"og:title" =>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
"og:url" =>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
"og:type" => "article"
"type" => "article"
}
} = TwitterCard.parse(html, %{})
end
test "parses twitter card with name & property attributes" do
@ -74,43 +59,47 @@ test "parses twitter card with name & property attributes" do
File.read!("test/fixtures/nypd-facial-recognition-children-teenagers.html")
|> Floki.parse_document!()
assert %{
title:
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
meta: %{
"twitter:app:id:googleplay" => "com.nytimes.android",
"twitter:app:name:googleplay" => "NYTimes",
"twitter:app:url:googleplay" => "nytimes://reader/id/100000006583622",
"twitter:card" => "summary_large_image",
"twitter:description" =>
assert TwitterCard.parse(html, %{}) ==
%{
"app:id:googleplay" => "com.nytimes.android",
"app:name:googleplay" => "NYTimes",
"app:url:googleplay" => "nytimes://reader/id/100000006583622",
"card" => "summary_large_image",
"description" =>
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
"twitter:image" =>
"image" =>
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
"twitter:image:alt" => "",
"twitter:title" =>
"image:alt" => "",
"site" => nil,
"title" =>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
"twitter:url" =>
"url" =>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
"og:description" =>
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
"og:image" =>
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
"og:title" =>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
"og:url" =>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
"og:type" => "article"
"type" => "article"
}
} = TwitterCard.parse(html, %{})
end
test "respect only first title tag on the page" do
image_path =
"https://assets.atlasobscura.com/media/W1siZiIsInVwbG9hZHMvYXNzZXRzLzkwYzgyMzI4LThlMDUtNGRiNS05MDg3LTUzMGUxZTM5N2RmMmVkOTM5ZDM4MGM4OTIx" <>
"YTQ5MF9EQVIgZXhodW1hdGlvbiBvZiBNYXJnYXJldCBDb3JiaW4gZ3JhdmUgMTkyNi5qcGciXSxbInAiLCJjb252ZXJ0IiwiIl0sWyJwIiwiY29udmVydCIsIi1xdWFsaXR5IDgxIC1hdXRvLW9" <>
"yaWVudCJdLFsicCIsInRodW1iIiwiNjAweD4iXV0/DAR%20exhumation%20of%20Margaret%20Corbin%20grave%201926.jpg"
html =
File.read!("test/fixtures/margaret-corbin-grave-west-point.html") |> Floki.parse_document!()
expected = "The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura"
assert %{title: ^expected} = TwitterCard.parse(html, %{})
assert TwitterCard.parse(html, %{}) ==
%{
"site" => "@atlasobscura",
"title" => "The Missing Grave of Margaret Corbin, Revolutionary War Veteran",
"card" => "summary_large_image",
"image" => image_path,
"description" =>
"She's the only woman veteran honored with a monument at West Point. But where was she buried?",
"site_name" => "Atlas Obscura",
"type" => "article",
"url" => "http://www.atlasobscura.com/articles/margaret-corbin-grave-west-point"
}
end
test "takes first title found in html head if there is an html markup error" do
@ -118,9 +107,21 @@ test "takes first title found in html head if there is an html markup error" do
File.read!("test/fixtures/nypd-facial-recognition-children-teenagers4.html")
|> Floki.parse_document!()
expected =
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times"
assert %{title: ^expected} = TwitterCard.parse(html, %{})
assert TwitterCard.parse(html, %{}) ==
%{
"site" => nil,
"title" =>
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
"app:id:googleplay" => "com.nytimes.android",
"app:name:googleplay" => "NYTimes",
"app:url:googleplay" => "nytimes://reader/id/100000006583622",
"description" =>
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
"image" =>
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
"type" => "article",
"url" =>
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
}
end
end