Add OGP parser
This commit is contained in:
parent
32bed66471
commit
2aab4e03c3
7 changed files with 86 additions and 1 deletions
3
lib/pleroma/web/rich_media/data.ex
Normal file
3
lib/pleroma/web/rich_media/data.ex
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
defmodule Pleroma.Web.RichMedia.Data do
  @moduledoc """
  Container for the rich-media metadata extracted from a page.

  Every field defaults to `nil` until a parser fills it in.
  """

  @type t :: %__MODULE__{
          title: String.t() | nil,
          type: String.t() | nil,
          image: String.t() | nil,
          url: String.t() | nil,
          description: String.t() | nil
        }

  defstruct title: nil,
            type: nil,
            image: nil,
            url: nil,
            description: nil
end
|
14
lib/pleroma/web/rich_media/parser.ex
Normal file
14
lib/pleroma/web/rich_media/parser.ex
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
defmodule Pleroma.Web.RichMedia.Parser do
  @moduledoc """
  Fetches a URL and runs the configured rich-media parsers over its body.
  """

  alias Pleroma.Web.RichMedia.Data

  # Parsers are tried in list order; the first one that succeeds wins.
  @parsers [Pleroma.Web.RichMedia.Parsers.OGP]

  @doc """
  Downloads `url` and returns the metadata found by the first parser that
  reports `{:ok, data}`.

  When every parser returns `{:error, _}` an empty `Data` struct is returned.
  The HTTP result is matched assertively, so a failed request raises a
  `MatchError`.
  """
  def parse(url) do
    {:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url)

    run_parsers(@parsers, html, %Data{})
  end

  # No parser left to try: hand back the untouched accumulator.
  defp run_parsers([], _html, data), do: data

  # Try the next parser; stop at the first success, otherwise keep going with
  # the same accumulator.
  defp run_parsers([parser | remaining], html, data) do
    case parser.parse(html, data) do
      {:ok, parsed} -> parsed
      {:error, _msg} -> run_parsers(remaining, html, data)
    end
  end
end
|
30
lib/pleroma/web/rich_media/parsers/ogp.ex
Normal file
30
lib/pleroma/web/rich_media/parsers/ogp.ex
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
defmodule Pleroma.Web.RichMedia.Parsers.OGP do
  @moduledoc """
  Extracts Open Graph (`og:*`) `<meta>` properties from an HTML document.
  """

  @doc """
  Copies the `og:*` meta properties found in `html` into `data`.

  Only properties whose name (without the `og:` prefix) matches a key that
  already exists in `data` are copied; everything else is ignored. The page
  is remote, untrusted input, so property names are never converted to new
  atoms and never added as extra keys to `data`.

  Returns `{:ok, data}` when at least one `og:*` meta element is present and
  `{:error, "No OGP metadata found"}` otherwise.
  """
  def parse(html, data) do
    case get_elements(html) do
      [_ | _] = elements ->
        fields = allowed_fields(data)
        {:ok, Enum.reduce(elements, data, &put_property(&1, &2, fields))}

      _ ->
        {:error, "No OGP metadata found"}
    end
  end

  # All <meta> elements whose `property` attribute starts with "og:".
  defp get_elements(html) do
    Floki.find(html, "meta[property^='og:']")
  end

  # Lookup table from field name (as a string) to the atom key that already
  # exists in `data`, so matching a property never has to create an atom.
  defp allowed_fields(data) do
    for key <- data |> Map.from_struct() |> Map.keys(), is_atom(key), into: %{} do
      {Atom.to_string(key), key}
    end
  end

  # Stores one element's `content` under its field when the property is known;
  # otherwise leaves the accumulator untouched.
  defp put_property({_tag, attributes, _children}, acc, fields) do
    attrs = Map.new(attributes)

    # Strip the "og:" prefix exactly once, and only from the property name;
    # the content value is kept verbatim.
    with "og:" <> name <- attrs["property"],
         {:ok, field} <- Map.fetch(fields, name),
         content when is_binary(content) <- attrs["content"] do
      %{acc | field => content}
    else
      _ -> acc
    end
  end
end
|
3
mix.exs
3
mix.exs
|
@ -75,7 +75,8 @@ defp deps do
|
||||||
{:web_push_encryption, "~> 0.2.1"},
|
{:web_push_encryption, "~> 0.2.1"},
|
||||||
{:swoosh, "~> 0.20"},
|
{:swoosh, "~> 0.20"},
|
||||||
{:gen_smtp, "~> 0.13"},
|
{:gen_smtp, "~> 0.13"},
|
||||||
{:websocket_client, git: "https://github.com/jeremyong/websocket_client.git", only: :test}
|
{:websocket_client, git: "https://github.com/jeremyong/websocket_client.git", only: :test},
|
||||||
|
{:floki, "~> 0.20.0"}
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
2
mix.lock
2
mix.lock
|
@ -20,9 +20,11 @@
|
||||||
"ex_aws_s3": {:hex, :ex_aws_s3, "2.0.1", "9e09366e77f25d3d88c5393824e613344631be8db0d1839faca49686e99b6704", [:mix], [{:ex_aws, "~> 2.0", [hex: :ex_aws, repo: "hexpm", optional: false]}, {:sweet_xml, ">= 0.0.0", [hex: :sweet_xml, repo: "hexpm", optional: true]}], "hexpm"},
|
"ex_aws_s3": {:hex, :ex_aws_s3, "2.0.1", "9e09366e77f25d3d88c5393824e613344631be8db0d1839faca49686e99b6704", [:mix], [{:ex_aws, "~> 2.0", [hex: :ex_aws, repo: "hexpm", optional: false]}, {:sweet_xml, ">= 0.0.0", [hex: :sweet_xml, repo: "hexpm", optional: true]}], "hexpm"},
|
||||||
"ex_doc": {:hex, :ex_doc, "0.19.1", "519bb9c19526ca51d326c060cb1778d4a9056b190086a8c6c115828eaccea6cf", [:mix], [{:earmark, "~> 1.1", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.7", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm"},
|
"ex_doc": {:hex, :ex_doc, "0.19.1", "519bb9c19526ca51d326c060cb1778d4a9056b190086a8c6c115828eaccea6cf", [:mix], [{:earmark, "~> 1.1", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.7", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm"},
|
||||||
"ex_machina": {:hex, :ex_machina, "2.2.0", "fec496331e04fc2db2a1a24fe317c12c0c4a50d2beb8ebb3531ed1f0d84be0ed", [:mix], [{:ecto, "~> 2.1", [hex: :ecto, repo: "hexpm", optional: true]}], "hexpm"},
|
"ex_machina": {:hex, :ex_machina, "2.2.0", "fec496331e04fc2db2a1a24fe317c12c0c4a50d2beb8ebb3531ed1f0d84be0ed", [:mix], [{:ecto, "~> 2.1", [hex: :ecto, repo: "hexpm", optional: true]}], "hexpm"},
|
||||||
|
"floki": {:hex, :floki, "0.20.4", "be42ac911fece24b4c72f3b5846774b6e61b83fe685c2fc9d62093277fb3bc86", [:mix], [{:html_entities, "~> 0.4.0", [hex: :html_entities, repo: "hexpm", optional: false]}, {:mochiweb, "~> 2.15", [hex: :mochiweb, repo: "hexpm", optional: false]}], "hexpm"},
|
||||||
"gen_smtp": {:hex, :gen_smtp, "0.13.0", "11f08504c4bdd831dc520b8f84a1dce5ce624474a797394e7aafd3c29f5dcd25", [:rebar3], [], "hexpm"},
|
"gen_smtp": {:hex, :gen_smtp, "0.13.0", "11f08504c4bdd831dc520b8f84a1dce5ce624474a797394e7aafd3c29f5dcd25", [:rebar3], [], "hexpm"},
|
||||||
"gettext": {:hex, :gettext, "0.15.0", "40a2b8ce33a80ced7727e36768499fc9286881c43ebafccae6bab731e2b2b8ce", [:mix], [], "hexpm"},
|
"gettext": {:hex, :gettext, "0.15.0", "40a2b8ce33a80ced7727e36768499fc9286881c43ebafccae6bab731e2b2b8ce", [:mix], [], "hexpm"},
|
||||||
"hackney": {:hex, :hackney, "1.14.3", "b5f6f5dcc4f1fba340762738759209e21914516df6be440d85772542d4a5e412", [:rebar3], [{:certifi, "2.4.2", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "1.0.2", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.4", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"},
|
"hackney": {:hex, :hackney, "1.14.3", "b5f6f5dcc4f1fba340762738759209e21914516df6be440d85772542d4a5e412", [:rebar3], [{:certifi, "2.4.2", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "1.0.2", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.4", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"},
|
||||||
|
"html_entities": {:hex, :html_entities, "0.4.0", "f2fee876858cf6aaa9db608820a3209e45a087c5177332799592142b50e89a6b", [:mix], [], "hexpm"},
|
||||||
"html_sanitize_ex": {:hex, :html_sanitize_ex, "1.3.0", "f005ad692b717691203f940c686208aa3d8ffd9dd4bb3699240096a51fa9564e", [:mix], [{:mochiweb, "~> 2.15", [hex: :mochiweb, repo: "hexpm", optional: false]}], "hexpm"},
|
"html_sanitize_ex": {:hex, :html_sanitize_ex, "1.3.0", "f005ad692b717691203f940c686208aa3d8ffd9dd4bb3699240096a51fa9564e", [:mix], [{:mochiweb, "~> 2.15", [hex: :mochiweb, repo: "hexpm", optional: false]}], "hexpm"},
|
||||||
"httpoison": {:hex, :httpoison, "1.2.0", "2702ed3da5fd7a8130fc34b11965c8cfa21ade2f232c00b42d96d4967c39a3a3", [:mix], [{:hackney, "~> 1.8", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"},
|
"httpoison": {:hex, :httpoison, "1.2.0", "2702ed3da5fd7a8130fc34b11965c8cfa21ade2f232c00b42d96d4967c39a3a3", [:mix], [{:hackney, "~> 1.8", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"},
|
||||||
"idna": {:hex, :idna, "6.0.0", "689c46cbcdf3524c44d5f3dde8001f364cd7608a99556d8fbd8239a5798d4c10", [:rebar3], [{:unicode_util_compat, "0.4.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm"},
|
"idna": {:hex, :idna, "6.0.0", "689c46cbcdf3524c44d5f3dde8001f364cd7608a99556d8fbd8239a5798d4c10", [:rebar3], [{:unicode_util_compat, "0.4.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm"},
|
||||||
|
|
9
test/fixtures/rich_media/ogp.html
vendored
Normal file
9
test/fixtures/rich_media/ogp.html
vendored
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
<html prefix="og: http://ogp.me/ns#">
|
||||||
|
<head>
|
||||||
|
<title>The Rock (1996)</title>
|
||||||
|
<meta property="og:title" content="The Rock" />
|
||||||
|
<meta property="og:type" content="video.movie" />
|
||||||
|
<meta property="og:url" content="http://www.imdb.com/title/tt0117500/" />
|
||||||
|
<meta property="og:image" content="http://ia.media-imdb.com/images/rock.jpg" />
|
||||||
|
</head>
|
||||||
|
</html>
|
26
test/web/rich_media/parser_test.exs
Normal file
26
test/web/rich_media/parser_test.exs
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
defmodule Pleroma.Web.RichMedia.ParserTest do
  use ExUnit.Case, async: true

  alias Pleroma.Web.RichMedia.Data
  alias Pleroma.Web.RichMedia.Parser

  # Answer the single GET the test performs with the OGP fixture page. The
  # mock function has no clause for any other request.
  setup do
    Tesla.Mock.mock(fn %{method: :get, url: "http://example.com/ogp"} ->
      %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}
    end)

    :ok
  end

  test "parses ogp" do
    # The fixture has no og:description, so that field keeps its nil default.
    expected = %Data{
      title: "The Rock",
      type: "video.movie",
      url: "http://www.imdb.com/title/tt0117500/",
      image: "http://ia.media-imdb.com/images/rock.jpg",
      description: nil
    }

    assert Parser.parse("http://example.com/ogp") == expected
  end
end
|
Loading…
Reference in a new issue