# Decides whether `page_url` is worth handing to the rich media parser.
#
# Accepts either a URL string or an already-parsed `%URI{}` and returns:
#
#   * `:ok`    - the URI carries both a scheme and a non-empty host
#   * `:error` - anything else (`nil`, relative references, scheme-only URIs
#                such as `mailto:`, empty hosts, non-binary terms)
#
# The host is checked rather than `authority`: `authority` is a deprecated
# field, and newer Elixir versions report an empty authority as `""` instead
# of `nil`, which would let URLs like "http:///path" slip through.
defp validate_page_url(%URI{scheme: scheme, host: host})
     when is_binary(scheme) and scheme != "" and is_binary(host) and host != "",
     do: :ok

# Only binaries are parsed; URI.parse/1 raises on other terms, and a bogus
# URL must result in `:error`, never in a crash of the caller.
defp validate_page_url(page_url) when is_binary(page_url) do
  page_url
  |> URI.parse()
  |> validate_page_url()
end

# nil, incomplete %URI{} structs and every other term are not crawlable.
defp validate_page_url(_other), do: :error