From 5f6506d864239408e9fa3705c5dd7b241307241a Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 27 Aug 2024 20:39:32 -0400 Subject: [PATCH 01/11] Pleroma.HTTP: option stream: true will return a stream as the body for Gun adapter --- lib/pleroma/http/adapter_helper/gun.ex | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/pleroma/http/adapter_helper/gun.ex b/lib/pleroma/http/adapter_helper/gun.ex index 1fe8dd4b26..f9a8180f25 100644 --- a/lib/pleroma/http/adapter_helper/gun.ex +++ b/lib/pleroma/http/adapter_helper/gun.ex @@ -32,6 +32,7 @@ def options(incoming_opts \\ [], %URI{} = uri) do |> AdapterHelper.maybe_add_proxy(proxy) |> Keyword.merge(incoming_opts) |> put_timeout() + |> maybe_stream() end defp add_scheme_opts(opts, %{scheme: "http"}), do: opts @@ -47,6 +48,14 @@ defp put_timeout(opts) do Keyword.put(opts, :timeout, recv_timeout) end + # Tesla Gun adapter uses body_as: :stream + defp maybe_stream(opts) do + case Keyword.pop(opts, :stream, nil) do + {true, opts} -> Keyword.put(opts, :body_as, :stream) + {_, opts} -> opts + end + end + @spec pool_timeout(pool()) :: non_neg_integer() def pool_timeout(pool) do default = Config.get([:pools, :default, :recv_timeout], 5_000) From bb279c28025522764272468e3177a5f6701bc155 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 27 Aug 2024 21:08:25 -0400 Subject: [PATCH 02/11] Pleroma.HTTP add AdapterHelper.can_stream? to assist with discovering if the current adapter supports returning a Stream body --- lib/pleroma/http/adapter_helper.ex | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/pleroma/http/adapter_helper.ex b/lib/pleroma/http/adapter_helper.ex index dcb27a29d0..f8bde2ac34 100644 --- a/lib/pleroma/http/adapter_helper.ex +++ b/lib/pleroma/http/adapter_helper.ex @@ -118,4 +118,13 @@ def format_host(host) do host_charlist end end + + #TODO add Finch support once we have an AdapterHelper for it + @spec can_stream? :: bool() + def can_stream? 
do + case Application.get_env(:tesla, :adapter) do + Tesla.Adapter.Gun -> true + _ -> false + end + end end From ec8db9d4eedfade5a8b74425b21b07b3f4e44992 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 27 Aug 2024 21:09:15 -0400 Subject: [PATCH 03/11] RichMedia: skip the HTTP HEAD request for adapters that support streaming the response body --- lib/pleroma/web/rich_media/helpers.ex | 38 +++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex index e2889b3516..88bfbae680 100644 --- a/lib/pleroma/web/rich_media/helpers.ex +++ b/lib/pleroma/web/rich_media/helpers.ex @@ -11,16 +11,39 @@ defmodule Pleroma.Web.RichMedia.Helpers do @spec rich_media_get(String.t()) :: {:ok, String.t()} | get_errors() def rich_media_get(url) do - headers = [{"user-agent", Pleroma.Application.user_agent() <> "; Bot"}] + case Pleroma.HTTP.AdapterHelper.can_stream?() do + true -> stream(url) + false -> head_first(url) + end + |> handle_result(url) + end + defp stream(url) do + with {_, {:ok, %Tesla.Env{status: 200, body: stream_body, headers: headers}}} <- + {:head, Pleroma.HTTP.get(url, req_headers(), http_options())}, + {_, :ok} <- {:content_type, check_content_type(headers)}, + {_, :ok} <- {:content_length, check_content_length(headers)} do + body = Enum.into(stream_body, <<>>) + {:ok, body} + end + end + + defp head_first(url) do with {_, {:ok, %Tesla.Env{status: 200, headers: headers}}} <- - {:head, Pleroma.HTTP.head(url, headers, http_options())}, + {:head, Pleroma.HTTP.head(url, req_headers(), http_options())}, {_, :ok} <- {:content_type, check_content_type(headers)}, {_, :ok} <- {:content_length, check_content_length(headers)}, {_, {:ok, %Tesla.Env{status: 200, body: body}}} <- - {:get, Pleroma.HTTP.get(url, headers, http_options())} do + {:get, Pleroma.HTTP.get(url, req_headers(), http_options())} do {:ok, body} - else + end + end + + defp handle_result(result, url) 
do + case result do + {:ok, body} -> + {:ok, body} + {:head, _} -> Logger.debug("Rich media error for #{url}: HTTP HEAD failed") {:error, :head} @@ -74,7 +97,12 @@ defp http_options do [ pool: :rich_media, max_body: Config.get([:rich_media, :max_body], 5_000_000), - tesla_middleware: [{Tesla.Middleware.Timeout, timeout: timeout}] + tesla_middleware: [{Tesla.Middleware.Timeout, timeout: timeout}], + stream: true ] end + + defp req_headers do + [{"user-agent", Pleroma.Application.user_agent() <> "; Bot"}] + end end From 0a86d2b3ac9c90a16aec1237019ecfcb1e680728 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 27 Aug 2024 21:22:59 -0400 Subject: [PATCH 04/11] Handle streaming response errors --- lib/pleroma/http/adapter_helper.ex | 2 +- lib/pleroma/web/rich_media/helpers.ex | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/lib/pleroma/http/adapter_helper.ex b/lib/pleroma/http/adapter_helper.ex index f8bde2ac34..4dbcccdcc1 100644 --- a/lib/pleroma/http/adapter_helper.ex +++ b/lib/pleroma/http/adapter_helper.ex @@ -119,7 +119,7 @@ def format_host(host) do end end - #TODO add Finch support once we have an AdapterHelper for it + # TODO add Finch support once we have an AdapterHelper for it @spec can_stream? :: bool() def can_stream? 
do case Application.get_env(:tesla, :adapter) do diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex index 88bfbae680..db1310b23f 100644 --- a/lib/pleroma/web/rich_media/helpers.ex +++ b/lib/pleroma/web/rich_media/helpers.ex @@ -11,10 +11,10 @@ defmodule Pleroma.Web.RichMedia.Helpers do @spec rich_media_get(String.t()) :: {:ok, String.t()} | get_errors() def rich_media_get(url) do - case Pleroma.HTTP.AdapterHelper.can_stream?() do - true -> stream(url) - false -> head_first(url) - end + case Pleroma.HTTP.AdapterHelper.can_stream?() do + true -> stream(url) + false -> head_first(url) + end |> handle_result(url) end @@ -22,8 +22,8 @@ defp stream(url) do with {_, {:ok, %Tesla.Env{status: 200, body: stream_body, headers: headers}}} <- {:head, Pleroma.HTTP.get(url, req_headers(), http_options())}, {_, :ok} <- {:content_type, check_content_type(headers)}, - {_, :ok} <- {:content_length, check_content_length(headers)} do - body = Enum.into(stream_body, <<>>) + {_, :ok} <- {:content_length, check_content_length(headers)}, + body <- Enum.into(stream_body, <<>>) do {:ok, body} end end @@ -59,6 +59,10 @@ defp handle_result(result, url) do {:get, _} -> Logger.debug("Rich media error for #{url}: HTTP GET failed") {:error, :get} + + {:error, :recv_chunk_timeout} -> + Logger.debug("Rich media error for #{url}: HTTP streaming response failed") + {:error, :get} end end From 116fe77b77eedd2feb073d3be256fea08169c95b Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 27 Aug 2024 21:55:06 -0400 Subject: [PATCH 05/11] Tesla.Middleware.Timeout breaks streaming bodies These are executed by Oban now and Oban can enforce the timeout if the regular HTTP timeout is not sufficient. 
--- lib/pleroma/web/rich_media/helpers.ex | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex index db1310b23f..880d19218f 100644 --- a/lib/pleroma/web/rich_media/helpers.ex +++ b/lib/pleroma/web/rich_media/helpers.ex @@ -96,12 +96,9 @@ defp check_content_length(headers) do end defp http_options do - timeout = Config.get!([:rich_media, :timeout]) - [ pool: :rich_media, max_body: Config.get([:rich_media, :max_body], 5_000_000), - tesla_middleware: [{Tesla.Middleware.Timeout, timeout: timeout}], stream: true ] end From 44901502ffd7713d498976e2d2b9a55c298f1876 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 27 Aug 2024 21:56:02 -0400 Subject: [PATCH 06/11] Fix incorrect identifier for the with statement --- lib/pleroma/web/rich_media/helpers.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex index 880d19218f..b819843433 100644 --- a/lib/pleroma/web/rich_media/helpers.ex +++ b/lib/pleroma/web/rich_media/helpers.ex @@ -20,7 +20,7 @@ def rich_media_get(url) do defp stream(url) do with {_, {:ok, %Tesla.Env{status: 200, body: stream_body, headers: headers}}} <- - {:head, Pleroma.HTTP.get(url, req_headers(), http_options())}, + {:get, Pleroma.HTTP.get(url, req_headers(), http_options())}, {_, :ok} <- {:content_type, check_content_type(headers)}, {_, :ok} <- {:content_length, check_content_length(headers)}, body <- Enum.into(stream_body, <<>>) do From 0804b73c0ae5846a133386c09970546375e3d918 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 27 Aug 2024 22:08:29 -0400 Subject: [PATCH 07/11] This error is not returned by Tesla Upstream has a bug filed for this as they aren't handling this error internally, so it was raising --- lib/pleroma/web/rich_media/helpers.ex | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex 
index b819843433..a242ca640a 100644 --- a/lib/pleroma/web/rich_media/helpers.ex +++ b/lib/pleroma/web/rich_media/helpers.ex @@ -59,10 +59,6 @@ defp handle_result(result, url) do {:get, _} -> Logger.debug("Rich media error for #{url}: HTTP GET failed") {:error, :get} - - {:error, :recv_chunk_timeout} -> - Logger.debug("Rich media error for #{url}: HTTP streaming response failed") - {:error, :get} end end From 0bf82a1745a38a3752f5b7df645a7d266b8fd9c8 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 28 Aug 2024 19:50:51 -0400 Subject: [PATCH 08/11] Add an AdapterHelper for Finch so we can support streaming response bodies --- lib/pleroma/http/adapter_helper.ex | 2 ++ lib/pleroma/http/adapter_helper/finch.ex | 33 ++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 lib/pleroma/http/adapter_helper/finch.ex diff --git a/lib/pleroma/http/adapter_helper.ex b/lib/pleroma/http/adapter_helper.ex index 4dbcccdcc1..be00ba78a6 100644 --- a/lib/pleroma/http/adapter_helper.ex +++ b/lib/pleroma/http/adapter_helper.ex @@ -52,6 +52,7 @@ defp adapter_helper do case adapter() do Tesla.Adapter.Gun -> AdapterHelper.Gun Tesla.Adapter.Hackney -> AdapterHelper.Hackney + {Tesla.Adapter.Finch, _} -> AdapterHelper.Finch _ -> AdapterHelper.Default end end @@ -124,6 +125,7 @@ def format_host(host) do def can_stream? 
do case Application.get_env(:tesla, :adapter) do Tesla.Adapter.Gun -> true + {Tesla.Adapter.Finch, _} -> true _ -> false end end diff --git a/lib/pleroma/http/adapter_helper/finch.ex b/lib/pleroma/http/adapter_helper/finch.ex new file mode 100644 index 0000000000..10a988901e --- /dev/null +++ b/lib/pleroma/http/adapter_helper/finch.ex @@ -0,0 +1,33 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2022 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.HTTP.AdapterHelper.Finch do + @behaviour Pleroma.HTTP.AdapterHelper + + alias Pleroma.Config + alias Pleroma.HTTP.AdapterHelper + + @spec options(keyword(), URI.t()) :: keyword() + def options(incoming_opts \\ [], %URI{} = _uri) do + proxy = + [:http, :proxy_url] + |> Config.get() + |> AdapterHelper.format_proxy() + + config_opts = Config.get([:http, :adapter], []) + + config_opts + |> Keyword.merge(incoming_opts) + |> AdapterHelper.maybe_add_proxy(proxy) + |> maybe_stream() + end + + # Tesla Finch adapter uses response: :stream + defp maybe_stream(opts) do + case Keyword.pop(opts, :stream, nil) do + {true, opts} -> Keyword.put(opts, :response, :stream) + {_, opts} -> opts + end + end +end From 8ab4dd20dfdd0cc92c18ade7d84bfb5364785a15 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 28 Aug 2024 19:52:29 -0400 Subject: [PATCH 09/11] Update comments, remove solved TODO --- lib/pleroma/http/adapter_helper.ex | 1 - lib/pleroma/http/adapter_helper/finch.ex | 2 +- lib/pleroma/http/adapter_helper/gun.ex | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/pleroma/http/adapter_helper.ex b/lib/pleroma/http/adapter_helper.ex index be00ba78a6..32c1080f7e 100644 --- a/lib/pleroma/http/adapter_helper.ex +++ b/lib/pleroma/http/adapter_helper.ex @@ -120,7 +120,6 @@ def format_host(host) do end end - # TODO add Finch support once we have an AdapterHelper for it @spec can_stream? :: bool() def can_stream? 
do case Application.get_env(:tesla, :adapter) do diff --git a/lib/pleroma/http/adapter_helper/finch.ex b/lib/pleroma/http/adapter_helper/finch.ex index 10a988901e..181caed7e1 100644 --- a/lib/pleroma/http/adapter_helper/finch.ex +++ b/lib/pleroma/http/adapter_helper/finch.ex @@ -23,7 +23,7 @@ def options(incoming_opts \\ [], %URI{} = _uri) do |> maybe_stream() end - # Tesla Finch adapter uses response: :stream + # Finch uses [response: :stream] defp maybe_stream(opts) do case Keyword.pop(opts, :stream, nil) do {true, opts} -> Keyword.put(opts, :response, :stream) diff --git a/lib/pleroma/http/adapter_helper/gun.ex b/lib/pleroma/http/adapter_helper/gun.ex index f9a8180f25..30ba26765b 100644 --- a/lib/pleroma/http/adapter_helper/gun.ex +++ b/lib/pleroma/http/adapter_helper/gun.ex @@ -48,7 +48,7 @@ defp put_timeout(opts) do Keyword.put(opts, :timeout, recv_timeout) end - # Tesla Gun adapter uses body_as: :stream + # Gun uses [body_as: :stream] defp maybe_stream(opts) do case Keyword.pop(opts, :stream, nil) do {true, opts} -> Keyword.put(opts, :body_as, :stream) From d01569822e0dc45349c321ad306f6e19b4e967af Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Wed, 28 Aug 2024 19:56:09 -0400 Subject: [PATCH 10/11] Changelog --- changelog.d/rich-media-no-heads.change | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/rich-media-no-heads.change diff --git a/changelog.d/rich-media-no-heads.change b/changelog.d/rich-media-no-heads.change new file mode 100644 index 0000000000..0bab323aa5 --- /dev/null +++ b/changelog.d/rich-media-no-heads.change @@ -0,0 +1 @@ +Rich Media preview fetching will skip making an HTTP HEAD request to check a URL for allowed content type and length if the Tesla adapter is Gun or Finch From c17a78c55a6b288c271923f730dc69aaf27e6556 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Thu, 29 Aug 2024 09:37:10 -0400 Subject: [PATCH 11/11] Rich Media: add stream byte counting as an extra protection against malicious URLs --- 
lib/pleroma/web/rich_media/helpers.ex | 34 +++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex index a242ca640a..d4be979578 100644 --- a/lib/pleroma/web/rich_media/helpers.ex +++ b/lib/pleroma/web/rich_media/helpers.ex @@ -23,7 +23,7 @@ defp stream(url) do {:get, Pleroma.HTTP.get(url, req_headers(), http_options())}, {_, :ok} <- {:content_type, check_content_type(headers)}, {_, :ok} <- {:content_length, check_content_length(headers)}, - body <- Enum.into(stream_body, <<>>) do + {:read_stream, {:ok, body}} <- {:read_stream, read_stream(stream_body)} do {:ok, body} end end @@ -52,8 +52,12 @@ defp handle_result(result, url) do Logger.debug("Rich media error for #{url}: content-type is #{type}") {:error, :content_type} - {:content_length, {_, length}} -> - Logger.debug("Rich media error for #{url}: content-length is #{length}") + {:content_length, :error} -> + Logger.debug("Rich media error for #{url}: content-length exceeded") + {:error, :body_too_large} + + {:read_stream, :error} -> + Logger.debug("Rich media error for #{url}: content-length exceeded") {:error, :body_too_large} {:get, _} -> @@ -82,7 +86,7 @@ defp check_content_length(headers) do {_, maybe_content_length} -> case Integer.parse(maybe_content_length) do {content_length, ""} when content_length <= max_body -> :ok - {_, ""} -> {:error, maybe_content_length} + {_, ""} -> :error _ -> :ok end @@ -91,6 +95,28 @@ defp check_content_length(headers) do end end + defp read_stream(stream) do + max_body = Keyword.get(http_options(), :max_body) + + try do + result = + Stream.transform(stream, 0, fn chunk, total_bytes -> + new_total = total_bytes + byte_size(chunk) + + if new_total > max_body do + raise("Exceeds max body limit of #{max_body}") + else + {[chunk], new_total} + end + end) + |> Enum.into(<<>>) + + {:ok, result} + rescue + _ -> :error + end + end + defp http_options do [ pool: :rich_media,