Merge branch 'pleroma-http-stream' into 'develop'

Pleroma.HTTP: support streaming response bodies

See merge request pleroma/pleroma!4239
This commit is contained in:
feld 2024-08-29 14:54:01 +00:00
commit 8d07034608
5 changed files with 114 additions and 10 deletions

View file

@ -0,0 +1 @@
Rich Media preview fetching will skip making an HTTP HEAD request to check a URL for allowed content type and length if the Tesla adapter is Gun or Finch

View file

@ -52,6 +52,7 @@ defp adapter_helper do
case adapter() do
Tesla.Adapter.Gun -> AdapterHelper.Gun
Tesla.Adapter.Hackney -> AdapterHelper.Hackney
{Tesla.Adapter.Finch, _} -> AdapterHelper.Finch
_ -> AdapterHelper.Default
end
end
@ -118,4 +119,13 @@ def format_host(host) do
host_charlist
end
end
@spec can_stream? :: bool()
def can_stream? do
case Application.get_env(:tesla, :adapter) do
Tesla.Adapter.Gun -> true
{Tesla.Adapter.Finch, _} -> true
_ -> false
end
end
end

View file

@ -0,0 +1,33 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.HTTP.AdapterHelper.Finch do
@behaviour Pleroma.HTTP.AdapterHelper
alias Pleroma.Config
alias Pleroma.HTTP.AdapterHelper
@spec options(keyword(), URI.t()) :: keyword()
def options(incoming_opts \\ [], %URI{} = _uri) do
proxy =
[:http, :proxy_url]
|> Config.get()
|> AdapterHelper.format_proxy()
config_opts = Config.get([:http, :adapter], [])
config_opts
|> Keyword.merge(incoming_opts)
|> AdapterHelper.maybe_add_proxy(proxy)
|> maybe_stream()
end
# Finch uses [response: :stream]
defp maybe_stream(opts) do
case Keyword.pop(opts, :stream, nil) do
{true, opts} -> Keyword.put(opts, :response, :stream)
{_, opts} -> opts
end
end
end

View file

@ -32,6 +32,7 @@ def options(incoming_opts \\ [], %URI{} = uri) do
|> AdapterHelper.maybe_add_proxy(proxy)
|> Keyword.merge(incoming_opts)
|> put_timeout()
|> maybe_stream()
end
defp add_scheme_opts(opts, %{scheme: "http"}), do: opts
@ -47,6 +48,14 @@ defp put_timeout(opts) do
Keyword.put(opts, :timeout, recv_timeout)
end
# Gun uses [body_as: :stream]
defp maybe_stream(opts) do
case Keyword.pop(opts, :stream, nil) do
{true, opts} -> Keyword.put(opts, :body_as, :stream)
{_, opts} -> opts
end
end
@spec pool_timeout(pool()) :: non_neg_integer()
def pool_timeout(pool) do
default = Config.get([:pools, :default, :recv_timeout], 5_000)

View file

@ -11,16 +11,39 @@ defmodule Pleroma.Web.RichMedia.Helpers do
@spec rich_media_get(String.t()) :: {:ok, String.t()} | get_errors()
def rich_media_get(url) do
headers = [{"user-agent", Pleroma.Application.user_agent() <> "; Bot"}]
case Pleroma.HTTP.AdapterHelper.can_stream?() do
true -> stream(url)
false -> head_first(url)
end
|> handle_result(url)
end
defp stream(url) do
with {_, {:ok, %Tesla.Env{status: 200, body: stream_body, headers: headers}}} <-
{:get, Pleroma.HTTP.get(url, req_headers(), http_options())},
{_, :ok} <- {:content_type, check_content_type(headers)},
{_, :ok} <- {:content_length, check_content_length(headers)},
{:read_stream, {:ok, body}} <- {:read_stream, read_stream(stream_body)} do
{:ok, body}
end
end
defp head_first(url) do
with {_, {:ok, %Tesla.Env{status: 200, headers: headers}}} <-
{:head, Pleroma.HTTP.head(url, headers, http_options())},
{:head, Pleroma.HTTP.head(url, req_headers(), http_options())},
{_, :ok} <- {:content_type, check_content_type(headers)},
{_, :ok} <- {:content_length, check_content_length(headers)},
{_, {:ok, %Tesla.Env{status: 200, body: body}}} <-
{:get, Pleroma.HTTP.get(url, headers, http_options())} do
{:get, Pleroma.HTTP.get(url, req_headers(), http_options())} do
{:ok, body}
else
end
end
defp handle_result(result, url) do
case result do
{:ok, body} ->
{:ok, body}
{:head, _} ->
Logger.debug("Rich media error for #{url}: HTTP HEAD failed")
{:error, :head}
@ -29,8 +52,12 @@ def rich_media_get(url) do
Logger.debug("Rich media error for #{url}: content-type is #{type}")
{:error, :content_type}
{:content_length, {_, length}} ->
Logger.debug("Rich media error for #{url}: content-length is #{length}")
{:content_length, :error} ->
Logger.debug("Rich media error for #{url}: content-length exceeded")
{:error, :body_too_large}
{:read_stream, :error} ->
Logger.debug("Rich media error for #{url}: content-length exceeded")
{:error, :body_too_large}
{:get, _} ->
@ -59,7 +86,7 @@ defp check_content_length(headers) do
{_, maybe_content_length} ->
case Integer.parse(maybe_content_length) do
{content_length, ""} when content_length <= max_body -> :ok
{_, ""} -> {:error, maybe_content_length}
{_, ""} -> :error
_ -> :ok
end
@ -68,13 +95,37 @@ defp check_content_length(headers) do
end
end
defp http_options do
timeout = Config.get!([:rich_media, :timeout])
defp read_stream(stream) do
max_body = Keyword.get(http_options(), :max_body)
try do
result =
Stream.transform(stream, 0, fn chunk, total_bytes ->
new_total = total_bytes + byte_size(chunk)
if new_total > max_body do
raise("Exceeds max body limit of #{max_body}")
else
{[chunk], new_total}
end
end)
|> Enum.into(<<>>)
{:ok, result}
rescue
_ -> :error
end
end
defp http_options do
[
pool: :rich_media,
max_body: Config.get([:rich_media, :max_body], 5_000_000),
tesla_middleware: [{Tesla.Middleware.Timeout, timeout: timeout}]
stream: true
]
end
defp req_headers do
[{"user-agent", Pleroma.Application.user_agent() <> "; Bot"}]
end
end