From 1b23acf164ebc4fde3fe1e4fdca6e11b7caa90ef Mon Sep 17 00:00:00 2001 From: Ivan Tashkinov Date: Mon, 11 May 2020 23:21:53 +0300 Subject: [PATCH] [#2497] Media preview proxy for images: fixes, tweaks, refactoring, tests adjustments. --- config/config.exs | 8 ++- lib/pleroma/reverse_proxy/reverse_proxy.ex | 4 ++ lib/pleroma/web/media_proxy/media_proxy.ex | 33 +++++++--- .../web/media_proxy/media_proxy_controller.ex | 62 ++++++++++------- mix.exs | 1 + mix.lock | 2 + test/web/media_proxy/media_proxy_test.exs | 66 +++++++++++++------ 7 files changed, 119 insertions(+), 57 deletions(-) diff --git a/config/config.exs b/config/config.exs index 526901f836..0f92b1ef92 100644 --- a/config/config.exs +++ b/config/config.exs @@ -381,6 +381,8 @@ proxy_opts: [ redirect_on_failure: false, max_body_length: 25 * 1_048_576, + # Note: max_read_duration defaults to Pleroma.ReverseProxy.max_read_duration_default/1 + max_read_duration: 30_000, http: [ follow_redirect: true, pool: :media @@ -388,10 +390,14 @@ ], whitelist: [] +# Note: media preview proxy depends on media proxy to be enabled config :pleroma, :media_preview_proxy, enabled: false, limit_dimensions: "400x200", - max_body_length: 25 * 1_048_576 + proxy_opts: [ + head_request_max_read_duration: 5_000, + max_read_duration: 10_000 + ] config :pleroma, :chat, enabled: true diff --git a/lib/pleroma/reverse_proxy/reverse_proxy.ex b/lib/pleroma/reverse_proxy/reverse_proxy.ex index 4bbeb493ca..aeaf9bd399 100644 --- a/lib/pleroma/reverse_proxy/reverse_proxy.ex +++ b/lib/pleroma/reverse_proxy/reverse_proxy.ex @@ -16,6 +16,8 @@ defmodule Pleroma.ReverseProxy do @failed_request_ttl :timer.seconds(60) @methods ~w(GET HEAD) + def max_read_duration_default, do: @max_read_duration + @moduledoc """ A reverse proxy. @@ -370,6 +372,8 @@ defp body_size_constraint(size, limit) when is_integer(limit) and limit > 0 and defp body_size_constraint(_, _), do: :ok + defp check_read_duration(nil = _duration, max), do: check_read_duration(@max_read_duration, max) + defp check_read_duration(duration, max) when is_integer(duration) and is_integer(max) and max > 0 do if duration > max do diff --git a/lib/pleroma/web/media_proxy/media_proxy.ex b/lib/pleroma/web/media_proxy/media_proxy.ex index f4791c7584..4e01c14e4a 100644 --- a/lib/pleroma/web/media_proxy/media_proxy.ex +++ b/lib/pleroma/web/media_proxy/media_proxy.ex @@ -13,26 +13,32 @@ def url(url) when is_nil(url) or url == "", do: nil def url("/" <> _ = url), do: url def url(url) do - if disabled?() or local?(url) or whitelisted?(url) do + if not enabled?() or local?(url) or whitelisted?(url) do url else encode_url(url) end end + # Note: routing all URLs to preview handler (even local and whitelisted). + # Preview handler will call url/1 on decoded URLs, and applicable ones will detour media proxy. def preview_url(url) do - if disabled?() or whitelisted?(url) do - url - else + if preview_enabled?() do encode_preview_url(url) + else + url end end - defp disabled?, do: !Config.get([:media_proxy, :enabled], false) + def enabled?, do: Config.get([:media_proxy, :enabled], false) - defp local?(url), do: String.starts_with?(url, Pleroma.Web.base_url()) + # Note: media proxy must be enabled for media preview proxy in order to load all + # non-local non-whitelisted URLs through it and be sure that body size constraint is preserved. + def preview_enabled?, do: enabled?() and Config.get([:media_preview_proxy, :enabled], false) - defp whitelisted?(url) do + def local?(url), do: String.starts_with?(url, Pleroma.Web.base_url()) + + def whitelisted?(url) do %{host: domain} = URI.parse(url) mediaproxy_whitelist = Config.get([:media_proxy, :whitelist]) @@ -111,17 +117,24 @@ def build_preview_url(sig_base64, url_base64, filename \\ nil) do proxy_url("proxy/preview", sig_base64, url_base64, filename) end - def filename_matches(%{"filename" => _} = _, path, url) do + def verify_request_path_and_url( + %Plug.Conn{params: %{"filename" => _}, request_path: request_path}, + url + ) do + verify_request_path_and_url(request_path, url) + end + + def verify_request_path_and_url(request_path, url) when is_binary(request_path) do filename = filename(url) - if filename && not basename_matches?(path, filename) do + if filename && not basename_matches?(request_path, filename) do {:wrong_filename, filename} else :ok end end - def filename_matches(_, _, _), do: :ok + def verify_request_path_and_url(_, _), do: :ok defp basename_matches?(path, filename) do basename = Path.basename(path) diff --git a/lib/pleroma/web/media_proxy/media_proxy_controller.ex b/lib/pleroma/web/media_proxy/media_proxy_controller.ex index fe3f61c189..157365e08e 100644 --- a/lib/pleroma/web/media_proxy/media_proxy_controller.ex +++ b/lib/pleroma/web/media_proxy/media_proxy_controller.ex @@ -10,14 +10,12 @@ defmodule Pleroma.Web.MediaProxy.MediaProxyController do alias Pleroma.ReverseProxy alias Pleroma.Web.MediaProxy - @default_proxy_opts [max_body_length: 25 * 1_048_576, http: [follow_redirect: true]] - - def remote(conn, %{"sig" => sig64, "url" => url64} = params) do - with config <- Config.get([:media_proxy], []), - {_, true} <- {:enabled, Keyword.get(config, :enabled, false)}, + def remote(conn, %{"sig" => sig64, "url" => url64}) do + with {_, true} <- {:enabled, MediaProxy.enabled?()}, {:ok, url} <- MediaProxy.decode_url(sig64, url64), - :ok <- MediaProxy.filename_matches(params, conn.request_path, url) do - ReverseProxy.call(conn, url, Keyword.get(config, :proxy_opts, @default_proxy_opts)) + :ok <- MediaProxy.verify_request_path_and_url(conn, url) do + proxy_opts = Config.get([:media_proxy, :proxy_opts], []) + ReverseProxy.call(conn, url, proxy_opts) else {:enabled, false} -> send_resp(conn, 404, Plug.Conn.Status.reason_phrase(404)) @@ -30,10 +28,10 @@ def remote(conn, %{"sig" => sig64, "url" => url64} = params) do end end - def preview(conn, %{"sig" => sig64, "url" => url64} = params) do - with {_, true} <- {:enabled, Config.get([:media_preview_proxy, :enabled], false)}, + def preview(conn, %{"sig" => sig64, "url" => url64}) do + with {_, true} <- {:enabled, MediaProxy.preview_enabled?()}, {:ok, url} <- MediaProxy.decode_url(sig64, url64), - :ok <- MediaProxy.filename_matches(params, conn.request_path, url) do + :ok <- MediaProxy.verify_request_path_and_url(conn, url) do handle_preview(conn, url) else {:enabled, false} -> @@ -48,21 +46,27 @@ def preview(conn, %{"sig" => sig64, "url" => url64} = params) do end defp handle_preview(conn, url) do - with {:ok, %{status: status} = head_response} when status in 200..299 <- Tesla.head(url), - {_, true} <- {:acceptable_content_length, acceptable_body_length?(head_response)} do + with {:ok, %{status: status} = head_response} when status in 200..299 <- + Tesla.head(url, opts: [adapter: [timeout: preview_head_request_timeout()]]) do content_type = Tesla.get_header(head_response, "content-type") handle_preview(content_type, conn, url) else {_, %{status: status}} -> send_resp(conn, :failed_dependency, "Can't fetch HTTP headers (HTTP #{status}).") - {:acceptable_content_length, false} -> - send_resp(conn, :unprocessable_entity, "Source file size exceeds limit.") + {:error, :recv_response_timeout} -> + send_resp(conn, :failed_dependency, "HEAD request timeout.") + + _ -> + send_resp(conn, :failed_dependency, "Can't fetch HTTP headers.") end end - defp handle_preview("image/" <> _, %{params: params} = conn, url) do - with {:ok, %{status: status, body: body}} when status in 200..299 <- Tesla.get(url), + defp handle_preview("image/" <> _ = content_type, %{params: params} = conn, url) do + with {:ok, %{status: status, body: body}} when status in 200..299 <- + url + |> MediaProxy.url() + |> Tesla.get(opts: [adapter: [timeout: preview_timeout()]]), {:ok, path} <- MogrifyHelper.store_as_temporary_file(url, body), resize_dimensions <- Map.get( @@ -70,12 +74,19 @@ defp handle_preview("image/" <> _, %{params: params} = conn, url) do "limit_dimensions", Config.get([:media_preview_proxy, :limit_dimensions]) ), - %Mogrify.Image{} <- MogrifyHelper.in_place_resize_to_limit(path, resize_dimensions) do - send_file(conn, 200, path) + %Mogrify.Image{} <- MogrifyHelper.in_place_resize_to_limit(path, resize_dimensions), + {:ok, image_binary} <- File.read(path), + _ <- File.rm(path) do + conn + |> put_resp_header("content-type", content_type) + |> send_resp(200, image_binary) else {_, %{status: _}} -> send_resp(conn, :failed_dependency, "Can't fetch the image.") + {:error, :recv_response_timeout} -> + send_resp(conn, :failed_dependency, "Downstream timeout.") + _ -> send_resp(conn, :failed_dependency, "Can't handle image preview.") end @@ -85,13 +96,14 @@ defp handle_preview(content_type, conn, _url) do send_resp(conn, :unprocessable_entity, "Unsupported content type: #{content_type}.") end - defp acceptable_body_length?(head_response) do - max_body_length = Config.get([:media_preview_proxy, :max_body_length], nil) - content_length = Tesla.get_header(head_response, "content-length") - content_length = with {int, _} <- Integer.parse(content_length), do: int + defp preview_head_request_timeout do + Config.get([:media_preview_proxy, :proxy_opts, :head_request_max_read_duration]) || + preview_timeout() + end - content_length == :error or - max_body_length in [nil, :infinity] or - content_length <= max_body_length + defp preview_timeout do + Config.get([:media_preview_proxy, :proxy_opts, :max_read_duration]) || + Config.get([:media_proxy, :proxy_opts, :max_read_duration]) || + ReverseProxy.max_read_duration_default() end end diff --git a/mix.exs b/mix.exs index 6d65e18d45..a9c4ad2e33 100644 --- a/mix.exs +++ b/mix.exs @@ -139,6 +139,7 @@ defp deps do github: "ninenines/gun", ref: "e1a69b36b180a574c0ac314ced9613fdd52312cc", override: true}, {:jason, "~> 1.0"}, {:mogrify, "~> 0.6.1"}, + {:eimp, ">= 0.0.0"}, {:ex_aws, "~> 2.1"}, {:ex_aws_s3, "~> 2.0"}, {:sweet_xml, "~> 0.6.6"}, diff --git a/mix.lock b/mix.lock index c400202b70..ede7b0ada8 100644 --- a/mix.lock +++ b/mix.lock @@ -29,6 +29,7 @@ "ecto": {:hex, :ecto, "3.4.0", "a7a83ab8359bf816ce729e5e65981ce25b9fc5adfc89c2ea3980f4fed0bfd7c1", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "5eed18252f5b5bbadec56a24112b531343507dbe046273133176b12190ce19cc"}, "ecto_enum": {:hex, :ecto_enum, "1.4.0", "d14b00e04b974afc69c251632d1e49594d899067ee2b376277efd8233027aec8", [:mix], [{:ecto, ">= 3.0.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:ecto_sql, "> 3.0.0", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:mariaex, ">= 0.0.0", [hex: :mariaex, repo: "hexpm", optional: true]}, {:postgrex, ">= 0.0.0", [hex: :postgrex, repo: "hexpm", optional: true]}], "hexpm", "8fb55c087181c2b15eee406519dc22578fa60dd82c088be376d0010172764ee4"}, "ecto_sql": {:hex, :ecto_sql, "3.3.4", "aa18af12eb875fbcda2f75e608b3bd534ebf020fc4f6448e4672fcdcbb081244", [:mix], [{:db_connection, "~> 2.2", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.4 or ~> 3.3.3", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.3.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.15.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "5eccbdbf92e3c6f213007a82d5dbba4cd9bb659d1a21331f89f408e4c0efd7a8"}, + "eimp": {:hex, :eimp, "1.0.14", "fc297f0c7e2700457a95a60c7010a5f1dcb768a083b6d53f49cd94ab95a28f22", [:rebar3], [{:p1_utils, "1.0.18", [hex: :p1_utils, repo: "hexpm", optional: false]}], "hexpm", "501133f3112079b92d9e22da8b88bf4f0e13d4d67ae9c15c42c30bd25ceb83b6"}, "esshd": {:hex, :esshd, "0.1.1", "d4dd4c46698093a40a56afecce8a46e246eb35463c457c246dacba2e056f31b5", [:mix], [], "hexpm", "d73e341e3009d390aa36387dc8862860bf9f874c94d9fd92ade2926376f49981"}, "eternal": {:hex, :eternal, "1.2.1", "d5b6b2499ba876c57be2581b5b999ee9bdf861c647401066d3eeed111d096bc4", [:mix], [], "hexpm", "b14f1dc204321429479c569cfbe8fb287541184ed040956c8862cb7a677b8406"}, "ex2ms": {:hex, :ex2ms, "1.5.0", "19e27f9212be9a96093fed8cdfbef0a2b56c21237196d26760f11dfcfae58e97", [:mix], [], "hexpm"}, @@ -75,6 +76,7 @@ "nodex": {:git, "https://git.pleroma.social/pleroma/nodex", "cb6730f943cfc6aad674c92161be23a8411f15d1", [ref: "cb6730f943cfc6aad674c92161be23a8411f15d1"]}, "oban": {:hex, :oban, "1.2.0", "7cca94d341be43d220571e28f69131c4afc21095b25257397f50973d3fc59b07", [:mix], [{:ecto_sql, "~> 3.1", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:postgrex, "~> 0.14", [hex: :postgrex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "ba5f8b3f7d76967b3e23cf8014f6a13e4ccb33431e4808f036709a7f822362ee"}, "open_api_spex": {:git, "https://git.pleroma.social/pleroma/elixir-libraries/open_api_spex.git", "b862ebd78de0df95875cf46feb6e9607130dc2a8", [ref: "b862ebd78de0df95875cf46feb6e9607130dc2a8"]}, + "p1_utils": {:hex, :p1_utils, "1.0.18", "3fe224de5b2e190d730a3c5da9d6e8540c96484cf4b4692921d1e28f0c32b01c", [:rebar3], [], "hexpm", "1fc8773a71a15553b179c986b22fbeead19b28fe486c332d4929700ffeb71f88"}, "parse_trans": {:hex, :parse_trans, "3.3.0", "09765507a3c7590a784615cfd421d101aec25098d50b89d7aa1d66646bc571c1", [:rebar3], [], "hexpm", "17ef63abde837ad30680ea7f857dd9e7ced9476cdd7b0394432af4bfc241b960"}, "pbkdf2_elixir": {:hex, :pbkdf2_elixir, "0.12.4", "8dd29ed783f2e12195d7e0a4640effc0a7c37e6537da491f1db01839eee6d053", [:mix], [], "hexpm", "595d09db74cb093b1903381c9de423276a931a2480a46a1a5dc7f932a2a6375b"}, "phoenix": {:hex, :phoenix, "1.4.13", "67271ad69b51f3719354604f4a3f968f83aa61c19199343656c9caee057ff3b8", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 1.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:plug, "~> 1.8.1 or ~> 1.9", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 1.0 or ~> 2.0", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "ab765a0feddb81fc62e2116c827b5f068df85159c162bee760745276ad7ddc1b"}, diff --git a/test/web/media_proxy/media_proxy_test.exs b/test/web/media_proxy/media_proxy_test.exs index cad0acd306..ac5d8fd32a 100644 --- a/test/web/media_proxy/media_proxy_test.exs +++ b/test/web/media_proxy/media_proxy_test.exs @@ -85,38 +85,62 @@ test "validates signature" do assert MediaProxy.decode_url(sig, base64) == {:error, :invalid_signature} end - test "`filename_matches/_` preserves the encoded or decoded path" do - assert MediaProxy.filename_matches( - %{"filename" => "/Hello world.jpg"}, - "/Hello world.jpg", - "http://pleroma.social/Hello world.jpg" + def test_verify_request_path_and_url(request_path, url, expected_result) do + assert MediaProxy.verify_request_path_and_url(request_path, url) == expected_result + + assert MediaProxy.verify_request_path_and_url( + %Plug.Conn{ + params: %{"filename" => Path.basename(request_path)}, + request_path: request_path + }, + url + ) == expected_result + end + + test "if first arg of `verify_request_path_and_url/2` is a Plug.Conn without \"filename\" " <> + "parameter, `verify_request_path_and_url/2` returns :ok " do + assert MediaProxy.verify_request_path_and_url( + %Plug.Conn{params: %{}, request_path: "/some/path"}, + "https://instance.com/file.jpg" ) == :ok - assert MediaProxy.filename_matches( - %{"filename" => "/Hello%20world.jpg"}, - "/Hello%20world.jpg", - "http://pleroma.social/Hello%20world.jpg" + assert MediaProxy.verify_request_path_and_url( + %Plug.Conn{params: %{}, request_path: "/path/to/file.jpg"}, + "https://instance.com/file.jpg" ) == :ok + end - assert MediaProxy.filename_matches( - %{"filename" => "/my%2Flong%2Furl%2F2019%2F07%2FS.jpg"}, - "/my%2Flong%2Furl%2F2019%2F07%2FS.jpg", - "http://pleroma.social/my%2Flong%2Furl%2F2019%2F07%2FS.jpg" - ) == :ok + test "`verify_request_path_and_url/2` preserves the encoded or decoded path" do + test_verify_request_path_and_url( + "/Hello world.jpg", + "http://pleroma.social/Hello world.jpg", + :ok + ) - assert MediaProxy.filename_matches( - %{"filename" => "/my%2Flong%2Furl%2F2019%2F07%2FS.jp"}, - "/my%2Flong%2Furl%2F2019%2F07%2FS.jp", - "http://pleroma.social/my%2Flong%2Furl%2F2019%2F07%2FS.jpg" - ) == {:wrong_filename, "my%2Flong%2Furl%2F2019%2F07%2FS.jpg"} + test_verify_request_path_and_url( + "/Hello%20world.jpg", + "http://pleroma.social/Hello%20world.jpg", + :ok + ) + + test_verify_request_path_and_url( + "/my%2Flong%2Furl%2F2019%2F07%2FS.jpg", + "http://pleroma.social/my%2Flong%2Furl%2F2019%2F07%2FS.jpg", + :ok + ) + + test_verify_request_path_and_url( + "/my%2Flong%2Furl%2F2019%2F07%2FS", + "http://pleroma.social/my%2Flong%2Furl%2F2019%2F07%2FS.jpg", + {:wrong_filename, "my%2Flong%2Furl%2F2019%2F07%2FS.jpg"} + ) end test "encoded url are tried to match for proxy as `conn.request_path` encodes the url" do # conn.request_path will return encoded url request_path = "/ANALYSE-DAI-_-LE-STABLECOIN-100-D%C3%89CENTRALIS%C3%89-BQ.jpg" - assert MediaProxy.filename_matches( - true, + assert MediaProxy.verify_request_path_and_url( request_path, "https://mydomain.com/uploads/2019/07/ANALYSE-DAI-_-LE-STABLECOIN-100-DÉCENTRALISÉ-BQ.jpg" ) == :ok