From f5ad4309747e85192e6034fd362103b0b71869d0 Mon Sep 17 00:00:00 2001 From: Sachin Joshi Date: Sun, 7 Jul 2019 14:13:40 +0545 Subject: [PATCH 1/5] make sure the url used by proxy is same as origin url encoding or decoding it breaks some of the signed url --- lib/pleroma/web/media_proxy/media_proxy.ex | 23 +--------------------- test/media_proxy_test.exs | 7 +++++-- 2 files changed, 6 insertions(+), 24 deletions(-) diff --git a/lib/pleroma/web/media_proxy/media_proxy.ex b/lib/pleroma/web/media_proxy/media_proxy.ex index cee6d8481a..dd8888a021 100644 --- a/lib/pleroma/web/media_proxy/media_proxy.ex +++ b/lib/pleroma/web/media_proxy/media_proxy.ex @@ -33,20 +33,7 @@ defp whitelisted?(url) do def encode_url(url) do secret = Pleroma.Config.get([Pleroma.Web.Endpoint, :secret_key_base]) - - # Must preserve `%2F` for compatibility with S3 - # https://git.pleroma.social/pleroma/pleroma/issues/580 - replacement = get_replacement(url, ":2F:") - - # The URL is url-decoded and encoded again to ensure it is correctly encoded and not twice. - base64 = - url - |> String.replace("%2F", replacement) - |> URI.decode() - |> URI.encode() - |> String.replace(replacement, "%2F") - |> Base.url_encode64(@base64_opts) - + base64 = Base.url_encode64(url, @base64_opts) sig = :crypto.hmac(:sha, secret, base64) sig64 = sig |> Base.url_encode64(@base64_opts) @@ -80,12 +67,4 @@ def build_url(sig_base64, url_base64, filename \\ nil) do |> Enum.filter(fn value -> value end) |> Path.join() end - - defp get_replacement(url, replacement) do - if String.contains?(url, replacement) do - get_replacement(url, replacement <> replacement) - else - replacement - end - end end diff --git a/test/media_proxy_test.exs b/test/media_proxy_test.exs index b23aeb88be..cd1cbd202a 100644 --- a/test/media_proxy_test.exs +++ b/test/media_proxy_test.exs @@ -70,9 +70,12 @@ test "encodes and decodes URL and ignores query params for the path" do assert decode_result(encoded) == url end - test "ensures urls are url-encoded" do + # Some of the signed url expect the special character in the url to be same + # for the proxy to work. + # Issue https://git.pleroma.social/pleroma/pleroma/issues/1055 + test "ensures urls are maintained (character are not encoded or decoded)" do assert decode_result(url("https://pleroma.social/Hello world.jpg")) == - "https://pleroma.social/Hello%20world.jpg" + "https://pleroma.social/Hello world.jpg" assert decode_result(url("https://pleroma.social/Hello%20world.jpg")) == "https://pleroma.social/Hello%20world.jpg" From 4213a4e2aa144ba0a3dff69d5b991a2deba0aa85 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 9 Jul 2019 09:41:41 -0500 Subject: [PATCH 2/5] Test that all ASCII encoded characters are preserved --- test/media_proxy_test.exs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/media_proxy_test.exs b/test/media_proxy_test.exs index cd1cbd202a..1b6b3c1fd0 100644 --- a/test/media_proxy_test.exs +++ b/test/media_proxy_test.exs @@ -145,9 +145,9 @@ test "uses the configured base_url" do end # https://git.pleroma.social/pleroma/pleroma/issues/580 - test "encoding S3 links (must preserve `%2F`)" do + test "preserve ascii encoding" do url = - "https://s3.amazonaws.com/example/test.png?X-Amz-Credential=your-access-key-id%2F20130721%2Fus-east-1%2Fs3%2Faws4_request" + "https://pleroma.com/%20/%21/%22/%23/%24/%25/%26/%27/%28/%29/%2A/%2B/%2C/%2D/%2E/%2F/%30/%31/%32/%33/%34/%35/%36/%37/%38/%39/%3A/%3B/%3C/%3D/%3E/%3F/%40/%41/%42/%43/%44/%45/%46/%47/%48/%49/%4A/%4B/%4C/%4D/%4E/%4F/%50/%51/%52/%53/%54/%55/%56/%57/%58/%59/%5A/%5B/%5C/%5D/%5E/%5F/%60/%61/%62/%63/%64/%65/%66/%67/%68/%69/%6A/%6B/%6C/%6D/%6E/%6F/%70/%71/%72/%73/%74/%75/%76/%77/%78/%79/%7A/%7B/%7C/%7D/%7E/%7F/%80/%81/%82/%83/%84/%85/%86/%87/%88/%89/%8A/%8B/%8C/%8D/%8E/%8F/%90/%91/%92/%93/%94/%95/%96/%97/%98/%99/%9A/%9B/%9C/%9D/%9E/%9F/%C2%A0/%A1/%A2/%A3/%A4/%A5/%A6/%A7/%A8/%A9/%AA/%AB/%AC/%C2%AD/%AE/%AF/%B0/%B1/%B2/%B3/%B4/%B5/%B6/%B7/%B8/%B9/%BA/%BB/%BC/%BD/%BE/%BF/%C0/%C1/%C2/%C3/%C4/%C5/%C6/%C7/%C8/%C9/%CA/%CB/%CC/%CD/%CE/%CF/%D0/%D1/%D2/%D3/%D4/%D5/%D6/%D7/%D8/%D9/%DA/%DB/%DC/%DD/%DE/%DF/%E0/%E1/%E2/%E3/%E4/%E5/%E6/%E7/%E8/%E9/%EA/%EB/%EC/%ED/%EE/%EF/%F0/%F1/%F2/%F3/%F4/%F5/%F6/%F7/%F8/%F9/%FA/%FB/%FC/%FD/%FE/%FF" encoded = url(url) assert decode_result(encoded) == url From 98f13eac9e38c6ec44a7146cfc58114b0148f462 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 9 Jul 2019 10:11:42 -0500 Subject: [PATCH 3/5] Capitalize --- test/media_proxy_test.exs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/test/media_proxy_test.exs b/test/media_proxy_test.exs index 1b6b3c1fd0..144d261db8 100644 --- a/test/media_proxy_test.exs +++ b/test/media_proxy_test.exs @@ -145,7 +145,7 @@ test "uses the configured base_url" do end # https://git.pleroma.social/pleroma/pleroma/issues/580 - test "preserve ascii encoding" do + test "preserve ASCII encoding" do url = "https://pleroma.com/%20/%21/%22/%23/%24/%25/%26/%27/%28/%29/%2A/%2B/%2C/%2D/%2E/%2F/%30/%31/%32/%33/%34/%35/%36/%37/%38/%39/%3A/%3B/%3C/%3D/%3E/%3F/%40/%41/%42/%43/%44/%45/%46/%47/%48/%49/%4A/%4B/%4C/%4D/%4E/%4F/%50/%51/%52/%53/%54/%55/%56/%57/%58/%59/%5A/%5B/%5C/%5D/%5E/%5F/%60/%61/%62/%63/%64/%65/%66/%67/%68/%69/%6A/%6B/%6C/%6D/%6E/%6F/%70/%71/%72/%73/%74/%75/%76/%77/%78/%79/%7A/%7B/%7C/%7D/%7E/%7F/%80/%81/%82/%83/%84/%85/%86/%87/%88/%89/%8A/%8B/%8C/%8D/%8E/%8F/%90/%91/%92/%93/%94/%95/%96/%97/%98/%99/%9A/%9B/%9C/%9D/%9E/%9F/%C2%A0/%A1/%A2/%A3/%A4/%A5/%A6/%A7/%A8/%A9/%AA/%AB/%AC/%C2%AD/%AE/%AF/%B0/%B1/%B2/%B3/%B4/%B5/%B6/%B7/%B8/%B9/%BA/%BB/%BC/%BD/%BE/%BF/%C0/%C1/%C2/%C3/%C4/%C5/%C6/%C7/%C8/%C9/%CA/%CB/%CC/%CD/%CE/%CF/%D0/%D1/%D2/%D3/%D4/%D5/%D6/%D7/%D8/%D9/%DA/%DB/%DC/%DD/%DE/%DF/%E0/%E1/%E2/%E3/%E4/%E5/%E6/%E7/%E8/%E9/%EA/%EB/%EC/%ED/%EE/%EF/%F0/%F1/%F2/%F3/%F4/%F5/%F6/%F7/%F8/%F9/%FA/%FB/%FC/%FD/%FE/%FF" @@ -153,6 +153,17 @@ test "preserve ascii encoding" do assert decode_result(encoded) == url end + # This includes unsafe/reserved characters which are not interpreted as part of the URL + # and would otherwise have to be ASCII encoded. It is our role to ensure the proxied URL + # is unmodified, so we are testing these characters anyway. + test "preserve non-unicode characters per RFC3986" do + url = + "https://pleroma.com/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890-._~:/?#[]@!$&'()*+,;=|^`{}" + + encoded = url(url) + assert decode_result(encoded) == url + end + test "does not change whitelisted urls" do upload_config = Pleroma.Config.get([Pleroma.Upload]) media_url = "https://media.pleroma.social" From ce3ffad13a5ceeab383f43bf576ff8bbbd0af42f Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 9 Jul 2019 10:23:22 -0500 Subject: [PATCH 4/5] Remove duplicated test. New one is more comprehensive. --- test/media_proxy_test.exs | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/test/media_proxy_test.exs b/test/media_proxy_test.exs index 144d261db8..13922fe4ae 100644 --- a/test/media_proxy_test.exs +++ b/test/media_proxy_test.exs @@ -70,17 +70,6 @@ test "encodes and decodes URL and ignores query params for the path" do assert decode_result(encoded) == url end - # Some of the signed url expect the special character in the url to be same - # for the proxy to work. - # Issue https://git.pleroma.social/pleroma/pleroma/issues/1055 - test "ensures urls are maintained (character are not encoded or decoded)" do - assert decode_result(url("https://pleroma.social/Hello world.jpg")) == - "https://pleroma.social/Hello world.jpg" - - assert decode_result(url("https://pleroma.social/Hello%20world.jpg")) == - "https://pleroma.social/Hello%20world.jpg" - end - test "validates signature" do secret_key_base = Pleroma.Config.get([Pleroma.Web.Endpoint, :secret_key_base]) @@ -144,7 +133,10 @@ test "uses the configured base_url" do assert String.starts_with?(encoded, Pleroma.Config.get([:media_proxy, :base_url])) end - # https://git.pleroma.social/pleroma/pleroma/issues/580 + # Some sites expect ASCII encoded characters in the URL to be preserved even if + # unnecessary. + # Issues: https://git.pleroma.social/pleroma/pleroma/issues/580 + # https://git.pleroma.social/pleroma/pleroma/issues/1055 test "preserve ASCII encoding" do url = "https://pleroma.com/%20/%21/%22/%23/%24/%25/%26/%27/%28/%29/%2A/%2B/%2C/%2D/%2E/%2F/%30/%31/%32/%33/%34/%35/%36/%37/%38/%39/%3A/%3B/%3C/%3D/%3E/%3F/%40/%41/%42/%43/%44/%45/%46/%47/%48/%49/%4A/%4B/%4C/%4D/%4E/%4F/%50/%51/%52/%53/%54/%55/%56/%57/%58/%59/%5A/%5B/%5C/%5D/%5E/%5F/%60/%61/%62/%63/%64/%65/%66/%67/%68/%69/%6A/%6B/%6C/%6D/%6E/%6F/%70/%71/%72/%73/%74/%75/%76/%77/%78/%79/%7A/%7B/%7C/%7D/%7E/%7F/%80/%81/%82/%83/%84/%85/%86/%87/%88/%89/%8A/%8B/%8C/%8D/%8E/%8F/%90/%91/%92/%93/%94/%95/%96/%97/%98/%99/%9A/%9B/%9C/%9D/%9E/%9F/%C2%A0/%A1/%A2/%A3/%A4/%A5/%A6/%A7/%A8/%A9/%AA/%AB/%AC/%C2%AD/%AE/%AF/%B0/%B1/%B2/%B3/%B4/%B5/%B6/%B7/%B8/%B9/%BA/%BB/%BC/%BD/%BE/%BF/%C0/%C1/%C2/%C3/%C4/%C5/%C6/%C7/%C8/%C9/%CA/%CB/%CC/%CD/%CE/%CF/%D0/%D1/%D2/%D3/%D4/%D5/%D6/%D7/%D8/%D9/%DA/%DB/%DC/%DD/%DE/%DF/%E0/%E1/%E2/%E3/%E4/%E5/%E6/%E7/%E8/%E9/%EA/%EB/%EC/%ED/%EE/%EF/%F0/%F1/%F2/%F3/%F4/%F5/%F6/%F7/%F8/%F9/%FA/%FB/%FC/%FD/%FE/%FF" From e143747445a0cd4f9b34c1b96ab7e87632e21a74 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 9 Jul 2019 10:55:36 -0500 Subject: [PATCH 5/5] Add test for URLs with Unicode characters too --- test/media_proxy_test.exs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/media_proxy_test.exs b/test/media_proxy_test.exs index 13922fe4ae..1d6d170b72 100644 --- a/test/media_proxy_test.exs +++ b/test/media_proxy_test.exs @@ -156,6 +156,13 @@ test "preserve non-unicode characters per RFC3986" do assert decode_result(encoded) == url end + test "preserve unicode characters" do + url = "https://ko.wikipedia.org/wiki/위키백과:대문" + + encoded = url(url) + assert decode_result(encoded) == url + end + test "does not change whitelisted urls" do upload_config = Pleroma.Config.get([Pleroma.Upload]) media_url = "https://media.pleroma.social"