Pleroma.Upload.Filter.Dedupe: sharding directory structure
Dedupe now uses a three-level sharding directory structure to improve performance when many files are uploaded and stored on a filesystem instead of an object store. (Note: MinIO is still affected, as it still uses a traditional filesystem.) Files that were uploaded before this change are not moved, so this does not help if you already have hundreds of thousands of files uploaded. The media URLs are permanently part of the activity, so the files cannot be relocated. A motivated user could write a tool to move the files and perhaps write an Nginx or equivalent redirect to keep the files accessible, but that is beyond the scope of this change.
This commit is contained in:
parent
6099a94dbc
commit
d2de251c4d
5 changed files with 21 additions and 8 deletions
1
changelog.d/dedupe-sharding.change
Normal file
1
changelog.d/dedupe-sharding.change
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Dedupe upload filter now uses a three-level sharding directory structure
|
|
@ -17,8 +17,16 @@ def filter(%Upload{name: name, tempfile: tempfile} = upload) do
|
||||||
|> Base.encode16(case: :lower)
|
|> Base.encode16(case: :lower)
|
||||||
|
|
||||||
filename = shasum <> "." <> extension
|
filename = shasum <> "." <> extension
|
||||||
{:ok, :filtered, %Upload{upload | id: shasum, path: filename}}
|
|
||||||
|
{:ok, :filtered, %Upload{upload | id: shasum, path: shard_path(filename)}}
|
||||||
end
|
end
|
||||||
|
|
||||||
def filter(_), do: {:ok, :noop}
|
def filter(_), do: {:ok, :noop}
|
||||||
|
|
||||||
|
@spec shard_path(String.t()) :: String.t()
|
||||||
|
def shard_path(
|
||||||
|
<<a::binary-size(2), b::binary-size(2), c::binary-size(2), _::binary>> = filename
|
||||||
|
) do
|
||||||
|
Path.join([a, b, c, filename])
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -174,8 +174,9 @@ test "with dedupe enabled" do
|
||||||
|
|
||||||
filename = Path.basename(href)
|
filename = Path.basename(href)
|
||||||
|
|
||||||
assert {:ok, files} = File.ls(uploads_dir)
|
expected_path = Path.join([uploads_dir, Pleroma.Upload.Filter.Dedupe.shard_path(filename)])
|
||||||
assert filename in files
|
|
||||||
|
assert File.exists?(expected_path)
|
||||||
|
|
||||||
Object.delete(note)
|
Object.delete(note)
|
||||||
|
|
||||||
|
@ -183,8 +184,7 @@ test "with dedupe enabled" do
|
||||||
|
|
||||||
assert Object.get_by_id(note.id).data["deleted"]
|
assert Object.get_by_id(note.id).data["deleted"]
|
||||||
assert Object.get_by_id(attachment.id) == nil
|
assert Object.get_by_id(attachment.id) == nil
|
||||||
assert {:ok, files} = File.ls(uploads_dir)
|
refute File.exists?(expected_path)
|
||||||
refute filename in files
|
|
||||||
end
|
end
|
||||||
|
|
||||||
test "with objects that have legacy data.url attribute" do
|
test "with objects that have legacy data.url attribute" do
|
||||||
|
|
|
@ -23,10 +23,12 @@ test "adds shasum" do
|
||||||
tempfile: Path.absname("test/fixtures/image_tmp.jpg")
|
tempfile: Path.absname("test/fixtures/image_tmp.jpg")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
expected_path = Dedupe.shard_path(@shasum <> ".jpg")
|
||||||
|
|
||||||
assert {
|
assert {
|
||||||
:ok,
|
:ok,
|
||||||
:filtered,
|
:filtered,
|
||||||
%Pleroma.Upload{id: @shasum, path: @shasum <> ".jpg"}
|
%Pleroma.Upload{id: @shasum, path: ^expected_path}
|
||||||
} = Dedupe.filter(upload)
|
} = Dedupe.filter(upload)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -149,6 +149,9 @@ test "returns a media url" do
|
||||||
|
|
||||||
test "copies the file to the configured folder with deduping" do
|
test "copies the file to the configured folder with deduping" do
|
||||||
File.cp!("test/fixtures/image.jpg", "test/fixtures/image_tmp.jpg")
|
File.cp!("test/fixtures/image.jpg", "test/fixtures/image_tmp.jpg")
|
||||||
|
expected_filename = "e30397b58d226d6583ab5b8b3c5defb0c682bda5c31ef07a9f57c1c4986e3781.jpg"
|
||||||
|
|
||||||
|
expected_path = Pleroma.Upload.Filter.Dedupe.shard_path(expected_filename)
|
||||||
|
|
||||||
file = %Plug.Upload{
|
file = %Plug.Upload{
|
||||||
content_type: "image/jpeg",
|
content_type: "image/jpeg",
|
||||||
|
@ -159,8 +162,7 @@ test "copies the file to the configured folder with deduping" do
|
||||||
{:ok, data} = Upload.store(file, filters: [Pleroma.Upload.Filter.Dedupe])
|
{:ok, data} = Upload.store(file, filters: [Pleroma.Upload.Filter.Dedupe])
|
||||||
|
|
||||||
assert List.first(data["url"])["href"] ==
|
assert List.first(data["url"])["href"] ==
|
||||||
Pleroma.Upload.base_url() <>
|
Path.join([Pleroma.Upload.base_url(), expected_path])
|
||||||
"e30397b58d226d6583ab5b8b3c5defb0c682bda5c31ef07a9f57c1c4986e3781.jpg"
|
|
||||||
end
|
end
|
||||||
|
|
||||||
test "copies the file to the configured folder without deduping" do
|
test "copies the file to the configured folder without deduping" do
|
||||||
|
|
Loading…
Reference in a new issue