Specifically strip mentions for search indexing

This commit is contained in:
Ekaterina Vaartis 2022-08-27 01:43:59 +03:00
parent 6256822afd
commit 5a39866388
3 changed files with 27 additions and 1 deletions

View file

@ -13,6 +13,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
def run(["index"]) do def run(["index"]) do
start_pleroma() start_pleroma()
Pleroma.HTML.compile_scrubbers()
meili_version = meili_version =
( (

View file

@ -122,7 +122,8 @@ def object_to_search_data(object) do
end end
content = content =
with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str), with {:ok, scrubbed} <-
FastSanitize.Sanitizer.scrub(content_str, Pleroma.HTML.Scrubber.SearchIndexing),
trimmed <- String.trim(scrubbed) do trimmed <- String.trim(scrubbed) do
trimmed trimmed
end end

View file

@ -0,0 +1,24 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.HTML.Scrubber.SearchIndexing do
  @moduledoc """
  HTML scrubbing policy used when preparing content for the search index.

  Anchor elements whose `class` attribute contains `"mention"` are dropped
  together with their children. Every other anchor is unwrapped: the tag is
  discarded and only its children are kept. Any remaining node is handled by
  the clauses generated by the `FastSanitize.Sanitizer.Meta` macros invoked
  at the bottom of this module.
  """

  require FastSanitize.Sanitizer.Meta
  alias FastSanitize.Sanitizer.Meta

  # Mentions are removed explicitly; for any other link the tag itself is
  # stripped and only the children (text, presumably) are kept.
  def scrub({:a, attrs, children}) do
    if Enum.any?(attrs, &mention_class?/1) do
      nil
    else
      children
    end
  end

  Meta.strip_comments()
  Meta.strip_everything_not_covered()

  # True only for a `class` attribute whose value contains "mention".
  # Defined after the macro calls so that all `scrub/1` clauses stay grouped.
  defp mention_class?({"class", value}), do: String.contains?(value, "mention")
  defp mention_class?({_name, _value}), do: false
end