Specifically strip mentions for search indexing
This commit is contained in:
parent
6256822afd
commit
5a39866388
3 changed files with 27 additions and 1 deletions
|
@ -13,6 +13,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
|
||||||
|
|
||||||
def run(["index"]) do
|
def run(["index"]) do
|
||||||
start_pleroma()
|
start_pleroma()
|
||||||
|
Pleroma.HTML.compile_scrubbers()
|
||||||
|
|
||||||
meili_version =
|
meili_version =
|
||||||
(
|
(
|
||||||
|
|
|
@ -122,7 +122,8 @@ def object_to_search_data(object) do
|
||||||
end
|
end
|
||||||
|
|
||||||
content =
|
content =
|
||||||
with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str),
|
with {:ok, scrubbed} <-
|
||||||
|
FastSanitize.Sanitizer.scrub(content_str, Pleroma.HTML.Scrubber.SearchIndexing),
|
||||||
trimmed <- String.trim(scrubbed) do
|
trimmed <- String.trim(scrubbed) do
|
||||||
trimmed
|
trimmed
|
||||||
end
|
end
|
||||||
|
|
24
priv/scrubbers/search_indexing.ex
Normal file
24
priv/scrubbers/search_indexing.ex
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
# Pleroma: A lightweight social networking server
|
||||||
|
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
|
||||||
|
defmodule Pleroma.HTML.Scrubber.SearchIndexing do
  @moduledoc """
  An HTML scrubbing policy that prepares content for search indexing.

  Anchor tags whose `class` attribute contains `"mention"` are removed
  explicitly; every other anchor is replaced by its children. Comments and
  all remaining nodes are handled by the `FastSanitize.Sanitizer.Meta`
  macros invoked at the bottom of the module.
  """

  require FastSanitize.Sanitizer.Meta
  alias FastSanitize.Sanitizer.Meta

  # Mentions are removed explicitly by returning `nil` for the whole anchor.
  # Any other link loses the tag itself and keeps only its children
  # (text, presumably).
  def scrub({:a, attrs, children}) do
    if Enum.any?(attrs, &mention_class?/1) do
      nil
    else
      children
    end
  end

  Meta.strip_comments()
  Meta.strip_everything_not_covered()

  # True for a `class` attribute whose value contains "mention" anywhere
  # (substring match, so e.g. "u-url mention" qualifies).
  defp mention_class?({"class", value}), do: String.contains?(value, "mention")
  defp mention_class?({_name, _value}), do: false
end
|
Loading…
Reference in a new issue