Specifically strip mentions for search indexing
This commit is contained in:
parent
6256822afd
commit
5a39866388
3 changed files with 27 additions and 1 deletions
|
@ -13,6 +13,7 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
|
|||
|
||||
def run(["index"]) do
|
||||
start_pleroma()
|
||||
Pleroma.HTML.compile_scrubbers()
|
||||
|
||||
meili_version =
|
||||
(
|
||||
|
|
|
@ -122,7 +122,8 @@ def object_to_search_data(object) do
|
|||
end
|
||||
|
||||
content =
|
||||
with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str),
|
||||
with {:ok, scrubbed} <-
|
||||
FastSanitize.Sanitizer.scrub(content_str, Pleroma.HTML.Scrubber.SearchIndexing),
|
||||
trimmed <- String.trim(scrubbed) do
|
||||
trimmed
|
||||
end
|
||||
|
|
24
priv/scrubbers/search_indexing.ex
Normal file
24
priv/scrubbers/search_indexing.ex
Normal file
|
@ -0,0 +1,24 @@
|
|||
# Pleroma: A lightweight social networking server
|
||||
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
|
||||
# SPDX-License-Identifier: AGPL-3.0-only
|
||||
|
||||
defmodule Pleroma.HTML.Scrubber.SearchIndexing do
  @moduledoc """
  An HTML scrubbing policy that scrubs things for searching.

  Anchor elements whose `class` attribute contains `"mention"` are dropped
  entirely, so mentioned handles do not end up in the search index. Every
  other anchor is unwrapped to its children. Comments and all remaining
  nodes are handled by the clauses generated by `Meta.strip_comments/0` and
  `Meta.strip_everything_not_covered/0`.
  """

  require FastSanitize.Sanitizer.Meta
  alias FastSanitize.Sanitizer.Meta

  @doc """
  Scrubs a single parsed HTML node.

  For an `:a` node, returns `nil` when any `class` attribute value contains
  the substring `"mention"`; otherwise returns the node's `children`, which
  strips the tag itself and keeps only what it wraps (text, presumably).
  """
  def scrub({:a, attrs, children}) do
    case Enum.any?(attrs, &mention_class?/1) do
      # Explicitly remove mentions
      true -> nil
      # Strip the tag itself, leave only children
      false -> children
    end
  end

  Meta.strip_comments()
  Meta.strip_everything_not_covered()

  # True when the attribute pair is a `class` whose value contains "mention".
  # Defined after the Meta macros so that the `scrub/1` clauses they are
  # expected to generate stay grouped with the explicit clause above.
  defp mention_class?({"class", value}), do: String.contains?(value, "mention")
  defp mention_class?({_name, _value}), do: false
end
|
Loading…
Reference in a new issue