From a60242464e6a92bf6de46a1cf7877799de27a3ce Mon Sep 17 00:00:00 2001 From: lain Date: Thu, 19 Nov 2020 16:12:01 +0100 Subject: [PATCH 01/15] Search: Add option to search with the websearch function --- lib/pleroma/activity/search.ex | 31 ++++++++++++++++-- test/pleroma/activity/search_test.exs | 45 +++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 test/pleroma/activity/search_test.exs diff --git a/lib/pleroma/activity/search.ex b/lib/pleroma/activity/search.ex index ceb365bb3c..8449b9b004 100644 --- a/lib/pleroma/activity/search.ex +++ b/lib/pleroma/activity/search.ex @@ -19,11 +19,13 @@ def search(user, search_query, options \\ []) do offset = Keyword.get(options, :offset, 0) author = Keyword.get(options, :author) + search_function = Pleroma.Config.get([:instance, :search_function], :plain) + Activity |> Activity.with_preloaded_object() |> Activity.restrict_deactivated_users() |> restrict_public() - |> query_with(index_type, search_query) + |> query_with(index_type, search_query, search_function) |> maybe_restrict_local(user) |> maybe_restrict_author(author) |> maybe_restrict_blocked(user) @@ -50,7 +52,7 @@ defp restrict_public(q) do ) end - defp query_with(q, :gin, search_query) do + defp query_with(q, :gin, search_query, :plain) do from([a, o] in q, where: fragment( @@ -61,7 +63,18 @@ defp query_with(q, :gin, search_query) do ) end - defp query_with(q, :rum, search_query) do + defp query_with(q, :gin, search_query, :websearch) do + from([a, o] in q, + where: + fragment( + "to_tsvector('english', ?->>'content') @@ websearch_to_tsquery('english', ?)", + o.data, + ^search_query + ) + ) + end + + defp query_with(q, :rum, search_query, :plain) do from([a, o] in q, where: fragment( @@ -73,6 +86,18 @@ defp query_with(q, :rum, search_query) do ) end + defp query_with(q, :rum, search_query, :websearch) do + from([a, o] in q, + where: + fragment( + "? 
@@ websearch_to_tsquery('english', ?)", + o.fts_content, + ^search_query + ), + order_by: [fragment("? <=> now()::date", o.inserted_at)] + ) + end + defp maybe_restrict_local(q, user) do limit = Pleroma.Config.get([:instance, :limit_to_local_content], :unauthenticated) diff --git a/test/pleroma/activity/search_test.exs b/test/pleroma/activity/search_test.exs new file mode 100644 index 0000000000..ba3257d64d --- /dev/null +++ b/test/pleroma/activity/search_test.exs @@ -0,0 +1,45 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2020 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Activity.SearchTest do + use Pleroma.DataCase + + import Pleroma.Factory + alias Pleroma.Web.CommonAPI + alias Pleroma.Activity.Search + + test "it finds something" do + user = insert(:user) + {:ok, post} = CommonAPI.post(user, %{status: "it's wednesday my dudes"}) + + [result] = Search.search(nil, "wednesday") + + assert result.id == post.id + end + + test "using plainto_tsquery" do + clear_config([:instance, :search_function], :plain) + + user = insert(:user) + {:ok, post} = CommonAPI.post(user, %{status: "it's wednesday my dudes"}) + {:ok, _post2} = CommonAPI.post(user, %{status: "it's wednesday my bros"}) + + # plainto doesn't understand complex queries + assert [result] = Search.search(nil, "wednesday -dudes") + + assert result.id == post.id + end + + test "using websearch_to_tsquery" do + clear_config([:instance, :search_function], :websearch) + + user = insert(:user) + {:ok, _post} = CommonAPI.post(user, %{status: "it's wednesday my dudes"}) + {:ok, other_post} = CommonAPI.post(user, %{status: "it's wednesday my bros"}) + + assert [result] = Search.search(nil, "wednesday -dudes") + + assert result.id == other_post.id + end +end From 1bad91cba207a9ffb900024cb4759cb5a6aa761a Mon Sep 17 00:00:00 2001 From: lain Date: Thu, 19 Nov 2020 16:13:53 +0100 Subject: [PATCH 02/15] Changelog: Add info about the websearch option --- 
CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8658d54409..e3349a2134 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Configuration: Add `:instance, autofollowing_nicknames` setting to provide a way to make accounts automatically follow new users that register on the local Pleroma instance. - Ability to view remote timelines, with ex. `/api/v1/timelines/public?instance=lain.com` and streams `public:remote` and `public:remote:media`. - The site title is now injected as a `title` tag like preloads or metadata. +- Added a configuration option to use the postgresql `websearch` function for more complicated search queries.
API Changes From 1c16c67c21236d924901c5b6d65b57f7db6a2783 Mon Sep 17 00:00:00 2001 From: lain Date: Thu, 19 Nov 2020 16:16:55 +0100 Subject: [PATCH 03/15] Cheatsheet: Add info about search_function --- docs/configuration/cheatsheet.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/configuration/cheatsheet.md b/docs/configuration/cheatsheet.md index 4d18ac30a4..fa59a27e33 100644 --- a/docs/configuration/cheatsheet.md +++ b/docs/configuration/cheatsheet.md @@ -63,6 +63,7 @@ To add configuration to your config file, you can copy it from the base config. * `external_user_synchronization`: Enabling following/followers counters synchronization for external users. * `cleanup_attachments`: Remove attachments along with statuses. Does not affect duplicate files and attachments without status. Enabling this will increase load to database when deleting statuses on larger instances. * `show_reactions`: Let favourites and emoji reactions be viewed through the API (default: `true`). +* `search_function`: What search function to use for fulltext search. Possible values are `:websearch` and `:plain`. `:websearch` enables more complex search queries, but requires at least PostgreSQL 11. (default: `websearch`) ## Welcome * `direct_message`: - welcome message sent as a direct message. 
From 4a5ab690ef54f83e34edacd5089ce53844ffbee5 Mon Sep 17 00:00:00 2001 From: lain Date: Thu, 19 Nov 2020 16:17:14 +0100 Subject: [PATCH 04/15] Config: Set search_function to `websearch` by default --- config/config.exs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/config/config.exs b/config/config.exs index 1ac140ed0e..47eb184425 100644 --- a/config/config.exs +++ b/config/config.exs @@ -263,7 +263,8 @@ length: 16 ] ], - show_reactions: true + show_reactions: true, + search_function: :websearch config :pleroma, :welcome, direct_message: [ From 3b86ad0744558676be8de19cb3ff9ad83295aa7a Mon Sep 17 00:00:00 2001 From: lain Date: Thu, 19 Nov 2020 16:26:17 +0100 Subject: [PATCH 05/15] Changelog: Document breaking change. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e3349a2134..8b41e22726 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Polls now always return a `voters_count`, even if they are single-choice. - Admin Emails: The ap id is used as the user link in emails now. +- *Breaking* Configuration: Use `websearch` function by default. If you're using a PostgreSQL version below 11, set `:instance, :search_function` to `:plain` in your configuration. 
### Added From 81b6f02a5ee0dfd734f6cadf917161bdfd1b8195 Mon Sep 17 00:00:00 2001 From: lain Date: Thu, 19 Nov 2020 16:48:51 +0100 Subject: [PATCH 06/15] Search Test: linting --- test/pleroma/activity/search_test.exs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/pleroma/activity/search_test.exs b/test/pleroma/activity/search_test.exs index ba3257d64d..15591b726b 100644 --- a/test/pleroma/activity/search_test.exs +++ b/test/pleroma/activity/search_test.exs @@ -3,11 +3,11 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Activity.SearchTest do - use Pleroma.DataCase - - import Pleroma.Factory - alias Pleroma.Web.CommonAPI alias Pleroma.Activity.Search + alias Pleroma.Web.CommonAPI + import Pleroma.Factory + + use Pleroma.DataCase test "it finds something" do user = insert(:user) From 783fa797bbe356611aa5d61e22e62b2b4bd6dbe6 Mon Sep 17 00:00:00 2001 From: lain Date: Thu, 19 Nov 2020 16:53:26 +0100 Subject: [PATCH 07/15] SearchController Test: Fix test --- .../web/mastodon_api/controllers/search_controller_test.exs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs b/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs index 04dc6f4458..b77614b7c5 100644 --- a/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs +++ b/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs @@ -279,6 +279,8 @@ test "search", %{conn: conn} do end test "search fetches remote statuses and prefers them over other results", %{conn: conn} do + clear_config([:instance, :search_function], :plain) + capture_log(fn -> {:ok, %{id: activity_id}} = CommonAPI.post(insert(:user), %{ From b38c3de411a863e51f4e00cb34f4ce59c8d333ea Mon Sep 17 00:00:00 2001 From: lain Date: Thu, 19 Nov 2020 17:15:05 +0100 Subject: [PATCH 08/15] Gitlab CI: Update postgres --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.gitlab-ci.yml b/.gitlab-ci.yml index 9a754ed782..1b05e4a08e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -57,7 +57,7 @@ unit-testing: policy: pull services: - - name: postgres:9.6 + - name: postgres:13 alias: postgres command: ["postgres", "-c", "fsync=off", "-c", "synchronous_commit=off", "-c", "full_page_writes=off"] script: From a407e33c78121abf880f257d291f45ed28b55eeb Mon Sep 17 00:00:00 2001 From: lain Date: Fri, 20 Nov 2020 16:26:22 +0100 Subject: [PATCH 09/15] Application: Save postgres version in the environment --- lib/pleroma/application.ex | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex index 8f08a62221..f2a8c7825c 100644 --- a/lib/pleroma/application.ex +++ b/lib/pleroma/application.ex @@ -109,7 +109,28 @@ def start(_type, _args) do # See http://elixir-lang.org/docs/stable/elixir/Supervisor.html # for other strategies and supported options opts = [strategy: :one_for_one, name: Pleroma.Supervisor] - Supervisor.start_link(children, opts) + result = Supervisor.start_link(children, opts) + + set_postgres_server_version() + + result + end + + defp set_postgres_server_version() do + version = + with %{rows: [[version]]} <- Ecto.Adapters.SQL.query!(Pleroma.Repo, "show server_version"), + {num, _} <- Float.parse(version) do + num + else + e -> + Logger.warn( + "Could not get the postgres version: #{inspect(e)}.\nSetting the default value of 9.6" + ) + + 9.6 + end + + Application.put_env(:postgres, :version, version) end def load_custom_modules do From 9a1e5f5d48ef9f3b5a817c02dc8820aa99a6f693 Mon Sep 17 00:00:00 2001 From: lain Date: Fri, 20 Nov 2020 16:26:43 +0100 Subject: [PATCH 10/15] Search: Change search method based on detected pg version --- lib/pleroma/activity/search.ex | 7 ++++++- test/pleroma/activity/search_test.exs | 9 +++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/lib/pleroma/activity/search.ex 
b/lib/pleroma/activity/search.ex index cc98e2d065..ea97832251 100644 --- a/lib/pleroma/activity/search.ex +++ b/lib/pleroma/activity/search.ex @@ -19,7 +19,12 @@ def search(user, search_query, options \\ []) do offset = Keyword.get(options, :offset, 0) author = Keyword.get(options, :author) - search_function = Pleroma.Config.get([:instance, :search_function], :plain) + search_function = + if Application.get_env(:postgres, :version) >= 11 do + :websearch + else + :plain + end Activity |> Activity.with_preloaded_object() diff --git a/test/pleroma/activity/search_test.exs b/test/pleroma/activity/search_test.exs index 15591b726b..37c0feeeab 100644 --- a/test/pleroma/activity/search_test.exs +++ b/test/pleroma/activity/search_test.exs @@ -18,8 +18,9 @@ test "it finds something" do assert result.id == post.id end - test "using plainto_tsquery" do - clear_config([:instance, :search_function], :plain) + test "using plainto_tsquery on postgres < 11" do + old_config = Application.get_env(:postgres, :version) + Application.put_env(:postgres, :version, 10.0) user = insert(:user) {:ok, post} = CommonAPI.post(user, %{status: "it's wednesday my dudes"}) @@ -29,11 +30,11 @@ test "using plainto_tsquery" do assert [result] = Search.search(nil, "wednesday -dudes") assert result.id == post.id + + Application.put_env(:postgres, :version, old_config) end test "using websearch_to_tsquery" do - clear_config([:instance, :search_function], :websearch) - user = insert(:user) {:ok, _post} = CommonAPI.post(user, %{status: "it's wednesday my dudes"}) {:ok, other_post} = CommonAPI.post(user, %{status: "it's wednesday my bros"}) From cc52f0356675b9200f0ecef2b5cc96d16c6fb704 Mon Sep 17 00:00:00 2001 From: lain Date: Fri, 20 Nov 2020 16:28:00 +0100 Subject: [PATCH 11/15] Changelog: Add info about search changes --- CHANGELOG.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a682036f40..598fd59e3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ 
-10,7 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Polls now always return a `voters_count`, even if they are single-choice. - Admin Emails: The ap id is used as the user link in emails now. -- *Breaking* Configuration: Use `websearch` function by default. If you're using a PostgreSQL version below 11, set `:instance, :search_function` to `:plain` in your configuration. +- Search: When using Postgres 11+, Pleroma will use the `websearch_to_tsquery` function to parse search queries. ### Added @@ -23,7 +23,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Ability to view remote timelines, with ex. `/api/v1/timelines/public?instance=lain.com` and streams `public:remote` and `public:remote:media`. - The site title is now injected as a `title` tag like preloads or metadata. - Password reset tokens now are not accepted after a certain age. -- Added a configuration option to use the postgresql `websearch` function for more complicated search queries.
API Changes From 8532325d65ccf3dccdfc129fe0a49d1fb2cb580f Mon Sep 17 00:00:00 2001 From: lain Date: Fri, 20 Nov 2020 16:29:11 +0100 Subject: [PATCH 12/15] SearchController Test: Fix test. --- .../web/mastodon_api/controllers/search_controller_test.exs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs b/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs index b77614b7c5..2b2579857a 100644 --- a/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs +++ b/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs @@ -279,7 +279,8 @@ test "search", %{conn: conn} do end test "search fetches remote statuses and prefers them over other results", %{conn: conn} do - clear_config([:instance, :search_function], :plain) + old_config = Application.get_env(:postgres, :version) + Application.put_env(:postgres, :version, 10.0) capture_log(fn -> {:ok, %{id: activity_id}} = @@ -297,6 +298,8 @@ test "search fetches remote statuses and prefers them over other results", %{con %{"id" => ^activity_id} ] = results["statuses"] end) + + Application.put_env(:postgres, :version, old_config) end test "search doesn't show statuses that it shouldn't", %{conn: conn} do From 25a03a9b5b8b37e3ac5bd69f4b520695e4b148bb Mon Sep 17 00:00:00 2001 From: lain Date: Fri, 20 Nov 2020 16:33:11 +0100 Subject: [PATCH 13/15] Config, Docs: Remove search_function --- config/config.exs | 3 +-- docs/configuration/cheatsheet.md | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/config/config.exs b/config/config.exs index 8d05457042..be52576631 100644 --- a/config/config.exs +++ b/config/config.exs @@ -264,8 +264,7 @@ ] ], show_reactions: true, - password_reset_token_validity: 60 * 60 * 24, - search_function: :websearch + password_reset_token_validity: 60 * 60 * 24 config :pleroma, :welcome, direct_message: [ diff --git a/docs/configuration/cheatsheet.md 
b/docs/configuration/cheatsheet.md index 1b321d103c..85551362ce 100644 --- a/docs/configuration/cheatsheet.md +++ b/docs/configuration/cheatsheet.md @@ -64,7 +64,6 @@ To add configuration to your config file, you can copy it from the base config. * `cleanup_attachments`: Remove attachments along with statuses. Does not affect duplicate files and attachments without status. Enabling this will increase load to database when deleting statuses on larger instances. * `show_reactions`: Let favourites and emoji reactions be viewed through the API (default: `true`). * `password_reset_token_validity`: The time after which reset tokens aren't accepted anymore, in seconds (default: one day). -* `search_function`: What search function to use for fulltext search. Possible values are `:websearch` and `:plain`. `:websearch` enables more complex search queries, but requires at least PostgreSQL 11. (default: `websearch`) ## Welcome * `direct_message`: - welcome message sent as a direct message. From e4289792d28cb38c520e03df2ed82f6f30eb4c51 Mon Sep 17 00:00:00 2001 From: lain Date: Fri, 20 Nov 2020 16:38:05 +0100 Subject: [PATCH 14/15] Linting. --- lib/pleroma/application.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex index f2a8c7825c..17a241cdfd 100644 --- a/lib/pleroma/application.ex +++ b/lib/pleroma/application.ex @@ -116,7 +116,7 @@ def start(_type, _args) do result end - defp set_postgres_server_version() do + defp set_postgres_server_version do version = with %{rows: [[version]]} <- Ecto.Adapters.SQL.query!(Pleroma.Repo, "show server_version"), {num, _} <- Float.parse(version) do From 67b15cc033fd1154d1e6a96a5c5f141921c2e688 Mon Sep 17 00:00:00 2001 From: lain Date: Mon, 23 Nov 2020 15:29:55 +0100 Subject: [PATCH 15/15] Search: Save detected pg version in a persistent term. 
--- lib/pleroma/activity/search.ex | 2 +- lib/pleroma/application.ex | 2 +- test/pleroma/activity/search_test.exs | 6 +++--- .../web/mastodon_api/controllers/search_controller_test.exs | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/pleroma/activity/search.ex b/lib/pleroma/activity/search.ex index ea97832251..babf9520ba 100644 --- a/lib/pleroma/activity/search.ex +++ b/lib/pleroma/activity/search.ex @@ -20,7 +20,7 @@ def search(user, search_query, options \\ []) do author = Keyword.get(options, :author) search_function = - if Application.get_env(:postgres, :version) >= 11 do + if :persistent_term.get({Pleroma.Repo, :postgres_version}) >= 11 do :websearch else :plain diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex index 22936bd7f2..bd568d8580 100644 --- a/lib/pleroma/application.ex +++ b/lib/pleroma/application.ex @@ -131,7 +131,7 @@ defp set_postgres_server_version do 9.6 end - Application.put_env(:postgres, :version, version) + :persistent_term.put({Pleroma.Repo, :postgres_version}, version) end def load_custom_modules do diff --git a/test/pleroma/activity/search_test.exs b/test/pleroma/activity/search_test.exs index 37c0feeeab..9889491543 100644 --- a/test/pleroma/activity/search_test.exs +++ b/test/pleroma/activity/search_test.exs @@ -19,8 +19,8 @@ test "it finds something" do end test "using plainto_tsquery on postgres < 11" do - old_config = Application.get_env(:postgres, :version) - Application.put_env(:postgres, :version, 10.0) + old_version = :persistent_term.get({Pleroma.Repo, :postgres_version}) + :persistent_term.put({Pleroma.Repo, :postgres_version}, 10.0) user = insert(:user) {:ok, post} = CommonAPI.post(user, %{status: "it's wednesday my dudes"}) @@ -31,7 +31,7 @@ test "using plainto_tsquery on postgres < 11" do assert result.id == post.id - Application.put_env(:postgres, :version, old_config) + :persistent_term.put({Pleroma.Repo, :postgres_version}, old_version) end test "using websearch_to_tsquery" 
do diff --git a/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs b/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs index 2b2579857a..2f0bce450d 100644 --- a/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs +++ b/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs @@ -279,8 +279,8 @@ test "search", %{conn: conn} do end test "search fetches remote statuses and prefers them over other results", %{conn: conn} do - old_config = Application.get_env(:postgres, :version) - Application.put_env(:postgres, :version, 10.0) + old_version = :persistent_term.get({Pleroma.Repo, :postgres_version}) + :persistent_term.put({Pleroma.Repo, :postgres_version}, 10.0) capture_log(fn -> {:ok, %{id: activity_id}} = @@ -299,7 +299,7 @@ test "search fetches remote statuses and prefers them over other results", %{con ] = results["statuses"] end) - Application.put_env(:postgres, :version, old_config) + :persistent_term.put({Pleroma.Repo, :postgres_version}, old_version) end test "search doesn't show statuses that it shouldn't", %{conn: conn} do