Merge branch 'feat/allow_alt_text_search_config' into 'develop'
allow user defined text search config in database See merge request pleroma/pleroma!3275
This commit is contained in:
commit
6e68058b63
6 changed files with 123 additions and 5 deletions
|
@ -141,3 +141,21 @@ but should only be run if necessary. **It is safe to cancel this.**
|
||||||
```sh
|
```sh
|
||||||
mix pleroma.database ensure_expiration
|
mix pleroma.database ensure_expiration
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Change Text Search Configuration
|
||||||
|
|
||||||
|
Change `default_text_search_config` for database and (if necessary) text_search_config used in index, then rebuild index (it may take time).
|
||||||
|
|
||||||
|
=== "OTP"
|
||||||
|
|
||||||
|
```sh
|
||||||
|
./bin/pleroma_ctl database set_text_search_config english
|
||||||
|
```
|
||||||
|
|
||||||
|
=== "From Source"
|
||||||
|
|
||||||
|
```sh
|
||||||
|
mix pleroma.database set_text_search_config english
|
||||||
|
```
|
||||||
|
|
||||||
|
See [PostgreSQL documentation](https://www.postgresql.org/docs/current/textsearch-configuration.html) and `docs/configuration/howto_search_cjk.md` for more detail.
|
||||||
|
|
42
docs/configuration/howto_search_cjk.md
Normal file
42
docs/configuration/howto_search_cjk.md
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
# How to enable text search for Chinese, Japanese and Korean
|
||||||
|
|
||||||
|
Pleroma's full text search feature is powered by PostgreSQL's native [text search](https://www.postgresql.org/docs/current/textsearch.html). It works well out of the box for most languages, but needs extra configuration for some Asian languages like Chinese, Japanese and Korean (CJK).
|
||||||
|
|
||||||
|
|
||||||
|
## Setup and test the new search config
|
||||||
|
|
||||||
|
In most cases, you will need to install an extension to support parsing CJK text. Here are a few extensions you may choose from; you are also more than welcome to share additional ones that work for you with the rest of the Pleroma community.
|
||||||
|
|
||||||
|
* [a generic n-gram parser](https://github.com/huangjimmy/pg_cjk_parser) supports Simplified/Traditional Chinese, Japanese, and Korean
|
||||||
|
* [a Korean parser](https://github.com/i0seph/textsearch_ko) based on mecab
|
||||||
|
* [a Japanese parser](https://www.amris.co.jp/tsja/index.html) based on mecab
|
||||||
|
* [zhparser](https://github.com/amutu/zhparser/) is a PostgreSQL extension based on the Simple Chinese Word Segmentation (SCWS)
|
||||||
|
* [another Chinese parser](https://github.com/jaiminpan/pg_jieba) based on Jieba Chinese Word Segmentation
|
||||||
|
|
||||||
|
Once you have the new search config, make sure you test it with the `pleroma` user in PostgreSQL (change `YOUR.CONFIG` to your real configuration name):
|
||||||
|
```
|
||||||
|
SELECT ts_debug('YOUR.CONFIG', '安装和配置Nginx, ElixirとErlangをインストールします');
|
||||||
|
```
|
||||||
|
Check output of the query, and see if it matches your expectation.
|
||||||
|
|
||||||
|
|
||||||
|
## Update text search config and index in database
|
||||||
|
|
||||||
|
=== "OTP"
|
||||||
|
|
||||||
|
```sh
|
||||||
|
./bin/pleroma_ctl database set_text_search_config YOUR.CONFIG
|
||||||
|
```
|
||||||
|
|
||||||
|
=== "From Source"
|
||||||
|
|
||||||
|
```sh
|
||||||
|
mix pleroma.database set_text_search_config YOUR.CONFIG
|
||||||
|
```
|
||||||
|
|
||||||
|
Note: index update may take a while.
|
||||||
|
|
||||||
|
## Restart database connection
|
||||||
|
Since some of the changes above only take effect on a new database connection, you will have to restart either the Pleroma or the PostgreSQL process, or use the `pg_terminate_backend` SQL function to close the existing connections without restarting either.
|
||||||
|
|
||||||
|
Now the search results for statuses should be much more friendly for your language of choice. The results for searching users and tags are not changed, as the default parsing/matching should work for most cases.
|
|
@ -167,4 +167,51 @@ def run(["ensure_expiration"]) do
|
||||||
end)
|
end)
|
||||||
|> Stream.run()
|
|> Stream.run()
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Sets the database-wide `default_text_search_config` to `tsconfig`, then
# rebuilds the full-text search index on `objects` so it uses the new config.
#
# With RUM enabled, the `objects_fts_update()` trigger function is recreated and
# every row of `objects` is touched so that `fts_content` is recomputed.
# Otherwise the `objects_fts` GIN index is dropped and recreated.
def run(["set_text_search_config", tsconfig]) do
  start_pleroma()

  %{rows: [[tsc]]} = Ecto.Adapters.SQL.query!(Pleroma.Repo, "SHOW default_text_search_config;")
  shell_info("Current default_text_search_config: #{tsc}")

  %{rows: [[db]]} = Ecto.Adapters.SQL.query!(Pleroma.Repo, "SELECT current_database();")
  shell_info("Update default_text_search_config: #{tsconfig}")

  # Both values are interpolated into the SQL text, so quote them explicitly:
  # the database name as an identifier (it may contain hyphens, upper-case
  # letters, etc.) and the config name as a string literal.
  quoted_db = ~s("#{String.replace(db, ~s("), ~s(""))}")
  quoted_tsconfig = "'#{String.replace(tsconfig, "'", "''")}'"

  %{messages: msg} =
    Ecto.Adapters.SQL.query!(
      Pleroma.Repo,
      "ALTER DATABASE #{quoted_db} SET default_text_search_config = #{quoted_tsconfig};"
    )

  # A non-existent config does not raise an exception; it only yields a
  # non-empty list of messages.
  if msg != [] do
    shell_info("Error: #{inspect(msg, pretty: true)}")
  else
    rum_enabled = Pleroma.Config.get([:database, :rum_enabled])
    shell_info("Recreate index, RUM: #{rum_enabled}")

    # Note SQL below needs to be kept up-to-date with latest GIN or RUM index definition in future
    if rum_enabled do
      Ecto.Adapters.SQL.query!(
        Pleroma.Repo,
        "CREATE OR REPLACE FUNCTION objects_fts_update() RETURNS trigger AS $$ BEGIN
        new.fts_content := to_tsvector(new.data->>'content');
        RETURN new;
        END
        $$ LANGUAGE plpgsql"
      )

      shell_info("Refresh RUM index")
      Ecto.Adapters.SQL.query!(Pleroma.Repo, "UPDATE objects SET updated_at = NOW();")
    else
      Ecto.Adapters.SQL.query!(Pleroma.Repo, "DROP INDEX IF EXISTS objects_fts;")

      Ecto.Adapters.SQL.query!(
        Pleroma.Repo,
        "CREATE INDEX objects_fts ON objects USING gin(to_tsvector(#{quoted_tsconfig}, data->>'content')); "
      )
    end

    shell_info("Done.")
  end
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -64,7 +64,7 @@ defp query_with(q, :gin, search_query, :plain) do
|
||||||
from([a, o] in q,
|
from([a, o] in q,
|
||||||
where:
|
where:
|
||||||
fragment(
|
fragment(
|
||||||
"to_tsvector('english', ?->>'content') @@ plainto_tsquery('english', ?)",
|
"to_tsvector(?->>'content') @@ plainto_tsquery(?)",
|
||||||
o.data,
|
o.data,
|
||||||
^search_query
|
^search_query
|
||||||
)
|
)
|
||||||
|
@ -75,7 +75,7 @@ defp query_with(q, :gin, search_query, :websearch) do
|
||||||
from([a, o] in q,
|
from([a, o] in q,
|
||||||
where:
|
where:
|
||||||
fragment(
|
fragment(
|
||||||
"to_tsvector('english', ?->>'content') @@ websearch_to_tsquery('english', ?)",
|
"to_tsvector(?->>'content') @@ websearch_to_tsquery(?)",
|
||||||
o.data,
|
o.data,
|
||||||
^search_query
|
^search_query
|
||||||
)
|
)
|
||||||
|
@ -86,7 +86,7 @@ defp query_with(q, :rum, search_query, :plain) do
|
||||||
from([a, o] in q,
|
from([a, o] in q,
|
||||||
where:
|
where:
|
||||||
fragment(
|
fragment(
|
||||||
"? @@ plainto_tsquery('english', ?)",
|
"? @@ plainto_tsquery(?)",
|
||||||
o.fts_content,
|
o.fts_content,
|
||||||
^search_query
|
^search_query
|
||||||
),
|
),
|
||||||
|
@ -98,7 +98,7 @@ defp query_with(q, :rum, search_query, :websearch) do
|
||||||
from([a, o] in q,
|
from([a, o] in q,
|
||||||
where:
|
where:
|
||||||
fragment(
|
fragment(
|
||||||
"? @@ websearch_to_tsquery('english', ?)",
|
"? @@ websearch_to_tsquery(?)",
|
||||||
o.fts_content,
|
o.fts_content,
|
||||||
^search_query
|
^search_query
|
||||||
),
|
),
|
||||||
|
|
|
@ -0,0 +1,11 @@
|
||||||
|
defmodule Pleroma.Repo.Migrations.AddDefaultTextSearchConfig do
  use Ecto.Migration

  # Sets `default_text_search_config` to 'english' for the current database.
  #
  # ALTER DATABASE needs the database name spelled out, so the statement is
  # built dynamically inside a DO block. `format(... %I ...)` quotes the name
  # as an identifier, so names containing hyphens, upper-case letters or other
  # special characters do not break the statement.
  def change do
    execute("DO $$
    BEGIN
    execute format('ALTER DATABASE %I SET default_text_search_config = ''english''', current_database());
    END
    $$;")
  end
end
|
|
@ -17,7 +17,7 @@ def up do
|
||||||
|
|
||||||
execute("CREATE FUNCTION objects_fts_update() RETURNS trigger AS $$
|
execute("CREATE FUNCTION objects_fts_update() RETURNS trigger AS $$
|
||||||
begin
|
begin
|
||||||
new.fts_content := to_tsvector('english', new.data->>'content');
|
new.fts_content := to_tsvector(new.data->>'content');
|
||||||
return new;
|
return new;
|
||||||
end
|
end
|
||||||
$$ LANGUAGE plpgsql")
|
$$ LANGUAGE plpgsql")
|
||||||
|
|
Loading…
Reference in a new issue