Search: Basic Qdrant/Ollama search

This commit is contained in:
Lain Soykaf 2024-05-14 14:13:37 +04:00
parent c954437cc0
commit cd7e2138d1
3 changed files with 186 additions and 0 deletions

View file

@ -915,6 +915,15 @@
config :pleroma, Pleroma.Uploaders.Uploader, timeout: 30_000
config :pleroma, Pleroma.Search.QdrantSearch,
qdrant_url: "http://127.0.0.1:6333/",
qdrant_api_key: nil,
ollama_url: "http://127.0.0.1:11434",
ollama_model: "snowflake-arctic-embed:xs",
qdrant_index_configuration: %{
vectors: %{size: 384, distance: "Cosine"}
}
# Import environment specific config. This must remain at the bottom
# of this file so it overrides the configuration defined above.
import_config "#{Mix.env()}.exs"

View file

@ -0,0 +1,60 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Mix.Tasks.Pleroma.Search.Indexer do
import Mix.Pleroma
import Ecto.Query
alias Pleroma.Workers.SearchIndexingWorker
def run(["index" | options]) do
{options, [], []} =
OptionParser.parse(
options,
strict: [
limit: :integer
]
)
start_pleroma()
limit = Keyword.get(options, :limit, 100_000)
per_step = 1000
chunks = max(div(limit, per_step), 1)
1..chunks
|> Enum.each(fn step ->
q =
from(a in Pleroma.Activity,
limit: ^per_step,
offset: ^per_step * (^step - 1),
select: [:id],
order_by: [desc: :id]
)
{:ok, ids} =
Pleroma.Repo.transaction(fn ->
Pleroma.Repo.stream(q, timeout: :infinity)
|> Enum.map(fn a ->
a.id
end)
end)
IO.puts("Got #{length(ids)} activities, adding to indexer")
ids
|> Enum.chunk_every(100)
|> Enum.each(fn chunk ->
IO.puts("Adding #{length(chunk)} activities to indexing queue")
chunk
|> Enum.map(fn id ->
SearchIndexingWorker.new(%{"op" => "add_to_index", "activity" => id})
end)
|> Oban.insert_all()
end)
end)
end
end

View file

@ -0,0 +1,117 @@
defmodule Pleroma.Search.QdrantSearch do
@behaviour Pleroma.Search.SearchBackend
import Ecto.Query
alias Pleroma.Activity
alias __MODULE__.QdrantClient
alias __MODULE__.OllamaClient
import Pleroma.Search.Meilisearch, only: [object_to_search_data: 1]
def initialize_index() do
payload = Pleroma.Config.get([Pleroma.Search.QdrantSearch, :qdrant_index_configuration])
QdrantClient.put("/collections/posts", payload)
end
def drop_index() do
QdrantClient.delete("/collections/posts")
end
def get_embedding(text) do
with {:ok, %{body: %{"embedding" => embedding}}} <-
OllamaClient.post("/api/embeddings", %{
prompt: text,
model: Pleroma.Config.get([Pleroma.Search.QdrantSearch, :ollama_model])
})
|> IO.inspect() do
{:ok, embedding}
else
_ ->
{:error, "Failed to get embedding"}
end
end
defp build_index_payload(activity, embedding) do
%{
points: [
%{
id: activity.id |> FlakeId.from_string() |> Ecto.UUID.cast!(),
vector: embedding
}
]
}
end
defp build_search_payload(embedding) do
%{
vector: embedding,
limit: 20
}
end
@impl true
def add_to_index(activity) do
# This will only index public or unlisted notes
maybe_search_data = object_to_search_data(activity.object)
IO.puts("TRYING TO INDEX\n\n")
if activity.data["type"] == "Create" and maybe_search_data do
with {:ok, embedding} <- get_embedding(maybe_search_data.content),
{:ok, %{status: 200}} <-
QdrantClient.put(
"/collections/posts/points",
build_index_payload(activity, embedding)
) do
:ok
else
e -> {:error, e}
end
else
:ok
end
end
@impl true
def search(_user, query, _options) do
with {:ok, embedding} <- get_embedding(query),
{:ok, %{body: %{"result" => result}}} <-
QdrantClient.post("/collections/posts/points/search", build_search_payload(embedding)) do
ids =
Enum.map(result, fn %{"id" => id} ->
Ecto.UUID.dump!(id)
end)
from(a in Activity, where: a.id in ^ids)
|> Activity.with_preloaded_object()
|> Activity.restrict_deactivated_users()
|> Ecto.Query.order_by([a], fragment("array_position(?, ?)", ^ids, a.id))
|> Pleroma.Repo.all()
else
_ ->
[]
end
end
@impl true
def remove_from_index(_object) do
:ok
end
end
defmodule Pleroma.Search.QdrantSearch.OllamaClient do
use Tesla
plug(Tesla.Middleware.BaseUrl, Pleroma.Config.get([Pleroma.Search.QdrantSearch, :ollama_url]))
plug(Tesla.Middleware.JSON)
end
defmodule Pleroma.Search.QdrantSearch.QdrantClient do
use Tesla
plug(Tesla.Middleware.BaseUrl, Pleroma.Config.get([Pleroma.Search.QdrantSearch, :qdrant_url]))
plug(Tesla.Middleware.JSON)
plug(Tesla.Middleware.Headers, [
{"api-key", Pleroma.Config.get([Pleroma.Search.QdrantSearch, :qdrant_api_key])}
])
end