better process tracking for admin dashboard
Some checks failed
CI/CD Pipeline / build (push) Failing after 5s
Some checks failed
CI/CD Pipeline / build (push) Failing after 5s
This commit is contained in:
@@ -27,9 +27,16 @@ defmodule ElixirAi.ChatUtils do
|
||||
}
|
||||
|
||||
run_function = fn current_message_id, tool_call_id, args ->
|
||||
Task.start(fn ->
|
||||
result = function.(args)
|
||||
send(server, {:tool_response, current_message_id, tool_call_id, result})
|
||||
Task.start_link(fn ->
|
||||
try do
|
||||
result = function.(args)
|
||||
send(server, {:tool_response, current_message_id, tool_call_id, result})
|
||||
rescue
|
||||
e ->
|
||||
reason = Exception.format(:error, e, __STACKTRACE__)
|
||||
Logger.error("Tool task crashed: #{reason}")
|
||||
send(server, {:tool_response, current_message_id, tool_call_id, {:error, reason}})
|
||||
end
|
||||
end)
|
||||
end
|
||||
|
||||
@@ -41,7 +48,7 @@ defmodule ElixirAi.ChatUtils do
|
||||
end
|
||||
|
||||
def request_ai_response(server, messages, tools, provider) do
|
||||
Task.start(fn ->
|
||||
Task.start_link(fn ->
|
||||
api_url = provider.completions_url
|
||||
api_key = provider.api_token
|
||||
model = provider.model_name
|
||||
@@ -82,7 +89,8 @@ defmodule ElixirAi.ChatUtils do
|
||||
:ok
|
||||
|
||||
{:error, reason} ->
|
||||
IO.warn("AI request failed: #{inspect(reason)} for #{api_url}")
|
||||
Logger.warning("AI request failed: #{inspect(reason)} for #{api_url}")
|
||||
send(server, {:ai_request_error, reason})
|
||||
end
|
||||
end)
|
||||
end
|
||||
|
||||
@@ -12,6 +12,7 @@ defmodule ElixirAi.Application do
|
||||
{Cluster.Supervisor,
|
||||
[Application.get_env(:libcluster, :topologies, []), [name: ElixirAi.ClusterSupervisor]]},
|
||||
{Phoenix.PubSub, name: ElixirAi.PubSub},
|
||||
{ElixirAi.LiveViewPG, []},
|
||||
ElixirAi.ToolTesting,
|
||||
ElixirAiWeb.Endpoint,
|
||||
{Horde.Registry,
|
||||
@@ -55,7 +56,7 @@ defmodule ElixirAi.Application do
|
||||
if Application.get_env(:elixir_ai, :env) == :test do
|
||||
Supervisor.child_spec({Task, fn -> :ok end}, id: :skip_default_provider)
|
||||
else
|
||||
{Task, fn -> ElixirAi.AiProvider.ensure_default_provider() end}
|
||||
{Task, fn -> ElixirAi.AiProvider.ensure_configured_providers() end}
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -49,6 +49,7 @@ defmodule ElixirAi.ChatRunner do
|
||||
"Last message role was #{last_message.role}, requesting AI response for conversation #{name}"
|
||||
)
|
||||
|
||||
broadcast_ui(name, :recovery_restart)
|
||||
ElixirAi.ChatUtils.request_ai_response(self(), messages, tools(self(), name), provider)
|
||||
end
|
||||
|
||||
|
||||
@@ -3,17 +3,33 @@ defmodule ElixirAi.ClusterSingleton do
|
||||
require Logger
|
||||
|
||||
@sync_delay_ms 200
|
||||
@retry_delay_ms 500
|
||||
|
||||
@singletons [ElixirAi.ConversationManager]
|
||||
|
||||
@doc """
Starts the coordinator GenServer and registers it under this module's name.
"""
def start_link(opts) do
  GenServer.start_link(__MODULE__, opts, name: __MODULE__)
end
|
||||
|
||||
@doc """
Synchronously asks the coordinator process for its current state.

The process starts out as `:pending` and switches to `:started` once the
singletons have been started.
"""
def status do
  GenServer.call(__MODULE__, :status)
end
|
||||
|
||||
@doc """
Returns the compile-time list of modules this coordinator treats as singletons.
"""
def configured_singletons do
  @singletons
end
|
||||
|
||||
# GenServer init callback: schedules the first :start_singletons attempt after
# @sync_delay_ms and reports :pending until that attempt succeeds.
def init(_opts) do
  _timer_ref = Process.send_after(self(), :start_singletons, @sync_delay_ms)

  {:ok, :pending}
end
|
||||
|
||||
def handle_info(:start_singletons, _state) do
|
||||
# Starts the singletons once at least one peer node is connected; otherwise
# re-schedules itself after @retry_delay_ms and leaves the state untouched.
# NOTE(review): while Node.list() stays empty this keeps retrying indefinitely,
# so a node that never gets a peer never starts its singletons — confirm that
# this is intended for single-node deployments.
def handle_info(:start_singletons, state) do
  case Node.list() do
    [] ->
      Logger.debug("ClusterSingleton: no peer nodes yet, retrying in #{@retry_delay_ms}ms")
      Process.send_after(self(), :start_singletons, @retry_delay_ms)
      {:noreply, state}

    _peers ->
      start_singletons()
      {:noreply, :started}
  end
end
|
||||
|
||||
defp start_singletons do
|
||||
for module <- @singletons do
|
||||
if singleton_exists?(module) do
|
||||
Logger.debug(
|
||||
@@ -37,10 +53,10 @@ defmodule ElixirAi.ClusterSingleton do
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
{:noreply, :started}
|
||||
end
|
||||
|
||||
# Replies with the current state as-is; the state itself is left unchanged.
def handle_call(:status, _from, state) do
  {:reply, state, state}
end
|
||||
|
||||
defp singleton_exists?(module) do
|
||||
case Horde.Registry.lookup(ElixirAi.ChatRegistry, module) do
|
||||
[{pid, _metadata} | _] when is_pid(pid) ->
|
||||
|
||||
@@ -21,7 +21,7 @@ defmodule ElixirAi.ConversationManager do
|
||||
def init(_) do
|
||||
Logger.info("ConversationManager initializing...")
|
||||
send(self(), :load_conversations)
|
||||
{:ok, %{conversations: :loading, subscriptions: MapSet.new()}}
|
||||
{:ok, %{conversations: :loading, subscriptions: MapSet.new(), runners: %{}}}
|
||||
end
|
||||
|
||||
def create_conversation(name, ai_provider_id) do
|
||||
@@ -40,6 +40,10 @@ defmodule ElixirAi.ConversationManager do
|
||||
GenServer.call(@name, {:get_messages, name})
|
||||
end
|
||||
|
||||
@doc """
Synchronously asks the manager process for its map of tracked runners.
"""
def list_runners, do: GenServer.call(@name, :list_runners)
|
||||
|
||||
def handle_call(message, from, %{conversations: :loading} = state) do
|
||||
Logger.warning(
|
||||
"Received call #{inspect(message)} from #{inspect(from)} while loading conversations. Retrying after delay."
|
||||
@@ -75,7 +79,7 @@ defmodule ElixirAi.ConversationManager do
|
||||
%{conversations: conversations} = state
|
||||
) do
|
||||
if Map.has_key?(conversations, name) do
|
||||
reply_with_started(name, state)
|
||||
reply_with_conversation(name, state)
|
||||
else
|
||||
{:reply, {:error, :not_found}, state}
|
||||
end
|
||||
@@ -84,10 +88,6 @@ defmodule ElixirAi.ConversationManager do
|
||||
def handle_call(:list, _from, %{conversations: conversations} = state) do
|
||||
keys = Map.keys(conversations)
|
||||
|
||||
Logger.debug(
|
||||
"list_conversations returning: #{inspect(keys, limit: :infinity, printable_limit: :infinity, binaries: :as_binaries)}"
|
||||
)
|
||||
|
||||
{:reply, keys, state}
|
||||
end
|
||||
|
||||
@@ -95,6 +95,19 @@ defmodule ElixirAi.ConversationManager do
|
||||
{:reply, Map.get(conversations, name, []), state}
|
||||
end
|
||||
|
||||
# Replies with the tracked runners map, or an empty map when the state has no
# :runners key. The state is not modified.
def handle_call(:list_runners, _from, state) do
  tracked_runners = Map.get(state, :runners, %{})

  {:reply, tracked_runners, state}
end
|
||||
|
||||
# Monitor notification for a terminated process: removes every tracked runner
# whose recorded pid is the terminated pid, logs the event, and keeps the rest
# of the state as it was.
def handle_info({:DOWN, _ref, :process, pid, reason}, %{runners: runners} = state) do
  remaining =
    for {name, info} <- runners, info.pid != pid, into: %{} do
      {name, info}
    end

  Logger.info("ConversationManager: runner #{inspect(pid)} went down (#{inspect(reason)})")

  {:noreply, %{state | runners: remaining}}
end
|
||||
|
||||
def handle_info({:db_error, reason}, state) do
|
||||
Logger.error("ConversationManager received db_error: #{inspect(reason)}")
|
||||
{:noreply, state}
|
||||
@@ -138,10 +151,13 @@ defmodule ElixirAi.ConversationManager do
|
||||
end
|
||||
end
|
||||
|
||||
# Returns {pid} to callers that only need to know the process started (e.g. create).
|
||||
defp reply_with_started(name, state, update_state \\ fn s -> s end) do
|
||||
case start_and_subscribe(name, state.subscriptions) do
|
||||
{:ok, pid, new_subscriptions} ->
|
||||
new_state = update_state.(%{state | subscriptions: new_subscriptions})
|
||||
case start_and_subscribe(name, state) do
|
||||
{:ok, pid, new_subscriptions, new_runners} ->
|
||||
new_state =
|
||||
update_state.(%{state | subscriptions: new_subscriptions, runners: new_runners})
|
||||
|
||||
{:reply, {:ok, pid}, new_state}
|
||||
|
||||
{:error, _reason} = error ->
|
||||
@@ -149,7 +165,21 @@ defmodule ElixirAi.ConversationManager do
|
||||
end
|
||||
end
|
||||
|
||||
defp start_and_subscribe(name, subscriptions) do
|
||||
# Ensures the runner for `name` is started and subscribed, then replies with
# {:ok, conversation} fetched from that runner. The runner is addressed through
# the pid returned by start_and_subscribe/2, bypassing the Horde registry
# (which may not have synced yet on the calling node).
# On {:error, reason} the error is replied as-is and the state is unchanged.
# NOTE(review): GenServer.call/2 blocks this process until the runner answers
# and exits if the runner is down or times out — confirm that is acceptable.
defp reply_with_conversation(name, state) do
  case start_and_subscribe(name, state) do
    {:error, _reason} = failure ->
      {:reply, failure, state}

    {:ok, runner_pid, subscriptions, runners} ->
      updated_state = %{state | subscriptions: subscriptions, runners: runners}
      {:reply, {:ok, GenServer.call(runner_pid, :get_conversation)}, updated_state}
  end
end
|
||||
|
||||
defp start_and_subscribe(name, state) do
|
||||
result =
|
||||
case Horde.DynamicSupervisor.start_child(
|
||||
ElixirAi.ChatRunnerSupervisor,
|
||||
@@ -163,14 +193,24 @@ defmodule ElixirAi.ConversationManager do
|
||||
case result do
|
||||
{:ok, pid} ->
|
||||
new_subscriptions =
|
||||
if MapSet.member?(subscriptions, name) do
|
||||
subscriptions
|
||||
if MapSet.member?(state.subscriptions, name) do
|
||||
state.subscriptions
|
||||
else
|
||||
Phoenix.PubSub.subscribe(ElixirAi.PubSub, conversation_message_topic(name))
|
||||
MapSet.put(subscriptions, name)
|
||||
MapSet.put(state.subscriptions, name)
|
||||
end
|
||||
|
||||
{:ok, pid, new_subscriptions}
|
||||
existing_runners = Map.get(state, :runners, %{})
|
||||
|
||||
new_runners =
|
||||
if Map.has_key?(existing_runners, name) do
|
||||
existing_runners
|
||||
else
|
||||
Process.monitor(pid)
|
||||
Map.put(existing_runners, name, %{pid: pid, node: node(pid)})
|
||||
end
|
||||
|
||||
{:ok, pid, new_subscriptions, new_runners}
|
||||
|
||||
error ->
|
||||
error
|
||||
|
||||
@@ -138,28 +138,80 @@ defmodule ElixirAi.AiProvider do
|
||||
end
|
||||
|
||||
def ensure_default_provider do
|
||||
sql = "SELECT COUNT(*) FROM ai_providers"
|
||||
params = %{}
|
||||
endpoint = Application.get_env(:elixir_ai, :ai_endpoint)
|
||||
token = Application.get_env(:elixir_ai, :ai_token)
|
||||
model = Application.get_env(:elixir_ai, :ai_model)
|
||||
|
||||
case DbHelpers.run_sql(sql, params, providers_topic()) do
|
||||
{:error, :db_error} ->
|
||||
{:error, :db_error}
|
||||
if endpoint && token && model do
|
||||
case find_by_name("default") do
|
||||
{:error, :not_found} ->
|
||||
attrs = %{
|
||||
name: "default",
|
||||
model_name: model,
|
||||
api_token: token,
|
||||
completions_url: endpoint
|
||||
}
|
||||
|
||||
rows ->
|
||||
case rows do
|
||||
[%{"count" => 0}] ->
|
||||
attrs = %{
|
||||
name: "default",
|
||||
model_name: Application.fetch_env!(:elixir_ai, :ai_model),
|
||||
api_token: Application.fetch_env!(:elixir_ai, :ai_token),
|
||||
completions_url: Application.fetch_env!(:elixir_ai, :ai_endpoint)
|
||||
}
|
||||
create(attrs)
|
||||
|
||||
create(attrs)
|
||||
{:ok, _} ->
|
||||
:ok
|
||||
|
||||
_ ->
|
||||
:ok
|
||||
{:error, reason} ->
|
||||
{:error, reason}
|
||||
end
|
||||
else
|
||||
Logger.info("AI env vars not configured, skipping default provider creation")
|
||||
:ok
|
||||
end
|
||||
end
|
||||
|
||||
@doc """
Ensures every provider listed in the YAML file named by the
`PROVIDERS_CONFIG_PATH` environment variable exists.

Does nothing when the variable is unset. The file must contain a top-level
`providers` list; any other content, or a read failure, is logged as a warning
and skipped.
"""
def ensure_providers_from_file do
  load_providers_config(System.get_env("PROVIDERS_CONFIG_PATH"))
end

# No config path configured: nothing to load.
defp load_providers_config(nil), do: :ok

# Reads the YAML file at `path` and hands each provider entry to
# ensure_provider_from_yaml/1.
defp load_providers_config(path) do
  case YamlElixir.read_from_file(path) do
    {:ok, %{"providers" => entries}} when is_list(entries) ->
      Enum.each(entries, &ensure_provider_from_yaml/1)

    {:ok, _unexpected} ->
      Logger.warning("providers.yml: expected a top-level 'providers' list, skipping")

    {:error, reason} ->
      Logger.warning("Could not read providers config from #{path}: #{inspect(reason)}")
  end
end
|
||||
|
||||
@doc """
Ensures the application-configured "default" provider exists, then ensures the
providers listed in the providers config file exist.

An `{:error, reason}` from `ensure_default_provider/0` is logged as a warning
rather than silently discarded, and does not stop the file-based providers from
being processed. Returns the result of `ensure_providers_from_file/0`.
"""
def ensure_configured_providers do
  case ensure_default_provider() do
    {:error, reason} ->
      # Previously this result was dropped, hiding startup failures.
      Logger.warning("Could not ensure default provider: #{inspect(reason)}")

    _other ->
      :ok
  end

  ensure_providers_from_file()
end
|
||||
|
||||
# Creates the provider described by one entry of the providers config file,
# unless a provider with the same name already exists.
#
# A valid entry is a map with the string keys "name", "model",
# "responses_endpoint" and "api_key"; "responses_endpoint" is stored as the
# provider's completions_url and "api_key" as its api_token.
defp ensure_provider_from_yaml(%{
       "name" => name,
       "model" => model,
       "responses_endpoint" => endpoint,
       "api_key" => api_key
     }) do
  case find_by_name(name) do
    {:error, :not_found} ->
      Logger.info("Creating provider '#{name}' from providers config file")
      create(%{name: name, model_name: model, api_token: api_key, completions_url: endpoint})

    {:ok, _} ->
      Logger.debug("Provider '#{name}' already exists, skipping")

    {:error, reason} ->
      Logger.warning("Could not check existence of provider '#{name}': #{inspect(reason)}")
  end
end

# Any other entry is invalid: log it and skip it. The entry may still carry an
# "api_key" (e.g. when only "model" is missing), so the secret is redacted
# before the entry is written to the log.
defp ensure_provider_from_yaml(entry) do
  Logger.warning(
    "Skipping invalid provider entry in providers config file (must have name, model, responses_endpoint, api_key): #{inspect(redact_api_key(entry))}"
  )
end

# Replaces the value of "api_key" with a placeholder; non-map entries and maps
# without that key are returned unchanged.
defp redact_api_key(%{"api_key" => _secret} = entry), do: Map.put(entry, "api_key", "[REDACTED]")
defp redact_api_key(entry), do: entry
|
||||
end
|
||||
|
||||
16
lib/elixir_ai/live_view_pg.ex
Normal file
16
lib/elixir_ai/live_view_pg.ex
Normal file
@@ -0,0 +1,16 @@
|
||||
defmodule ElixirAi.LiveViewPG do
  @moduledoc """
  Named :pg scope for tracking LiveView processes across the cluster.

  Each LiveView joins {:liveview, ViewModule} on connect; :pg syncs membership
  automatically and removes dead processes without any additional cleanup.
  """

  @doc """
  Child spec that starts a `:pg` scope named after this module as a permanent
  worker. The options argument is ignored.
  """
  def child_spec(_opts) do
    scope = __MODULE__

    %{
      id: scope,
      start: {:pg, :start_link, [scope]},
      type: :worker,
      restart: :permanent
    }
  end
end
|
||||
Reference in New Issue
Block a user