general solution to voice control
Some checks failed
CI/CD Pipeline / build (push) Failing after 4s

This commit is contained in:
2026-03-25 09:22:48 -06:00
parent 86ff82a015
commit d857e91241
14 changed files with 309 additions and 31 deletions

View File

@@ -0,0 +1,68 @@
defmodule ElixirAi.AiControllable do
  @moduledoc """
  Behaviour and `use` macro for LiveViews whose page can be driven by AI tool calls.

  A LiveView that `use`s this module implements two callbacks:

    * `ai_tools/0` — returns the list of tool spec maps the page offers
    * `handle_ai_tool_call/3` — runs one dispatched tool call and returns
      `{result_string, socket}`

  `use ElixirAi.AiControllable` injects into the calling module:

    * a `handle_info/2` clause for `{:page_tool_call, tool_name, args, reply_to}` that
      invokes `handle_ai_tool_call/3` and sends `{:page_tool_result, tool_name, result}`
      back to `reply_to`
    * a `handle_info/2` clause for `{:get_ai_tools, reply_to}` that sends
      `{:ai_tools_response, self(), ai_tools()}` back to `reply_to`
    * an `on_mount` registration of `ElixirAi.AiControllable.Hook`, which joins the
      `:pg` group keyed by `voice_session_id` so VoiceLive can discover sibling page
      LiveViews

  ## Usage

      defmodule MyAppWeb.SomeLive do
        use MyAppWeb, :live_view
        use ElixirAi.AiControllable

        @impl ElixirAi.AiControllable
        def ai_tools do
          [
            %{
              name: "do_something",
              description: "Does something useful",
              parameters: %{
                "type" => "object",
                "properties" => %{"value" => %{"type" => "string"}},
                "required" => ["value"]
              }
            }
          ]
        end

        @impl ElixirAi.AiControllable
        def handle_ai_tool_call("do_something", %{"value" => val}, socket) do
          {"done: \#{val}", assign(socket, value: val)}
        end
      end
  """

  @callback ai_tools() :: [map()]
  @callback handle_ai_tool_call(tool_name :: String.t(), args :: map(), socket :: term()) ::
              {String.t(), term()}

  defmacro __using__(_opts) do
    quote do
      @behaviour ElixirAi.AiControllable

      # NOTE(review): `on_mount` is presumably the macro brought in by
      # `use ..., :live_view`, so this `use` should come after it — confirm.
      on_mount(ElixirAi.AiControllable.Hook)

      # Run the requested tool against this LiveView and report the outcome to the caller.
      def handle_info({:page_tool_call, name, arguments, caller}, socket) do
        {outcome, next_socket} = handle_ai_tool_call(name, arguments, socket)
        send(caller, {:page_tool_result, name, outcome})
        {:noreply, next_socket}
      end

      # Tell the requester which tools this page offers, tagged with our own pid.
      def handle_info({:get_ai_tools, requester}, socket) do
        send(requester, {:ai_tools_response, self(), ai_tools()})
        {:noreply, socket}
      end
    end
  end
end

View File

@@ -0,0 +1,35 @@
defmodule ElixirAi.AiControllable.Hook do
  @moduledoc """
  LiveView `on_mount` hook that registers a page LiveView in the
  `ElixirAi.PageToolsPG` pg scope so VoiceLive can discover it.

  The group key is `{:page, voice_session_id}`, where `voice_session_id` is read
  from the session passed to `on_mount/4`, tying the page LiveView to the same
  browser session as VoiceLive.

  Registration only happens when the session carries a `voice_session_id` and the
  LiveView module exports `ai_tools/0` (i.e. it uses `ElixirAi.AiControllable`).
  In that case the id is also stored in the socket assigns as `:voice_session_id`.
  """

  import Phoenix.LiveView
  import Phoenix.Component, only: [assign: 3]

  def on_mount(:default, _params, session, socket) do
    session_id = session["voice_session_id"]

    if session_id && function_exported?(socket.view, :ai_tools, 0) do
      # The group is only joined once the socket is connected.
      if connected?(socket), do: join_page_group(session_id)

      {:cont, assign(socket, :voice_session_id, session_id)}
    else
      {:cont, socket}
    end
  end

  # Best-effort join: an exit raised by the join call is swallowed so mounting
  # the page still succeeds.
  defp join_page_group(session_id) do
    :pg.join(ElixirAi.PageToolsPG, {:page, session_id}, self())
  catch
    :exit, _reason -> :ok
  end
end

View File

@@ -126,6 +126,42 @@ defmodule ElixirAi.AiTools do
) )
end end
# ---------------------------------------------------------------------------
# Page tools (dynamic, from AiControllable LiveViews)
# ---------------------------------------------------------------------------
@doc """
Builds tool structs for page tools discovered from AiControllable LiveViews.

Each entry in `pids_and_specs` is `{page_pid, [tool_spec, ...]}` where
`tool_spec` is a map with `:name`, `:description`, and `:parameters`.

The generated function sends `{:page_tool_call, name, args, self()}` to the
page LiveView pid and blocks in the calling process until one of:

  * the page replies with `{:page_tool_result, name, result}` → `{:ok, result}`
  * the page process goes down (or was already dead) → `{:ok, message}`
    returned immediately instead of waiting out the timeout
  * 5 seconds elapse → `{:ok, "page tool <name> timed out"}`
"""
def build_page_tools(server, pids_and_specs) do
  for {page_pid, tool_specs} <- pids_and_specs, spec <- tool_specs do
    ai_tool(
      name: spec.name,
      description: spec.description,
      function: fn args -> call_page_tool(page_pid, spec.name, args) end,
      parameters: spec.parameters,
      server: server
    )
  end
end

# Sends one tool call to the page LiveView and waits for its reply.
# The page is monitored so a dead or crashing LiveView is reported right away
# rather than after the full timeout.
defp call_page_tool(page_pid, tool_name, args) do
  monitor_ref = Process.monitor(page_pid)
  send(page_pid, {:page_tool_call, tool_name, args, self()})

  try do
    receive do
      {:page_tool_result, ^tool_name, result} ->
        {:ok, result}

      {:DOWN, ^monitor_ref, :process, _pid, _reason} ->
        {:ok, "page tool #{tool_name} failed: page is no longer available"}
    after
      5_000 -> {:ok, "page tool #{tool_name} timed out"}
    end
  after
    # Drop the monitor and any :DOWN message already queued for it.
    Process.demonitor(monitor_ref, [:flush])
  end
end
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Private # Private
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------

View File

@@ -2,7 +2,6 @@ defmodule ElixirAi.Application do
@moduledoc false @moduledoc false
use Application use Application
@impl true
def start(_type, _args) do def start(_type, _args) do
children = [ children = [
ElixirAiWeb.Telemetry, ElixirAiWeb.Telemetry,
@@ -13,6 +12,7 @@ defmodule ElixirAi.Application do
[Application.get_env(:libcluster, :topologies, []), [name: ElixirAi.ClusterSupervisor]]}, [Application.get_env(:libcluster, :topologies, []), [name: ElixirAi.ClusterSupervisor]]},
{Phoenix.PubSub, name: ElixirAi.PubSub}, {Phoenix.PubSub, name: ElixirAi.PubSub},
{ElixirAi.LiveViewPG, []}, {ElixirAi.LiveViewPG, []},
{ElixirAi.PageToolsPG, []},
{ElixirAi.AudioProcessingPG, []}, {ElixirAi.AudioProcessingPG, []},
{DynamicSupervisor, name: ElixirAi.AudioWorkerSupervisor, strategy: :one_for_one}, {DynamicSupervisor, name: ElixirAi.AudioWorkerSupervisor, strategy: :one_for_one},
ElixirAi.ToolTesting, ElixirAi.ToolTesting,
@@ -39,7 +39,6 @@ defmodule ElixirAi.Application do
Supervisor.start_link(children, opts) Supervisor.start_link(children, opts)
end end
@impl true
def config_change(changed, _new, removed) do def config_change(changed, _new, removed) do
ElixirAiWeb.Endpoint.config_change(changed, removed) ElixirAiWeb.Endpoint.config_change(changed, removed)
:ok :ok

View File

@@ -50,6 +50,10 @@ defmodule ElixirAi.ChatRunner do
GenServer.call(via(name), {:session, {:deregister_liveview_pid, liveview_pid}}) GenServer.call(via(name), {:session, {:deregister_liveview_pid, liveview_pid}})
end end
@doc """
Registers `page_tools` (a list) with the runner addressed by `name`.

Performs a synchronous `GenServer.call/2` carrying
`{:session, {:register_page_tools, page_tools}}` and returns the server's reply.
"""
def register_page_tools(name, page_tools) when is_list(page_tools) do
  request = {:session, {:register_page_tools, page_tools}}

  name
  |> via()
  |> GenServer.call(request)
end
def get_conversation(name) do def get_conversation(name) do
GenServer.call(via(name), {:conversation, :get_conversation}) GenServer.call(via(name), {:conversation, :get_conversation})
end end
@@ -130,6 +134,7 @@ defmodule ElixirAi.ChatRunner do
tool_choice: tool_choice, tool_choice: tool_choice,
server_tools: server_tools, server_tools: server_tools,
liveview_tools: liveview_tools, liveview_tools: liveview_tools,
page_tools: [],
provider: provider, provider: provider,
liveview_pids: %{} liveview_pids: %{}
}} }}

View File

@@ -11,7 +11,7 @@ defmodule ElixirAi.ChatRunner.ConversationCalls do
ElixirAi.ChatUtils.request_ai_response( ElixirAi.ChatUtils.request_ai_response(
self(), self(),
messages_with_system_prompt(new_state.messages, state.system_prompt), messages_with_system_prompt(new_state.messages, state.system_prompt),
state.server_tools ++ state.liveview_tools, state.server_tools ++ state.liveview_tools ++ state.page_tools,
state.provider, state.provider,
effective_tool_choice effective_tool_choice
) )

View File

@@ -10,6 +10,10 @@ defmodule ElixirAi.ChatRunner.LiveviewSession do
{:reply, :ok, %{state | liveview_pids: Map.put(state.liveview_pids, liveview_pid, ref)}} {:reply, :ok, %{state | liveview_pids: Map.put(state.liveview_pids, liveview_pid, ref)}}
end end
# Replaces the `:page_tools` entry of the state with the given list and replies `:ok`.
def handle_call({:register_page_tools, page_tools}, _from, state) do
  new_state = Map.replace!(state, :page_tools, page_tools)
  {:reply, :ok, new_state}
end
def handle_call({:deregister_liveview_pid, liveview_pid}, _from, state) do def handle_call({:deregister_liveview_pid, liveview_pid}, _from, state) do
case Map.pop(state.liveview_pids, liveview_pid) do case Map.pop(state.liveview_pids, liveview_pid) do
{nil, _} -> {nil, _} ->

View File

@@ -111,7 +111,7 @@ defmodule ElixirAi.ChatRunner.StreamHandler do
{failed, pending} -> {failed, pending} ->
with {:ok, decoded_args} <- Jason.decode(tool_call.arguments), with {:ok, decoded_args} <- Jason.decode(tool_call.arguments),
tool when not is_nil(tool) <- tool when not is_nil(tool) <-
Enum.find(state.server_tools ++ state.liveview_tools, fn t -> Enum.find(state.server_tools ++ state.liveview_tools ++ state.page_tools, fn t ->
t.name == tool_call.name t.name == tool_call.name
end) do end) do
tool.run_function.(id, tool_call.id, decoded_args) tool.run_function.(id, tool_call.id, decoded_args)
@@ -160,7 +160,7 @@ defmodule ElixirAi.ChatRunner.StreamHandler do
ElixirAi.ChatUtils.request_ai_response( ElixirAi.ChatUtils.request_ai_response(
self(), self(),
messages_with_system_prompt(state.messages ++ [new_message], state.system_prompt), messages_with_system_prompt(state.messages ++ [new_message], state.system_prompt),
state.server_tools ++ state.liveview_tools, state.server_tools ++ state.liveview_tools ++ state.page_tools,
state.provider, state.provider,
state.tool_choice state.tool_choice
) )

View File

@@ -0,0 +1,15 @@
defmodule ElixirAi.PageToolsPG do
  @moduledoc """
  Named `:pg` scope used to track LiveViews that implement AiControllable.

  Groups are keyed by `{:page, voice_session_id}` — one group per browser session.
  """

  @doc """
  Child spec that starts a `:pg` scope named after this module as a permanent worker.

  The argument is ignored.
  """
  def child_spec(_opts) do
    scope = __MODULE__

    %{
      id: scope,
      start: {:pg, :start_link, [scope]},
      restart: :permanent,
      type: :worker
    }
  end
end

View File

@@ -1,5 +1,6 @@
defmodule ElixirAiWeb.ChatLive do defmodule ElixirAiWeb.ChatLive do
use ElixirAiWeb, :live_view use ElixirAiWeb, :live_view
use ElixirAi.AiControllable
require Logger require Logger
import ElixirAiWeb.Spinner import ElixirAiWeb.Spinner
import ElixirAiWeb.ChatMessage import ElixirAiWeb.ChatMessage
@@ -7,6 +8,38 @@ defmodule ElixirAiWeb.ChatLive do
alias ElixirAi.{AiProvider, ChatRunner, ConversationManager} alias ElixirAi.{AiProvider, ChatRunner, ConversationManager}
import ElixirAi.PubsubTopics import ElixirAi.PubsubTopics
# Tools this page exposes to the AI: a single `set_user_input` tool that takes
# a required string `"text"` argument.
@impl ElixirAi.AiControllable
def ai_tools do
  text_property = %{
    "type" => "string",
    "description" => "The text to place in the chat input field"
  }

  parameters = %{
    "type" => "object",
    "properties" => %{"text" => text_property},
    "required" => ["text"]
  }

  description =
    "Set the text in the chat input field. Use this to pre-fill a message for the user. " <>
      "The user will still need to press Send (or you can describe what you filled in)."

  [%{name: "set_user_input", description: description, parameters: parameters}]
end
# Handles AI tool calls dispatched to this page; returns `{result_string, socket}`.
#
# `set_user_input` only accepts a string `"text"`: the arguments come from a
# model-generated tool call, and a non-string value would either fail during
# interpolation or end up in the `user_input` assign as a non-string.
@impl ElixirAi.AiControllable
def handle_ai_tool_call("set_user_input", %{"text" => text}, socket) when is_binary(text) do
  {"user input set to: #{text}", assign(socket, user_input: text)}
end

# Known tool, unusable arguments: say so explicitly instead of "unknown tool"
# so the caller can tell what went wrong. The socket is left untouched.
def handle_ai_tool_call("set_user_input", _args, socket) do
  {"invalid arguments for set_user_input: expected a string \"text\"", socket}
end

# Any other tool name is not provided by this page.
def handle_ai_tool_call(_tool_name, _args, socket) do
  {"unknown tool", socket}
end
@impl Phoenix.LiveView
def mount(%{"name" => name}, _session, socket) do def mount(%{"name" => name}, _session, socket) do
case ConversationManager.open_conversation(name) do case ConversationManager.open_conversation(name) do
{:ok, conversation} -> {:ok, conversation} ->
@@ -50,6 +83,7 @@ defmodule ElixirAiWeb.ChatLive do
end end
end end
@impl Phoenix.LiveView
def render(assigns) do def render(assigns) do
~H""" ~H"""
<div class="flex flex-col h-full rounded-lg"> <div class="flex flex-col h-full rounded-lg">
@@ -119,6 +153,7 @@ defmodule ElixirAiWeb.ChatLive do
""" """
end end
@impl Phoenix.LiveView
def handle_event("update_user_input", %{"user_input" => user_input}, socket) do def handle_event("update_user_input", %{"user_input" => user_input}, socket) do
{:noreply, assign(socket, user_input: user_input)} {:noreply, assign(socket, user_input: user_input)}
end end
@@ -293,6 +328,7 @@ defmodule ElixirAiWeb.ChatLive do
{:noreply, assign(socket, background_color: color)} {:noreply, assign(socket, background_color: color)}
end end
@impl Phoenix.LiveView
def terminate(_reason, %{assigns: %{conversation_name: name}} = socket) do def terminate(_reason, %{assigns: %{conversation_name: name}} = socket) do
if connected?(socket) do if connected?(socket) do
ChatRunner.deregister_liveview_pid(name, self()) ChatRunner.deregister_liveview_pid(name, self())

View File

@@ -39,7 +39,7 @@ defmodule ElixirAiWeb.ChatMessage do
def user_message(assigns) do def user_message(assigns) do
~H""" ~H"""
<div class="mb-2 text-sm text-right"> <div class="mb-2 text-sm text-right">
<div class={"w-fit px-3 py-2 rounded-lg bg-seafoam-950 text-seafoam-50 #{max_width_class()} text-left"}> <div class={"ml-auto w-fit px-3 py-2 rounded-lg bg-seafoam-950 text-seafoam-50 #{max_width_class()} text-left"}>
{@content} {@content}
</div> </div>
</div> </div>

View File

@@ -0,0 +1,23 @@
defmodule ElixirAiWeb.Plugs.VoiceSessionId do
  @moduledoc """
  Ensures a `voice_session_id` exists in the Plug session.

  This UUID ties VoiceLive (root layout) to page LiveViews (inner content)
  so they can discover each other via `:pg` process groups.

  The session must already be fetched when this plug runs; an existing id is
  left unchanged.
  """

  @behaviour Plug

  import Plug.Conn

  @session_key "voice_session_id"

  @doc "Passes the options through untouched; this plug takes no options."
  @impl Plug
  def init(opts), do: opts

  @doc """
  Returns `conn` with a freshly generated UUID stored under `"voice_session_id"`
  when the session has no value for that key, and `conn` unchanged otherwise.
  """
  @impl Plug
  def call(conn, _opts) do
    case get_session(conn, @session_key) do
      nil -> put_session(conn, @session_key, Ecto.UUID.generate())
      _existing -> conn
    end
  end
end

View File

@@ -4,6 +4,7 @@ defmodule ElixirAiWeb.Router do
pipeline :browser do pipeline :browser do
plug :accepts, ["html"] plug :accepts, ["html"]
plug :fetch_session plug :fetch_session
plug ElixirAiWeb.Plugs.VoiceSessionId
plug :fetch_live_flash plug :fetch_live_flash
plug :put_root_layout, html: {ElixirAiWeb.Layouts, :root} plug :put_root_layout, html: {ElixirAiWeb.Layouts, :root}
plug :protect_from_forgery plug :protect_from_forgery

View File

@@ -4,10 +4,12 @@ defmodule ElixirAiWeb.VoiceLive do
alias ElixirAiWeb.Voice.Recording alias ElixirAiWeb.Voice.Recording
alias ElixirAiWeb.Voice.VoiceConversation alias ElixirAiWeb.Voice.VoiceConversation
alias ElixirAi.{AiProvider, ChatRunner, ConversationManager} alias ElixirAi.{AiProvider, AiTools, ChatRunner, ConversationManager}
import ElixirAi.PubsubTopics import ElixirAi.PubsubTopics
def mount(_params, _session, socket) do def mount(_params, session, socket) do
voice_session_id = session["voice_session_id"]
{:ok, {:ok,
assign(socket, assign(socket,
state: :idle, state: :idle,
@@ -17,7 +19,8 @@ defmodule ElixirAiWeb.VoiceLive do
messages: [], messages: [],
streaming_response: nil, streaming_response: nil,
runner_pid: nil, runner_pid: nil,
ai_error: nil ai_error: nil,
voice_session_id: voice_session_id
), layout: false} ), layout: false}
end end
@@ -98,7 +101,10 @@ defmodule ElixirAiWeb.VoiceLive do
if name do if name do
if socket.assigns.runner_pid do if socket.assigns.runner_pid do
try do try do
GenServer.call(socket.assigns.runner_pid, {:session, {:deregister_liveview_pid, self()}}) GenServer.call(
socket.assigns.runner_pid,
{:session, {:deregister_liveview_pid, self()}}
)
catch catch
:exit, _ -> :ok :exit, _ -> :ok
end end
@@ -307,30 +313,51 @@ defmodule ElixirAiWeb.VoiceLive do
defp connect_and_send(socket, name, conversation, transcription) do defp connect_and_send(socket, name, conversation, transcription) do
runner_pid = Map.get(conversation, :runner_pid) runner_pid = Map.get(conversation, :runner_pid)
if connected?(socket) do try do
Phoenix.PubSub.subscribe(ElixirAi.PubSub, chat_topic(name)) if connected?(socket) do
Phoenix.PubSub.subscribe(ElixirAi.PubSub, chat_topic(name))
if runner_pid, if runner_pid,
do: GenServer.call(runner_pid, {:session, {:register_liveview_pid, self()}}) do: GenServer.call(runner_pid, {:session, {:register_liveview_pid, self()}})
send(self(), :sync_streaming) # Discover and register page tools from AiControllable LiveViews
if runner_pid do
page_tools = discover_and_build_page_tools(socket, runner_pid)
if page_tools != [] do
ChatRunner.register_page_tools(name, page_tools)
end
end
send(self(), :sync_streaming)
end
if runner_pid do
GenServer.cast(runner_pid, {:conversation, {:user_message, transcription, nil}})
else
ChatRunner.new_user_message(name, transcription)
end
assign(socket,
state: :transcribed,
transcription: transcription,
conversation_name: name,
messages: conversation.messages,
streaming_response: conversation.streaming_response,
runner_pid: runner_pid,
ai_error: nil
)
catch
:exit, reason ->
Logger.error("VoiceLive: failed to connect to conversation #{name}: #{inspect(reason)}")
assign(socket,
state: :transcribed,
transcription: transcription,
conversation_name: nil,
ai_error: "Failed to connect to conversation: process unavailable"
)
end end
if runner_pid do
GenServer.cast(runner_pid, {:conversation, {:user_message, transcription, nil}})
else
ChatRunner.new_user_message(name, transcription)
end
assign(socket,
state: :transcribed,
transcription: transcription,
conversation_name: name,
messages: conversation.messages,
streaming_response: conversation.streaming_response,
runner_pid: runner_pid,
ai_error: nil
)
end end
defp get_snapshot(%{assigns: %{runner_pid: pid}}) when is_pid(pid) do defp get_snapshot(%{assigns: %{runner_pid: pid}}) when is_pid(pid) do
@@ -343,4 +370,33 @@ defmodule ElixirAiWeb.VoiceLive do
defp get_snapshot(_socket) do defp get_snapshot(_socket) do
%{id: nil, content: "", reasoning_content: "", tool_calls: []} %{id: nil, content: "", reasoning_content: "", tool_calls: []}
end end
# Discovers the page LiveViews registered for this socket's voice session, asks
# each for its tool specs, and turns the answers into tool structs via
# `AiTools.build_page_tools/2`. Returns `[]` when the socket has no
# `voice_session_id`.
defp discover_and_build_page_tools(socket, runner_pid) do
  case socket.assigns.voice_session_id do
    nil ->
      []

    voice_session_id ->
      pids_and_specs =
        voice_session_id
        |> page_tool_pids()
        |> request_page_tool_specs()

      AiTools.build_page_tools(runner_pid, pids_and_specs)
  end
end

# Members of the `{:page, voice_session_id}` group. An error raised by the
# lookup is treated as "no pages" (presumably the scope not running — confirm).
defp page_tool_pids(voice_session_id) do
  :pg.get_members(ElixirAi.PageToolsPG, {:page, voice_session_id})
catch
  :error, _ -> []
end

# Sends `{:get_ai_tools, self()}` to every page and collects the
# `{:ai_tools_response, pid, tools}` replies as `{pid, tools}` pairs.
#
# All requests go out up front, so one shared 1-second deadline bounds the
# total time this process blocks; pages that have not answered by then are
# skipped. Replies that already arrived are still picked up once the deadline
# has passed, because the remaining timeout bottoms out at 0.
defp request_page_tool_specs(page_pids) do
  Enum.each(page_pids, &send(&1, {:get_ai_tools, self()}))
  deadline = System.monotonic_time(:millisecond) + 1_000

  Enum.reduce(page_pids, [], fn page_pid, acc ->
    remaining = max(deadline - System.monotonic_time(:millisecond), 0)

    receive do
      {:ai_tools_response, ^page_pid, tools} -> [{page_pid, tools} | acc]
    after
      remaining -> acc
    end
  end)
end
end end