From d857e91241a5475e2bb3f57a6442023d4c45b4b7 Mon Sep 17 00:00:00 2001 From: Alex Mickelson Date: Wed, 25 Mar 2026 09:22:48 -0600 Subject: [PATCH] general solution to voice control --- lib/elixir_ai/ai_controllable.ex | 68 +++++++++++ lib/elixir_ai/ai_controllable/hook.ex | 35 ++++++ lib/elixir_ai/ai_tools.ex | 36 ++++++ lib/elixir_ai/application.ex | 3 +- lib/elixir_ai/chat_runner/chat_runner.ex | 5 + .../chat_runner/conversation_calls.ex | 2 +- lib/elixir_ai/chat_runner/liveview_session.ex | 4 + lib/elixir_ai/chat_runner/stream_handler.ex | 4 +- lib/elixir_ai/page_tools_pg.ex | 15 +++ lib/elixir_ai_web/chat/chat_live.ex | 36 ++++++ lib/elixir_ai_web/chat/chat_message.ex | 2 +- lib/elixir_ai_web/plugs/voice_session_id.ex | 23 ++++ lib/elixir_ai_web/router.ex | 1 + lib/elixir_ai_web/voice/voice_live.ex | 106 +++++++++++++----- 14 files changed, 309 insertions(+), 31 deletions(-) create mode 100644 lib/elixir_ai/ai_controllable.ex create mode 100644 lib/elixir_ai/ai_controllable/hook.ex create mode 100644 lib/elixir_ai/page_tools_pg.ex create mode 100644 lib/elixir_ai_web/plugs/voice_session_id.ex diff --git a/lib/elixir_ai/ai_controllable.ex b/lib/elixir_ai/ai_controllable.ex new file mode 100644 index 0000000..775e6e6 --- /dev/null +++ b/lib/elixir_ai/ai_controllable.ex @@ -0,0 +1,68 @@ +defmodule ElixirAi.AiControllable do + @moduledoc """ + Behaviour + macro for LiveViews that expose AI-controllable tools. + + Any LiveView that `use`s this module must implement: + + - `ai_tools/0` — returns a list of tool spec maps + - `handle_ai_tool_call(tool_name, args, socket)` — handles a dispatched tool call, + returns `{result_string, socket}`. + + The macro injects: + + - A `handle_info` clause that dispatches `{:page_tool_call, tool_name, args, reply_to}` + messages to the callback and sends the result back to the caller. + - An `on_mount` hook registration that joins the `:pg` group keyed by + `voice_session_id` so VoiceLive can discover sibling page LiveViews. + + ## Usage + + defmodule MyAppWeb.SomeLive do + use MyAppWeb, :live_view + use ElixirAi.AiControllable + + @impl ElixirAi.AiControllable + def ai_tools do + [ + %{ + name: "do_something", + description: "Does something useful", + parameters: %{ + "type" => "object", + "properties" => %{"value" => %{"type" => "string"}}, + "required" => ["value"] + } + } + ] + end + + @impl ElixirAi.AiControllable + def handle_ai_tool_call("do_something", %{"value" => val}, socket) do + {"done: \#{val}", assign(socket, value: val)} + end + end + """ + + @callback ai_tools() :: [map()] + @callback handle_ai_tool_call(tool_name :: String.t(), args :: map(), socket :: term()) :: + {String.t(), term()} + + defmacro __using__(_opts) do + quote do + @behaviour ElixirAi.AiControllable + + on_mount ElixirAi.AiControllable.Hook + + def handle_info({:page_tool_call, tool_name, args, reply_to}, socket) do + {result, socket} = handle_ai_tool_call(tool_name, args, socket) + send(reply_to, {:page_tool_result, tool_name, result}) + {:noreply, socket} + end + + def handle_info({:get_ai_tools, reply_to}, socket) do + send(reply_to, {:ai_tools_response, self(), ai_tools()}) + {:noreply, socket} + end + end + end +end diff --git a/lib/elixir_ai/ai_controllable/hook.ex b/lib/elixir_ai/ai_controllable/hook.ex new file mode 100644 index 0000000..496c17b --- /dev/null +++ b/lib/elixir_ai/ai_controllable/hook.ex @@ -0,0 +1,35 @@ +defmodule ElixirAi.AiControllable.Hook do + @moduledoc """ + LiveView on_mount hook that registers a page LiveView in the + `:ai_page_tools` pg group so VoiceLive can discover it. + + The group key is `{:page, voice_session_id}` where `voice_session_id` + comes from the Plug session, tying the page LiveView to the same browser + tab as VoiceLive. + + Only joins when the LiveView module implements `ai_tools/0` + (i.e. uses `ElixirAi.AiControllable`). + """ + + import Phoenix.LiveView + import Phoenix.Component, only: [assign: 3] + + def on_mount(:default, _params, session, socket) do + voice_session_id = session["voice_session_id"] + module = socket.view + + if voice_session_id && function_exported?(module, :ai_tools, 0) do + if connected?(socket) do + try do + :pg.join(ElixirAi.PageToolsPG, {:page, voice_session_id}, self()) + catch + :exit, _ -> :ok + end + end + + {:cont, assign(socket, :voice_session_id, voice_session_id)} + else + {:cont, socket} + end + end +end diff --git a/lib/elixir_ai/ai_tools.ex b/lib/elixir_ai/ai_tools.ex index e477aea..93c7571 100644 --- a/lib/elixir_ai/ai_tools.ex +++ b/lib/elixir_ai/ai_tools.ex @@ -126,6 +126,42 @@ defmodule ElixirAi.AiTools do ) end + # --------------------------------------------------------------------------- + # Page tools (dynamic, from AiControllable LiveViews) + # --------------------------------------------------------------------------- + + @doc """ + Builds tool structs for page tools discovered from AiControllable LiveViews. + + Each entry in `pids_and_specs` is `{page_pid, [tool_spec, ...]}` where + `tool_spec` is a map with `:name`, `:description`, and `:parameters`. + + The generated function sends `{:page_tool_call, name, args, self()}` to + the page LiveView pid and blocks (inside a Task) waiting for the reply. + """ + def build_page_tools(server, pids_and_specs) do + Enum.flat_map(pids_and_specs, fn {page_pid, tool_specs} -> + Enum.map(tool_specs, fn spec -> + ai_tool( + name: spec.name, + description: spec.description, + function: fn args -> + send(page_pid, {:page_tool_call, spec.name, args, self()}) + + receive do + {:page_tool_result, tool_name, result} when tool_name == spec.name -> + {:ok, result} + after + 5_000 -> {:ok, "page tool #{spec.name} timed out"} + end + end, + parameters: spec.parameters, + server: server + ) + end) + end) + end + # --------------------------------------------------------------------------- # Private # --------------------------------------------------------------------------- diff --git a/lib/elixir_ai/application.ex b/lib/elixir_ai/application.ex index 005720b..194cfaa 100644 --- a/lib/elixir_ai/application.ex +++ b/lib/elixir_ai/application.ex @@ -2,7 +2,6 @@ defmodule ElixirAi.Application do @moduledoc false use Application - @impl true def start(_type, _args) do children = [ ElixirAiWeb.Telemetry, @@ -13,6 +12,7 @@ defmodule ElixirAi.Application do [Application.get_env(:libcluster, :topologies, []), [name: ElixirAi.ClusterSupervisor]]}, {Phoenix.PubSub, name: ElixirAi.PubSub}, {ElixirAi.LiveViewPG, []}, + {ElixirAi.PageToolsPG, []}, {ElixirAi.AudioProcessingPG, []}, {DynamicSupervisor, name: ElixirAi.AudioWorkerSupervisor, strategy: :one_for_one}, ElixirAi.ToolTesting, @@ -39,7 +39,6 @@ defmodule ElixirAi.Application do Supervisor.start_link(children, opts) end - @impl true def config_change(changed, _new, removed) do ElixirAiWeb.Endpoint.config_change(changed, removed) :ok diff --git a/lib/elixir_ai/chat_runner/chat_runner.ex b/lib/elixir_ai/chat_runner/chat_runner.ex index 6c64f0b..e0ce86a 100644 --- a/lib/elixir_ai/chat_runner/chat_runner.ex +++ b/lib/elixir_ai/chat_runner/chat_runner.ex @@ -50,6 +50,10 @@ defmodule ElixirAi.ChatRunner do GenServer.call(via(name), {:session, {:deregister_liveview_pid, liveview_pid}}) end + def register_page_tools(name, page_tools) when is_list(page_tools) do + GenServer.call(via(name), {:session, {:register_page_tools, page_tools}}) + end + def get_conversation(name) do GenServer.call(via(name), {:conversation, :get_conversation}) end @@ -130,6 +134,7 @@ defmodule ElixirAi.ChatRunner do tool_choice: tool_choice, server_tools: server_tools, liveview_tools: liveview_tools, + page_tools: [], provider: provider, liveview_pids: %{} }} diff --git a/lib/elixir_ai/chat_runner/conversation_calls.ex b/lib/elixir_ai/chat_runner/conversation_calls.ex index 12d2969..5b74b13 100644 --- a/lib/elixir_ai/chat_runner/conversation_calls.ex +++ b/lib/elixir_ai/chat_runner/conversation_calls.ex @@ -11,7 +11,7 @@ defmodule ElixirAi.ChatRunner.ConversationCalls do ElixirAi.ChatUtils.request_ai_response( self(), messages_with_system_prompt(new_state.messages, state.system_prompt), - state.server_tools ++ state.liveview_tools, + state.server_tools ++ state.liveview_tools ++ state.page_tools, state.provider, effective_tool_choice ) diff --git a/lib/elixir_ai/chat_runner/liveview_session.ex b/lib/elixir_ai/chat_runner/liveview_session.ex index 027bc38..82178dd 100644 --- a/lib/elixir_ai/chat_runner/liveview_session.ex +++ b/lib/elixir_ai/chat_runner/liveview_session.ex @@ -10,6 +10,10 @@ defmodule ElixirAi.ChatRunner.LiveviewSession do {:reply, :ok, %{state | liveview_pids: Map.put(state.liveview_pids, liveview_pid, ref)}} end + def handle_call({:register_page_tools, page_tools}, _from, state) do + {:reply, :ok, %{state | page_tools: page_tools}} + end + def handle_call({:deregister_liveview_pid, liveview_pid}, _from, state) do case Map.pop(state.liveview_pids, liveview_pid) do {nil, _} -> diff --git a/lib/elixir_ai/chat_runner/stream_handler.ex b/lib/elixir_ai/chat_runner/stream_handler.ex index c2c8acc..393c901 100644 --- a/lib/elixir_ai/chat_runner/stream_handler.ex +++ b/lib/elixir_ai/chat_runner/stream_handler.ex @@ -111,7 +111,7 @@ defmodule ElixirAi.ChatRunner.StreamHandler do {failed, pending} -> with {:ok, decoded_args} <- Jason.decode(tool_call.arguments), tool when not is_nil(tool) <- - Enum.find(state.server_tools ++ state.liveview_tools, fn t -> + Enum.find(state.server_tools ++ state.liveview_tools ++ state.page_tools, fn t -> t.name == tool_call.name end) do tool.run_function.(id, tool_call.id, decoded_args) @@ -160,7 +160,7 @@ defmodule ElixirAi.ChatRunner.StreamHandler do ElixirAi.ChatUtils.request_ai_response( self(), messages_with_system_prompt(state.messages ++ [new_message], state.system_prompt), - state.server_tools ++ state.liveview_tools, + state.server_tools ++ state.liveview_tools ++ state.page_tools, state.provider, state.tool_choice ) diff --git a/lib/elixir_ai/page_tools_pg.ex b/lib/elixir_ai/page_tools_pg.ex new file mode 100644 index 0000000..3ca8fde --- /dev/null +++ b/lib/elixir_ai/page_tools_pg.ex @@ -0,0 +1,15 @@ +defmodule ElixirAi.PageToolsPG do + @moduledoc """ + Named :pg scope for tracking LiveViews that implement AiControllable. + Group key is `{:page, voice_session_id}` — one group per browser session. + """ + + def child_spec(_opts) do + %{ + id: __MODULE__, + start: {:pg, :start_link, [__MODULE__]}, + type: :worker, + restart: :permanent + } + end +end diff --git a/lib/elixir_ai_web/chat/chat_live.ex b/lib/elixir_ai_web/chat/chat_live.ex index 43a7894..ed769b3 100644 --- a/lib/elixir_ai_web/chat/chat_live.ex +++ b/lib/elixir_ai_web/chat/chat_live.ex @@ -1,5 +1,6 @@ defmodule ElixirAiWeb.ChatLive do use ElixirAiWeb, :live_view + use ElixirAi.AiControllable require Logger import ElixirAiWeb.Spinner import ElixirAiWeb.ChatMessage @@ -7,6 +8,38 @@ defmodule ElixirAiWeb.ChatLive do alias ElixirAi.{AiProvider, ChatRunner, ConversationManager} import ElixirAi.PubsubTopics + @impl ElixirAi.AiControllable + def ai_tools do + [ + %{ + name: "set_user_input", + description: + "Set the text in the chat input field. Use this to pre-fill a message for the user. " <> + "The user will still need to press Send (or you can describe what you filled in).", + parameters: %{ + "type" => "object", + "properties" => %{ + "text" => %{ + "type" => "string", + "description" => "The text to place in the chat input field" + } + }, + "required" => ["text"] + } + } + ] + end + + @impl ElixirAi.AiControllable + def handle_ai_tool_call("set_user_input", %{"text" => text}, socket) do + {"user input set to: #{text}", assign(socket, user_input: text)} + end + + def handle_ai_tool_call(_tool_name, _args, socket) do + {"unknown tool", socket} + end + + @impl Phoenix.LiveView def mount(%{"name" => name}, _session, socket) do case ConversationManager.open_conversation(name) do {:ok, conversation} -> @@ -50,6 +83,7 @@ defmodule ElixirAiWeb.ChatLive do end end + @impl Phoenix.LiveView def render(assigns) do ~H"""
@@ -119,6 +153,7 @@ defmodule ElixirAiWeb.ChatLive do """ end + @impl Phoenix.LiveView def handle_event("update_user_input", %{"user_input" => user_input}, socket) do {:noreply, assign(socket, user_input: user_input)} end @@ -293,6 +328,7 @@ defmodule ElixirAiWeb.ChatLive do {:noreply, assign(socket, background_color: color)} end + @impl Phoenix.LiveView def terminate(_reason, %{assigns: %{conversation_name: name}} = socket) do if connected?(socket) do ChatRunner.deregister_liveview_pid(name, self()) diff --git a/lib/elixir_ai_web/chat/chat_message.ex b/lib/elixir_ai_web/chat/chat_message.ex index 7775e7a..2ab8824 100644 --- a/lib/elixir_ai_web/chat/chat_message.ex +++ b/lib/elixir_ai_web/chat/chat_message.ex @@ -39,7 +39,7 @@ defmodule ElixirAiWeb.ChatMessage do def user_message(assigns) do ~H"""
-
+
{@content}
diff --git a/lib/elixir_ai_web/plugs/voice_session_id.ex b/lib/elixir_ai_web/plugs/voice_session_id.ex new file mode 100644 index 0000000..1d886e5 --- /dev/null +++ b/lib/elixir_ai_web/plugs/voice_session_id.ex @@ -0,0 +1,23 @@ +defmodule ElixirAiWeb.Plugs.VoiceSessionId do + @moduledoc """ + Ensures a `voice_session_id` exists in the Plug session. + + This UUID ties VoiceLive (root layout) to page LiveViews (inner content) + so they can discover each other via `:pg` process groups. + """ + + import Plug.Conn + + def init(opts), do: opts + + def call(conn, _opts) do + case get_session(conn, "voice_session_id") do + nil -> + id = Ecto.UUID.generate() + put_session(conn, "voice_session_id", id) + + _existing -> + conn + end + end +end diff --git a/lib/elixir_ai_web/router.ex b/lib/elixir_ai_web/router.ex index 0cfb23e..d4da4e8 100644 --- a/lib/elixir_ai_web/router.ex +++ b/lib/elixir_ai_web/router.ex @@ -4,6 +4,7 @@ defmodule ElixirAiWeb.Router do pipeline :browser do plug :accepts, ["html"] plug :fetch_session + plug ElixirAiWeb.Plugs.VoiceSessionId plug :fetch_live_flash plug :put_root_layout, html: {ElixirAiWeb.Layouts, :root} plug :protect_from_forgery diff --git a/lib/elixir_ai_web/voice/voice_live.ex b/lib/elixir_ai_web/voice/voice_live.ex index 1ac55e1..471aa3f 100644 --- a/lib/elixir_ai_web/voice/voice_live.ex +++ b/lib/elixir_ai_web/voice/voice_live.ex @@ -4,10 +4,12 @@ defmodule ElixirAiWeb.VoiceLive do alias ElixirAiWeb.Voice.Recording alias ElixirAiWeb.Voice.VoiceConversation - alias ElixirAi.{AiProvider, ChatRunner, ConversationManager} + alias ElixirAi.{AiProvider, AiTools, ChatRunner, ConversationManager} import ElixirAi.PubsubTopics - def mount(_params, _session, socket) do + def mount(_params, session, socket) do + voice_session_id = session["voice_session_id"] + {:ok, assign(socket, state: :idle, @@ -17,7 +19,8 @@ defmodule ElixirAiWeb.VoiceLive do messages: [], streaming_response: nil, runner_pid: nil, - ai_error: nil + ai_error: nil, + voice_session_id: voice_session_id ), layout: false} end @@ -98,7 +101,10 @@ defmodule ElixirAiWeb.VoiceLive do if name do if socket.assigns.runner_pid do try do - GenServer.call(socket.assigns.runner_pid, {:session, {:deregister_liveview_pid, self()}}) + GenServer.call( + socket.assigns.runner_pid, + {:session, {:deregister_liveview_pid, self()}} + ) catch :exit, _ -> :ok end @@ -307,30 +313,51 @@ defmodule ElixirAiWeb.VoiceLive do defp connect_and_send(socket, name, conversation, transcription) do runner_pid = Map.get(conversation, :runner_pid) - if connected?(socket) do - Phoenix.PubSub.subscribe(ElixirAi.PubSub, chat_topic(name)) + try do + if connected?(socket) do + Phoenix.PubSub.subscribe(ElixirAi.PubSub, chat_topic(name)) - if runner_pid, - do: GenServer.call(runner_pid, {:session, {:register_liveview_pid, self()}}) + if runner_pid, + do: GenServer.call(runner_pid, {:session, {:register_liveview_pid, self()}}) - send(self(), :sync_streaming) + # Discover and register page tools from AiControllable LiveViews + if runner_pid do + page_tools = discover_and_build_page_tools(socket, runner_pid) + + if page_tools != [] do + ChatRunner.register_page_tools(name, page_tools) + end + end + + send(self(), :sync_streaming) + end + + if runner_pid do + GenServer.cast(runner_pid, {:conversation, {:user_message, transcription, nil}}) + else + ChatRunner.new_user_message(name, transcription) + end + + assign(socket, + state: :transcribed, + transcription: transcription, + conversation_name: name, + messages: conversation.messages, + streaming_response: conversation.streaming_response, + runner_pid: runner_pid, + ai_error: nil + ) + catch + :exit, reason -> + Logger.error("VoiceLive: failed to connect to conversation #{name}: #{inspect(reason)}") + + assign(socket, + state: :transcribed, + transcription: transcription, + conversation_name: nil, + ai_error: "Failed to connect to conversation: process unavailable" + ) end - - if runner_pid do - GenServer.cast(runner_pid, {:conversation, {:user_message, transcription, nil}}) - else - ChatRunner.new_user_message(name, transcription) - end - - assign(socket, - state: :transcribed, - transcription: transcription, - conversation_name: name, - messages: conversation.messages, - streaming_response: conversation.streaming_response, - runner_pid: runner_pid, - ai_error: nil - ) end defp get_snapshot(%{assigns: %{runner_pid: pid}}) when is_pid(pid) do @@ -343,4 +370,33 @@ defmodule ElixirAiWeb.VoiceLive do defp get_snapshot(_socket) do %{id: nil, content: "", reasoning_content: "", tool_calls: []} end + + defp discover_and_build_page_tools(socket, runner_pid) do + voice_session_id = socket.assigns.voice_session_id + if voice_session_id == nil, do: throw(:no_session) + + page_pids = + try do + :pg.get_members(ElixirAi.PageToolsPG, {:page, voice_session_id}) + catch + :error, _ -> [] + end + + # Ask each page LiveView for its tool specs + Enum.each(page_pids, &send(&1, {:get_ai_tools, self()})) + + pids_and_specs = + Enum.reduce(page_pids, [], fn page_pid, acc -> + receive do + {:ai_tools_response, ^page_pid, tools} -> + [{page_pid, tools} | acc] + after + 1_000 -> acc + end + end) + + AiTools.build_page_tools(runner_pid, pids_and_specs) + catch + :no_session -> [] + end end