Files
elixir-websocket-testing/backend/lib/backend/cluster.ex
2026-03-02 08:40:30 -07:00

79 lines
2.0 KiB
Elixir

defmodule Backend.Cluster do
  @moduledoc """
  Manages Erlang clustering using native `:net_kernel` functionality.

  Peer node names are read from the `CLUSTER_NODES` environment variable as a
  comma-separated list (for example `"a@host1,b@host2"`). Whitespace around
  each entry is ignored, duplicates are collapsed, and the local node is
  excluded. The server attempts to connect to every remaining node about one
  second after start-up and then again every ten seconds, so that peers which
  were unreachable earlier are retried.
  """

  use GenServer
  require Logger

  # Delay before the first connection attempt after start-up.
  @initial_connect_delay_ms 1_000
  # Interval between periodic reconnection attempts.
  @retry_interval_ms 10_000

  @doc """
  Starts the cluster manager and registers it under the module name.

  `opts` is passed through to `init/1`, which currently ignores it.
  """
  @spec start_link(term()) :: GenServer.on_start()
  def start_link(opts) do
    GenServer.start_link(__MODULE__, opts, name: __MODULE__)
  end

  @impl true
  def init(_opts) do
    # One early attempt shortly after boot, plus the recurring retry timer.
    Process.send_after(self(), :connect_nodes, @initial_connect_delay_ms)
    schedule_connect()
    {:ok, %{}}
  end

  @impl true
  def handle_info(:connect_nodes, state) do
    connect_to_cluster()
    {:noreply, state}
  end

  def handle_info(:periodic_connect, state) do
    connect_to_cluster()
    # Re-arm the timer so the retry keeps running for the server's lifetime.
    schedule_connect()
    {:noreply, state}
  end

  # Attempts to connect to every configured peer and logs a warning when any
  # attempt fails or fewer configured peers than expected are connected.
  defp connect_to_cluster do
    cluster_nodes = configured_peers()

    failures =
      Enum.filter(cluster_nodes, fn node_name ->
        case Node.connect(node_name) do
          true ->
            # Connected (this is also the result when already connected).
            false

          false ->
            Logger.warning("Failed to connect to node: #{node_name}")
            true

          :ignored ->
            # The local node is not alive (distribution not started), so no
            # attempt was made. Not counted as a per-node failure; the status
            # check below still reports the missing peers.
            false
        end
      end)

    # Count only configured peers that are actually connected. Counting every
    # entry of Node.list() would let unrelated nodes mask a missing peer.
    visible_nodes = Node.list()
    connected_nodes = Enum.filter(cluster_nodes, &(&1 in visible_nodes))

    expected_count = length(cluster_nodes)
    actual_count = length(connected_nodes)

    if failures != [] or actual_count < expected_count do
      Logger.warning(
        "Cluster status: #{actual_count}/#{expected_count} nodes connected: #{inspect(connected_nodes)}"
      )
    end
  end

  # Reads CLUSTER_NODES and returns the peer node names as atoms, without
  # blanks, duplicates, or the local node.
  defp configured_peers do
    "CLUSTER_NODES"
    |> System.get_env("")
    |> String.split(",")
    |> Enum.map(&String.trim/1)
    |> Enum.reject(&(&1 == ""))
    |> Enum.uniq()
    # Node names must be atoms. The input is deployment configuration (not
    # user input) and the same names recur on every cycle, so the set of
    # atoms created here stays bounded.
    |> Enum.map(&String.to_atom/1)
    |> Enum.reject(&(&1 == node()))
  end

  # Schedules the next periodic connection attempt.
  defp schedule_connect do
    Process.send_after(self(), :periodic_connect, @retry_interval_ms)
  end
end