From 842f727f4f0a37190724da2cd805bb3e3601a6fd Mon Sep 17 00:00:00 2001 From: Alex Mickelson Date: Mon, 16 Mar 2026 20:48:04 -0600 Subject: [PATCH] scraper updates --- lib/cobblemon_ui/application.ex | 1 + lib/cobblemon_ui/tier_list_scraper.ex | 121 +++++++++++++++++--- lib/cobblemon_ui_web/live/dashboard_live.ex | 19 +-- 3 files changed, 106 insertions(+), 35 deletions(-) diff --git a/lib/cobblemon_ui/application.ex b/lib/cobblemon_ui/application.ex index 1364c92..ea9ffc5 100644 --- a/lib/cobblemon_ui/application.ex +++ b/lib/cobblemon_ui/application.ex @@ -12,6 +12,7 @@ defmodule CobblemonUi.Application do {DNSCluster, query: Application.get_env(:cobblemon_ui, :dns_cluster_query) || :ignore}, {Phoenix.PubSub, name: CobblemonUi.PubSub}, CobblemonUi.CobblemonFS, + CobblemonUi.TierListScraper, # Start to serve requests, typically the last entry CobblemonUiWeb.Endpoint ] diff --git a/lib/cobblemon_ui/tier_list_scraper.ex b/lib/cobblemon_ui/tier_list_scraper.ex index 4ea718d..d74fe03 100644 --- a/lib/cobblemon_ui/tier_list_scraper.ex +++ b/lib/cobblemon_ui/tier_list_scraper.ex @@ -1,22 +1,84 @@ defmodule CobblemonUi.TierListScraper do + use GenServer require Logger @url "https://rankedboost.com/pokemon/tier-list/" @filename "pokemon_tier_list.json" - def output_file do - dir = System.get_env("CACHE_DIR", ".") - Path.join(dir, @filename) + # --------------------------------------------------------------------------- + # Client API + # --------------------------------------------------------------------------- + + def start_link(_opts) do + GenServer.start_link(__MODULE__, [], name: __MODULE__) end - def run do + @doc "Returns the current tier list map (%{lowercase_name => tier})." + def get_tier_list do + GenServer.call(__MODULE__, :get_tier_list) + end + + @doc "Triggers a background re-scrape from HTTP." + def refresh do + GenServer.cast(__MODULE__, :refresh) + end + + # --------------------------------------------------------------------------- + # Server callbacks + # --------------------------------------------------------------------------- + + @impl true + def init(_opts) do + tier_list = load_from_file() + + if map_size(tier_list) == 0 do + Logger.info("[TierListScraper] No cached data found, scheduling HTTP fetch") + send(self(), :fetch) + else + Logger.info("[TierListScraper] Loaded #{map_size(tier_list)} pokemon from cache file") + end + + {:ok, tier_list} + end + + @impl true + def handle_call(:get_tier_list, _from, state) do + {:reply, state, state} + end + + @impl true + def handle_cast(:refresh, state) do + Logger.info("[TierListScraper] Manual refresh requested") + send(self(), :fetch) + {:noreply, state} + end + + @impl true + def handle_info(:fetch, state) do Logger.info("[TierListScraper] Starting scrape from #{@url}") + case run_scrape() do + {:ok, tier_list} -> + Logger.info("[TierListScraper] Updated state with #{map_size(tier_list)} pokemon") + {:noreply, tier_list} + + {:error, reason} -> + Logger.error("[TierListScraper] Scrape failed, keeping existing state: #{inspect(reason)}") + {:noreply, state} + end + end + + # --------------------------------------------------------------------------- + # Private scrape pipeline + # --------------------------------------------------------------------------- + + defp run_scrape do with {:ok, html} <- fetch_page(), {:ok, pokemon} <- parse(html), :ok <- write_json(pokemon) do + tier_list = to_tier_map(pokemon) Logger.info("[TierListScraper] Successfully scraped and saved #{length(pokemon)} pokemon") - {:ok, pokemon} + {:ok, tier_list} else {:error, reason} = err -> Logger.error("[TierListScraper] Scrape failed: #{inspect(reason)}") @@ -24,22 +86,25 @@ defmodule CobblemonUi.TierListScraper do end end - def fetch_page do + defp fetch_page do Logger.debug("[TierListScraper] Fetching #{@url}") + case Req.get(@url, headers: [{"user-agent", "Mozilla/5.0 (compatible; CobblemonUI/1.0)"}]) do {:ok, %Req.Response{status: 200, body: body}} -> Logger.debug("[TierListScraper] Fetch OK, body size: #{byte_size(body)} bytes") {:ok, body} + {:ok, %Req.Response{status: status}} -> Logger.warning("[TierListScraper] Unexpected HTTP status: #{status}") {:error, {:http_error, status}} + {:error, err} -> Logger.error("[TierListScraper] HTTP request failed: #{inspect(err)}") {:error, err} end end - def parse(html) do + defp parse(html) do nodes = html |> Floki.parse_document!() |> Floki.find(".pokemon-tier") Logger.debug("[TierListScraper] Found #{length(nodes)} .pokemon-tier nodes") @@ -47,6 +112,7 @@ defmodule CobblemonUi.TierListScraper do [] -> Logger.warning("[TierListScraper] No .pokemon-tier elements found — page structure may have changed or content is JS-rendered") {:error, :no_pokemon_found} + _ -> pokemon = Enum.map(nodes, &extract_pokemon/1) valid = Enum.filter(pokemon, fn %{name: n} -> n != "" end) @@ -71,20 +137,41 @@ defmodule CobblemonUi.TierListScraper do %{name: name, tier: tier} end - def write_json(data) do - json = Jason.encode!(data, pretty: true) - File.write(output_file(), json) + defp write_json(data) do + path = output_file() + Logger.debug("[TierListScraper] Writing cache to #{path}") + File.mkdir_p!(Path.dirname(path)) + File.write(path, Jason.encode!(data, pretty: true)) end - # Returns a map of %{lowercase_name => tier} for fast lookup, or an empty map if unavailable. - def load_tier_list do - with {:ok, contents} <- File.read(output_file()), + defp load_from_file do + path = output_file() + + with {:ok, contents} <- File.read(path), {:ok, entries} <- Jason.decode(contents) do - Map.new(entries, fn %{"name" => name, "tier" => tier} -> - {String.downcase(name), tier} - end) + map = to_tier_map(entries) + Logger.info("[TierListScraper] Loaded #{map_size(map)} pokemon from #{path}") + map else - _ -> %{} + {:error, :enoent} -> + Logger.info("[TierListScraper] Cache file not found at #{path}") + %{} + + {:error, reason} -> + Logger.warning("[TierListScraper] Failed to load cache file: #{inspect(reason)}") + %{} end end + + defp to_tier_map(entries) do + Map.new(entries, fn + %{"name" => name, "tier" => tier} -> {String.downcase(name), tier} + %{name: name, tier: tier} -> {String.downcase(name), tier} + end) + end + + defp output_file do + dir = System.get_env("CACHE_DIR", ".") + Path.join(dir, @filename) + end end diff --git a/lib/cobblemon_ui_web/live/dashboard_live.ex b/lib/cobblemon_ui_web/live/dashboard_live.ex index 4d466ff..93081f0 100644 --- a/lib/cobblemon_ui_web/live/dashboard_live.ex +++ b/lib/cobblemon_ui_web/live/dashboard_live.ex @@ -1,8 +1,6 @@ defmodule CobblemonUiWeb.DashboardLive do use CobblemonUiWeb, :live_view - require Logger - import CobblemonUiWeb.PokemonComponents import CobblemonUiWeb.BattleComponents @@ -10,7 +8,6 @@ defmodule CobblemonUiWeb.DashboardLive do def mount(_params, _session, socket) do if connected?(socket) do :timer.send_interval(1000, self(), :tick) - unless File.exists?(CobblemonUi.TierListScraper.output_file()), do: send(self(), :scrape_tier_list) end players = @@ -26,7 +23,7 @@ defmodule CobblemonUiWeb.DashboardLive do player_data: nil, battle: nil, selected_pokemon: nil, - tier_list: CobblemonUi.TierListScraper.load_tier_list(), + tier_list: CobblemonUi.TierListScraper.get_tier_list(), view_mode: :party, loading: false, error: nil @@ -102,20 +99,6 @@ defmodule CobblemonUiWeb.DashboardLive do {:noreply, do_refresh(socket)} end - def handle_info(:scrape_tier_list, socket) do - lv = self() - Task.start(fn -> - case CobblemonUi.TierListScraper.run() do - {:ok, _} -> send(lv, :reload_tier_list) - {:error, reason} -> Logger.error("[DashboardLive] Tier list scrape failed: #{inspect(reason)}") - end - end) - {:noreply, socket} - end - - def handle_info(:reload_tier_list, socket) do - {:noreply, assign(socket, tier_list: CobblemonUi.TierListScraper.load_tier_list())} - end defp do_refresh(socket) do players =