scraper updates
All checks were successful
Build and Deploy / Build & Push Image (push) Successful in 37s

This commit is contained in:
2026-03-16 20:48:04 -06:00
parent 37602fc78a
commit 842f727f4f
3 changed files with 106 additions and 35 deletions

View File

@@ -1,22 +1,84 @@
defmodule CobblemonUi.TierListScraper do
use GenServer
require Logger
@url "https://rankedboost.com/pokemon/tier-list/"
@filename "pokemon_tier_list.json"
def output_file do
dir = System.get_env("CACHE_DIR", ".")
Path.join(dir, @filename)
# ---------------------------------------------------------------------------
# Client API
# ---------------------------------------------------------------------------
@doc "Starts the scraper GenServer, registered under the module name."
def start_link(_opts), do: GenServer.start_link(__MODULE__, [], name: __MODULE__)
def run do
@doc "Returns the current tier list map (%{lowercase_name => tier})."
def get_tier_list, do: GenServer.call(__MODULE__, :get_tier_list)
@doc "Triggers a background re-scrape from HTTP."
def refresh, do: GenServer.cast(__MODULE__, :refresh)
# ---------------------------------------------------------------------------
# Server callbacks
# ---------------------------------------------------------------------------
@impl true
def init(_opts) do
  # Warm the state from the JSON cache on disk; only hit the network when
  # the cache is absent or empty.
  cached = load_from_file()

  case map_size(cached) do
    0 ->
      Logger.info("[TierListScraper] No cached data found, scheduling HTTP fetch")
      # Self-message so init/1 returns immediately; the fetch runs in handle_info.
      send(self(), :fetch)

    count ->
      Logger.info("[TierListScraper] Loaded #{count} pokemon from cache file")
  end

  {:ok, cached}
end
@impl true
# Synchronous read of the whole tier map; state is returned unchanged.
def handle_call(:get_tier_list, _from, tier_list), do: {:reply, tier_list, tier_list}
@impl true
def handle_cast(:refresh, tier_list) do
  Logger.info("[TierListScraper] Manual refresh requested")
  # Delegate the actual scrape to the :fetch handler; keep state as-is for now.
  send(self(), :fetch)
  {:noreply, tier_list}
end
@impl true
def handle_info(:fetch, current) do
  Logger.info("[TierListScraper] Starting scrape from #{@url}")

  # On success the freshly scraped map replaces the state; on failure the
  # previous state is kept so a transient network error never wipes data.
  next =
    case run_scrape() do
      {:ok, fresh} ->
        Logger.info("[TierListScraper] Updated state with #{map_size(fresh)} pokemon")
        fresh

      {:error, reason} ->
        Logger.error("[TierListScraper] Scrape failed, keeping existing state: #{inspect(reason)}")
        current
    end

  {:noreply, next}
end
# ---------------------------------------------------------------------------
# Private scrape pipeline
# ---------------------------------------------------------------------------
# Fetches the page, parses it, writes the JSON cache, and builds the
# in-memory tier map; any failing step short-circuits the `with` chain.
#
# NOTE(review): this span is unresolved diff residue — both the old return
# value ({:ok, pokemon}) and the new one ({:ok, tier_list}) are present, and
# a raw `@@` hunk header is embedded in the else branch. Reconcile against
# the actual committed file before treating this as compilable source.
defp run_scrape do
  with {:ok, html} <- fetch_page(),
       {:ok, pokemon} <- parse(html),
       :ok <- write_json(pokemon) do
    tier_list = to_tier_map(pokemon)
    Logger.info("[TierListScraper] Successfully scraped and saved #{length(pokemon)} pokemon")
    # Old (pre-diff) return value, superseded by the line below:
    {:ok, pokemon}
    # New return value — as the last expression, this is what callers receive:
    {:ok, tier_list}
  else
    {:error, reason} = err ->
      Logger.error("[TierListScraper] Scrape failed: #{inspect(reason)}")
      @@ -24,22 +86,25 @@ defmodule CobblemonUi.TierListScraper do
  end
end
# Fetches the tier-list page over HTTP.
#
# Returns `{:ok, body}` on a 200 response, `{:error, {:http_error, status}}`
# for any other status, or `{:error, err}` when the request itself fails.
#
# Fix: removed the stray old signature line (`def fetch_page do`) that diff
# residue left directly above the new `defp` head, which made the definition
# invalid (nested def).
defp fetch_page do
  Logger.debug("[TierListScraper] Fetching #{@url}")

  # Browser-like user-agent; some sites reject requests with no/odd UA.
  case Req.get(@url, headers: [{"user-agent", "Mozilla/5.0 (compatible; CobblemonUI/1.0)"}]) do
    {:ok, %Req.Response{status: 200, body: body}} ->
      Logger.debug("[TierListScraper] Fetch OK, body size: #{byte_size(body)} bytes")
      {:ok, body}

    {:ok, %Req.Response{status: status}} ->
      Logger.warning("[TierListScraper] Unexpected HTTP status: #{status}")
      {:error, {:http_error, status}}

    {:error, err} ->
      Logger.error("[TierListScraper] HTTP request failed: #{inspect(err)}")
      {:error, err}
  end
end
# NOTE(review): incomplete diff fragment. The old public `def parse(html) do`
# head is interleaved with the new `defp` head, and two `@@` hunk headers cut
# out (a) the opening of the `case` whose `[]` / `_` clauses appear below and
# (b) the tail of parse/1 plus the head of the extract_pokemon/1 helper whose
# last lines (`%{name: name, tier: tier}` / `end`) appear at the bottom.
# Do not edit from this view; consult the full committed file.
def parse(html) do
defp parse(html) do
# Selects every `.pokemon-tier` element from the parsed document.
nodes = html |> Floki.parse_document!() |> Floki.find(".pokemon-tier")
Logger.debug("[TierListScraper] Found #{length(nodes)} .pokemon-tier nodes")
@@ -47,6 +112,7 @@ defmodule CobblemonUi.TierListScraper do
[] ->
Logger.warning("[TierListScraper] No .pokemon-tier elements found — page structure may have changed or content is JS-rendered")
{:error, :no_pokemon_found}
_ ->
pokemon = Enum.map(nodes, &extract_pokemon/1)
# Drops entries whose extracted name is empty (failed extraction).
valid = Enum.filter(pokemon, fn %{name: n} -> n != "" end)
@@ -71,20 +137,41 @@ defmodule CobblemonUi.TierListScraper do
%{name: name, tier: tier}
end
# Persists the scraped entries as pretty-printed JSON at `output_file/0`,
# creating the cache directory first if needed.
#
# Returns `:ok` or `{:error, reason}` from `File.write/2` — the `with` chain
# in the scrape pipeline matches on `:ok`, so do not switch to `File.write!/2`.
#
# Fix: removed the interleaved old public version (`def write_json(data) do`
# plus its two body lines) that diff residue left above the new `defp` head.
defp write_json(data) do
  path = output_file()
  Logger.debug("[TierListScraper] Writing cache to #{path}")
  File.mkdir_p!(Path.dirname(path))
  File.write(path, Jason.encode!(data, pretty: true))
end
# Loads the cached tier list from disk.
#
# Returns a map of %{lowercase_name => tier} for fast lookup, or an empty map
# when the cache file is missing, unreadable, or contains invalid JSON
# (`Jason.decode/1` failures fall through to the generic `{:error, reason}`
# clause).
#
# Fix: removed the interleaved remnants of the old public `load_tier_list/0`
# (its head, inline `Map.new/2`, and catch-all `_ -> %{}` else clause) that
# diff residue left mixed into the new implementation.
defp load_from_file do
  path = output_file()

  with {:ok, contents} <- File.read(path),
       {:ok, entries} <- Jason.decode(contents) do
    map = to_tier_map(entries)
    Logger.info("[TierListScraper] Loaded #{map_size(map)} pokemon from #{path}")
    map
  else
    {:error, :enoent} ->
      Logger.info("[TierListScraper] Cache file not found at #{path}")
      %{}

    {:error, reason} ->
      Logger.warning("[TierListScraper] Failed to load cache file: #{inspect(reason)}")
      %{}
  end
end
# Normalizes decoded entries into %{downcased_name => tier}. Accepts both
# string-keyed maps (fresh from Jason.decode) and atom-keyed maps (built by
# the scrape pipeline). Raises FunctionClauseError on any other shape.
defp to_tier_map(entries) do
  entries
  |> Enum.map(fn
    %{"name" => name, "tier" => tier} -> {String.downcase(name), tier}
    %{name: name, tier: tier} -> {String.downcase(name), tier}
  end)
  |> Map.new()
end
# Cache file path: honors the CACHE_DIR env var, defaulting to the cwd.
defp output_file do
  "CACHE_DIR"
  |> System.get_env(".")
  |> Path.join(@filename)
end
end