scraper
All checks were successful
Build and Deploy / Build & Push Image (push) Successful in 37s

This commit is contained in:
2026-03-16 20:44:47 -06:00
parent 91d2c327de
commit 37602fc78a
3 changed files with 58 additions and 28 deletions

View File

@@ -1,4 +1,6 @@
defmodule CobblemonUi.TierListScraper do
# Logger is required so its macros (info/debug/warning/error) are usable below.
require Logger
# Page the tier list is scraped from (compile-time constant; NOTE(review):
# consider Application config if this should vary per environment).
@url "https://rankedboost.com/pokemon/tier-list/"
# File the scraped pokemon list is serialized to as JSON (written by
# write_json/1, which is outside this view).
@filename "pokemon_tier_list.json"
@@ -8,26 +10,49 @@ defmodule CobblemonUi.TierListScraper do
end
# Runs the full scrape pipeline: fetch the page, parse it, write the JSON file.
#
# Returns `{:ok, pokemon}` (the parsed list) on success, or the first
# `{:error, reason}` produced by any step. Fix: the `with` previously contained
# a stale `pokemon <- parse(html)` clause alongside the current
# `{:ok, pokemon} <- parse(html)` one, causing the page to be parsed twice and
# the first binding to be discarded — only the tagged-tuple clause is kept.
def run do
  Logger.info("[TierListScraper] Starting scrape from #{@url}")

  with {:ok, html} <- fetch_page(),
       {:ok, pokemon} <- parse(html),
       :ok <- write_json(pokemon) do
    Logger.info("[TierListScraper] Successfully scraped and saved #{length(pokemon)} pokemon")
    {:ok, pokemon}
  else
    # Each step already returns a self-describing {:error, reason};
    # log it once here and pass it through unchanged.
    {:error, reason} = err ->
      Logger.error("[TierListScraper] Scrape failed: #{inspect(reason)}")
      err
  end
end
# Fetches the tier-list page over HTTP via Req, sending a browser-like
# user-agent (some sites reject requests with no/odd UA strings).
#
# Returns:
#   * `{:ok, body}` on HTTP 200
#   * `{:error, {:http_error, status}}` on any other status
#   * `{:error, err}` when the request itself fails (DNS, timeout, ...)
#
# Fix: diff residue left the old `case Req.get(@url) do` body (with no closing
# `end`) stacked above the new one, which is a syntax error as a file — only
# the current implementation is kept.
def fetch_page do
  Logger.debug("[TierListScraper] Fetching #{@url}")

  case Req.get(@url, headers: [{"user-agent", "Mozilla/5.0 (compatible; CobblemonUI/1.0)"}]) do
    {:ok, %Req.Response{status: 200, body: body}} ->
      Logger.debug("[TierListScraper] Fetch OK, body size: #{byte_size(body)} bytes")
      {:ok, body}

    {:ok, %Req.Response{status: status}} ->
      Logger.warning("[TierListScraper] Unexpected HTTP status: #{status}")
      {:error, {:http_error, status}}

    {:error, err} ->
      Logger.error("[TierListScraper] HTTP request failed: #{inspect(err)}")
      {:error, err}
  end
end
# Parses the fetched HTML and extracts pokemon entries from `.pokemon-tier`
# nodes via extract_pokemon/1 (defined below, outside this view).
#
# Returns `{:ok, valid}` where `valid` drops entries whose `:name` is empty,
# or `{:error, :no_pokemon_found}` when no matching nodes exist (likely a
# page-structure change or JS-rendered content).
#
# Fix: diff residue left the old pipeline (`html |> Floki.parse_document!()
# |> Floki.find(...) |> Enum.map(...)`) above the new body — it parsed the
# document a second time and discarded the result; only the current
# implementation is kept.
def parse(html) do
  nodes = html |> Floki.parse_document!() |> Floki.find(".pokemon-tier")
  Logger.debug("[TierListScraper] Found #{length(nodes)} .pokemon-tier nodes")

  case nodes do
    [] ->
      Logger.warning("[TierListScraper] No .pokemon-tier elements found — page structure may have changed or content is JS-rendered")
      {:error, :no_pokemon_found}

    _ ->
      pokemon = Enum.map(nodes, &extract_pokemon/1)
      # extract_pokemon/1 is expected to yield maps with a :name key;
      # entries with an empty name are counted and skipped.
      valid = Enum.filter(pokemon, fn %{name: n} -> n != "" end)
      Logger.info("[TierListScraper] Parsed #{length(valid)} valid pokemon (#{length(pokemon) - length(valid)} skipped with empty names)")
      {:ok, valid}
  end
end
defp extract_pokemon(node) do