This commit is contained in:
@@ -1,4 +1,6 @@
|
||||
defmodule CobblemonUi.TierListScraper do
|
||||
require Logger
|
||||
|
||||
@url "https://rankedboost.com/pokemon/tier-list/"
|
||||
@filename "pokemon_tier_list.json"
|
||||
|
||||
@@ -8,26 +10,49 @@ defmodule CobblemonUi.TierListScraper do
|
||||
end
|
||||
|
||||
@doc """
Runs the full scrape: fetches the tier-list page, parses it, and writes the
result as JSON.

Returns `{:ok, pokemon}` with the parsed entries on success. Any
`{:error, reason}` produced by a step is logged and returned unchanged.
"""
@spec run() :: {:ok, list()} | {:error, term()}
def run do
  Logger.info("[TierListScraper] Starting scrape from #{@url}")

  # parse/1 returns a tagged tuple, so it must be matched on {:ok, _} here;
  # a bare `pokemon <- parse(html)` would bind the whole tuple (errors too).
  with {:ok, html} <- fetch_page(),
       {:ok, pokemon} <- parse(html),
       :ok <- write_json(pokemon) do
    Logger.info("[TierListScraper] Successfully scraped and saved #{length(pokemon)} pokemon")
    {:ok, pokemon}
  else
    {:error, reason} = err ->
      Logger.error("[TierListScraper] Scrape failed: #{inspect(reason)}")
      err
  end
end
|
||||
|
||||
@doc """
Downloads the tier-list page at `@url`.

Returns `{:ok, body}` for an HTTP 200 response, `{:error, {:http_error, status}}`
for any other status, and `{:error, err}` when the request itself fails.
"""
@spec fetch_page() :: {:ok, binary()} | {:error, term()}
def fetch_page do
  Logger.debug("[TierListScraper] Fetching #{@url}")

  # An explicit user-agent is sent with the request (presumably because the
  # site rejects the default client UA; confirm against the remote's behaviour).
  case Req.get(@url, headers: [{"user-agent", "Mozilla/5.0 (compatible; CobblemonUI/1.0)"}]) do
    {:ok, %Req.Response{status: 200, body: body}} ->
      # NOTE(review): byte_size/1 assumes the body is a binary (HTML is not auto-decoded).
      Logger.debug("[TierListScraper] Fetch OK, body size: #{byte_size(body)} bytes")
      {:ok, body}

    {:ok, %Req.Response{status: status}} ->
      Logger.warning("[TierListScraper] Unexpected HTTP status: #{status}")
      {:error, {:http_error, status}}

    {:error, err} ->
      Logger.error("[TierListScraper] HTTP request failed: #{inspect(err)}")
      {:error, err}
  end
end
|
||||
|
||||
@doc """
Parses the tier-list HTML into a list of pokemon entries.

Every `.pokemon-tier` node is passed through `extract_pokemon/1`; entries whose
`:name` is the empty string are dropped.

Returns `{:ok, entries}` or `{:error, :no_pokemon_found}` when the document
contains no `.pokemon-tier` nodes. Raises if `Floki.parse_document!/1` cannot
parse `html`.
"""
@spec parse(binary()) :: {:ok, list()} | {:error, :no_pokemon_found}
def parse(html) do
  nodes = html |> Floki.parse_document!() |> Floki.find(".pokemon-tier")

  Logger.debug("[TierListScraper] Found #{length(nodes)} .pokemon-tier nodes")

  case nodes do
    [] ->
      # Zero matches is reported as an error rather than an empty success, so
      # run/0 does not go on to write an empty result.
      Logger.warning("[TierListScraper] No .pokemon-tier elements found — page structure may have changed or content is JS-rendered")
      {:error, :no_pokemon_found}

    _ ->
      pokemon = Enum.map(nodes, &extract_pokemon/1)
      valid = Enum.filter(pokemon, fn %{name: n} -> n != "" end)

      Logger.info("[TierListScraper] Parsed #{length(valid)} valid pokemon (#{length(pokemon) - length(valid)} skipped with empty names)")
      {:ok, valid}
  end
end
|
||||
|
||||
defp extract_pokemon(node) do
|
||||
|
||||
Reference in New Issue
Block a user