From 8a27774c237f560a137611c9e59b4097baa8a0dd Mon Sep 17 00:00:00 2001
From: Manabu Niseki
Date: Sat, 7 Mar 2026 07:23:31 +0900
Subject: [PATCH 1/2] fix: handle non JSON error

Move the HTTPStatusError -> APIError mapping out of _get_error into
_map_http_status_error. When that mapping fails because the error body
is not valid JSON, cannot be decoded, or does not have the expected
shape (for example no "message" key, or a JSON value that is not an
object), _get_error returns an APIError built from the raw response
text and the HTTP status code instead of letting the exception escape.

---
Reviewer notes (this section is ignored when the patch is applied):
- The line structure of this patch was rebuilt; indentation is inferred
  from the Python block structure. Hunk line counts match the headers
  (57 -> 67 and 6 -> 27).
- Changed versus the original commit: the fallback in _get_error also
  catches KeyError and TypeError. The blob ids on the "index" lines are
  still those of the original commit.
- NOTE(review): the fallback references json.JSONDecodeError, but no hunk
  adds "import json" to src/urlscan/client.py - confirm it is already
  imported there.

 src/urlscan/client.py     | 100 +++++++++++++++++++++-----------------
 tests/unit/test_client.py |  21 ++++++++
 2 files changed, 76 insertions(+), 45 deletions(-)

diff --git a/src/urlscan/client.py b/src/urlscan/client.py
index b1a1f06..4845dfa 100644
--- a/src/urlscan/client.py
+++ b/src/urlscan/client.py
@@ -370,57 +370,67 @@ def get_text(self, path: str, params: QueryParamTypes | None = None) -> str:
         res = self._get(path, params=params)
         return self._response_to_str(res)
 
-    def _get_error(self, res: ClientResponse) -> APIError | None:
-        try:
-            res.raise_for_status()
-        except httpx.HTTPStatusError as exc:
-            data: dict = exc.response.json()
-            message: str = data["message"]
-            description: str | None = data.get("description")
-            code: str | None = data.get("code")
-            type_: str | None = data.get("type")
-            # fallback to HTTP status code if "status" is missing
-            status: int = data.get("status") or exc.response.status_code
-
-            # ref. https://urlscan.io/docs/api/#ratelimit
-            if status == 429:
-                rate_limit_reset_after = float(
-                    exc.response.headers.get("X-Rate-Limit-Reset-After", 0)
-                )
-                return RateLimitError(
-                    message,
-                    description=description,
-                    status=status,
-                    rate_limit_reset_after=rate_limit_reset_after,
-                )
-
-            def mapper(d: dict) -> ItemError:
-                title: str = d["title"]
-                status: int = d["status"]
-                code: str | None = d.get("code")
-                description: str | None = d.get("description")
-                detail: str | None = d.get("detail")
-                return ItemError(
-                    title=title,
-                    description=description,
-                    detail=detail,
-                    status=status,
-                    code=code,
-                )
-
-            errors: list[ItemError] | None = None
-            if "errors" in data:
-                errors = [mapper(item) for item in data["errors"]]
-
-            return APIError(
+    def _map_http_status_error(self, exc: httpx.HTTPStatusError) -> APIError:
+        data: dict = exc.response.json()
+        message: str = data["message"]
+        description: str | None = data.get("description")
+        code: str | None = data.get("code")
+        type_: str | None = data.get("type")
+        # fallback to HTTP status code if "status" is missing
+        status: int = data.get("status") or exc.response.status_code
+
+        # ref. https://urlscan.io/docs/api/#ratelimit
+        if status == 429:
+            rate_limit_reset_after = float(
+                exc.response.headers.get("X-Rate-Limit-Reset-After", 0)
+            )
+            return RateLimitError(
                 message,
                 description=description,
                 status=status,
+                rate_limit_reset_after=rate_limit_reset_after,
+            )
+
+        def mapper(d: dict) -> ItemError:
+            title: str = d["title"]
+            status: int = d["status"]
+            code: str | None = d.get("code")
+            description: str | None = d.get("description")
+            detail: str | None = d.get("detail")
+            return ItemError(
+                title=title,
+                description=description,
+                detail=detail,
+                status=status,
                 code=code,
-                type_=type_,
-                errors=errors,
             )
 
+        errors: list[ItemError] | None = None
+        if "errors" in data:
+            errors = [mapper(item) for item in data["errors"]]
+
+        return APIError(
+            message,
+            description=description,
+            status=status,
+            code=code,
+            type_=type_,
+            errors=errors,
+        )
+
+    def _get_error(self, res: ClientResponse) -> APIError | None:
+        try:
+            res.raise_for_status()
+        except httpx.HTTPStatusError as exc:
+            try:
+                return self._map_http_status_error(exc)
+            except (json.JSONDecodeError, UnicodeDecodeError, KeyError, TypeError):
+                # when error response is not JSON or lacks the expected shape
+                return APIError(
+                    message=exc.response.text,
+                    status=exc.response.status_code,
+                )
+
         return None
 
     def _response_to_json(self, res: ClientResponse) -> dict:
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py
index 463fb43..cb0fda0 100644
--- a/tests/unit/test_client.py
+++ b/tests/unit/test_client.py
@@ -532,6 +532,27 @@ def test_error_3(client: Client, httpserver: HTTPServer):
     assert exc.errors is not None
 
 
+def test_error_4(client: Client, httpserver: HTTPServer):
+    # non JSON error
+    httpserver.expect_request(
+        "/error",
+        method="GET",
+    ).respond_with_data(
+        "Internal Server Error",
+        status=500,
+        content_type="text/plain",
+    )
+    with pytest.raises(APIError) as exc_info:
+        client.get_json("/error")
+
+    exc = exc_info.value
+    assert exc.status == 500
+    assert exc.message == "Internal Server Error"
+    assert exc.code is None
+    assert exc.type is None
+    assert exc.errors is None
+
+
 def test_get_response(client: Client, httpserver: HTTPServer):
     httpserver.expect_request("/responses/dummy/", method="GET").respond_with_data(
         "dummy", content_type="text/plain"

From 153682f465ea1d2486a5c000855360e2701d8dcd Mon Sep 17 00:00:00 2001
From: Manabu Niseki
Date: Sat, 7 Mar 2026 07:23:51 +0900
Subject: [PATCH 2/2] fix: use #get_content to handle error

get_screenshot now fetches the PNG through get_content instead of
calling _get directly, and names the returned BytesIO "{uuid}.png"
rather than reading the name from the response object.

---
Reviewer notes (this section is ignored when the patch is applied):
- NOTE(review): the indentation of the docstring context lines below is
  inferred; confirm against src/urlscan/client.py before applying.

 src/urlscan/client.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/urlscan/client.py b/src/urlscan/client.py
index 4845dfa..93dc7ed 100644
--- a/src/urlscan/client.py
+++ b/src/urlscan/client.py
@@ -486,9 +486,9 @@ def get_screenshot(self, uuid: str) -> BytesIO:
             https://urlscan.io/docs/api/#screenshot
 
         """
-        res = self._get(f"/screenshots/{uuid}.png")
-        bio = BytesIO(res.content)
-        bio.name = res.basename
+        res = self.get_content(f"/screenshots/{uuid}.png")
+        bio = BytesIO(res)
+        bio.name = f"{uuid}.png"
         return bio
 
     def get_dom(self, uuid: str) -> str: