From 461bd59cf159cd780010d7c45e8f0aa6dd873f3c Mon Sep 17 00:00:00 2001 From: Daniel Szoke Date: Mon, 11 Mar 2024 10:52:30 +0100 Subject: [PATCH 1/9] ref: Improve scrub_dict typing (#2768) This change improves the typing of the scrub_dict method. Previously, the scrub_dict method's type hints indicated that only dict[str, Any] was accepted as the parameter. However, the method is actually implemented to accept any object, since it checks the types of the parameters at runtime. Therefore, object is a more appropriate type hint for the parameter. #2753 depends on this change for mypy to pass --- sentry_sdk/scrubber.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/sentry_sdk/scrubber.py b/sentry_sdk/scrubber.py index a6c55af4fd..3f089ab8f6 100644 --- a/sentry_sdk/scrubber.py +++ b/sentry_sdk/scrubber.py @@ -1,3 +1,8 @@ +try: + from typing import cast +except ImportError: + cast = lambda _, obj: obj + from sentry_sdk.utils import ( capture_internal_exceptions, AnnotatedValue, @@ -8,8 +13,6 @@ if TYPE_CHECKING: from sentry_sdk._types import Event - from typing import Any - from typing import Dict from typing import List from typing import Optional @@ -66,7 +69,7 @@ def __init__(self, denylist=None, recursive=False): self.recursive = recursive def scrub_list(self, lst): - # type: (List[Any]) -> None + # type: (object) -> None """ If a list is passed to this method, the method recursively searches the list and any nested lists for any dictionaries. 
The method calls scrub_dict on all dictionaries @@ -77,24 +80,28 @@ def scrub_list(self, lst): return for v in lst: - if isinstance(v, dict): - self.scrub_dict(v) - elif isinstance(v, list): - self.scrub_list(v) + self.scrub_dict(v) # no-op unless v is a dict + self.scrub_list(v) # no-op unless v is a list def scrub_dict(self, d): - # type: (Dict[str, Any]) -> None + # type: (object) -> None + """ + If a dictionary is passed to this method, the method scrubs the dictionary of any + sensitive data. The method calls itself recursively on any nested dictionaries ( + including dictionaries nested in lists) if self.recursive is True. + This method does nothing if the parameter passed to it is not a dictionary. + """ if not isinstance(d, dict): return for k, v in d.items(): - if isinstance(k, string_types) and k.lower() in self.denylist: + # The cast is needed because mypy is not smart enough to figure out that k must be a + # string after the isinstance check. + if isinstance(k, string_types) and cast(str, k).lower() in self.denylist: d[k] = AnnotatedValue.substituted_because_contains_sensitive_data() elif self.recursive: - if isinstance(v, dict): - self.scrub_dict(v) - elif isinstance(v, list): - self.scrub_list(v) + self.scrub_dict(v) # no-op unless v is a dict + self.scrub_list(v) # no-op unless v is a list def scrub_request(self, event): # type: (Event) -> None From 46a632d10a382312707bd4af2d016934b202e129 Mon Sep 17 00:00:00 2001 From: Christian Schneider Date: Mon, 11 Mar 2024 14:23:53 +0100 Subject: [PATCH 2/9] Propagate sentry-trace and baggage to huey tasks (#2792) This PR enables passing `sentry-trace` and `baggage` headers to background tasks using the Huey task queue. This allows easily correlating what happens inside a background task with whatever transaction (e.g. a user request in a Django application) queued the task in the first place. 
Periodic tasks do not get these headers, because otherwise each execution of the periodic task would be tied to the same parent trace (the long-running worker process). --- Co-authored-by: Anton Pirker --- sentry_sdk/integrations/huey.py | 24 ++++++++++++++++++++---- tests/integrations/huey/test_huey.py | 18 ++++++++++++++++++ 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/sentry_sdk/integrations/huey.py b/sentry_sdk/integrations/huey.py index 9641160099..43c03936b1 100644 --- a/sentry_sdk/integrations/huey.py +++ b/sentry_sdk/integrations/huey.py @@ -6,10 +6,15 @@ from sentry_sdk._compat import reraise from sentry_sdk._types import TYPE_CHECKING from sentry_sdk import Hub +from sentry_sdk.api import continue_trace, get_baggage, get_traceparent from sentry_sdk.consts import OP from sentry_sdk.hub import _should_send_default_pii from sentry_sdk.integrations import DidNotEnable, Integration -from sentry_sdk.tracing import Transaction, TRANSACTION_SOURCE_TASK +from sentry_sdk.tracing import ( + BAGGAGE_HEADER_NAME, + SENTRY_TRACE_HEADER_NAME, + TRANSACTION_SOURCE_TASK, +) from sentry_sdk.utils import ( capture_internal_exceptions, event_from_exception, @@ -25,7 +30,7 @@ F = TypeVar("F", bound=Callable[..., Any]) try: - from huey.api import Huey, Result, ResultGroup, Task + from huey.api import Huey, Result, ResultGroup, Task, PeriodicTask from huey.exceptions import CancelExecution, RetryTask, TaskLockedException except ImportError: raise DidNotEnable("Huey is not installed") @@ -56,6 +61,14 @@ def _sentry_enqueue(self, task): return old_enqueue(self, task) with hub.start_span(op=OP.QUEUE_SUBMIT_HUEY, description=task.name): + if not isinstance(task, PeriodicTask): + # Attach trace propagation data to task kwargs. We do + # not do this for periodic tasks, as these don't + # really have an originating transaction. 
+ task.kwargs["sentry_headers"] = { + BAGGAGE_HEADER_NAME: get_baggage(), + SENTRY_TRACE_HEADER_NAME: get_traceparent(), + } return old_enqueue(self, task) Huey.enqueue = _sentry_enqueue @@ -145,12 +158,15 @@ def _sentry_execute(self, task, timestamp=None): scope.clear_breadcrumbs() scope.add_event_processor(_make_event_processor(task)) - transaction = Transaction( + sentry_headers = task.kwargs.pop("sentry_headers", None) + + transaction = continue_trace( + sentry_headers or {}, name=task.name, - status="ok", op=OP.QUEUE_TASK_HUEY, source=TRANSACTION_SOURCE_TASK, ) + transaction.set_status("ok") if not getattr(task, "_sentry_is_patched", False): task.execute = _wrap_task_execute(task.execute) diff --git a/tests/integrations/huey/test_huey.py b/tests/integrations/huey/test_huey.py index 0bebd91b19..48a3da97f4 100644 --- a/tests/integrations/huey/test_huey.py +++ b/tests/integrations/huey/test_huey.py @@ -172,3 +172,21 @@ def dummy_task(): assert len(event["spans"]) assert event["spans"][0]["op"] == "queue.submit.huey" assert event["spans"][0]["description"] == "different_task_name" + + +def test_huey_propagate_trace(init_huey, capture_events): + huey = init_huey() + + events = capture_events() + + @huey.task() + def propagated_trace_task(): + pass + + with start_transaction() as outer_transaction: + execute_huey_task(huey, propagated_trace_task) + + assert ( + events[0]["transaction"] == "propagated_trace_task" + ) # the "inner" transaction + assert events[0]["contexts"]["trace"]["trace_id"] == outer_transaction.trace_id From ff0a94b5f1c1eb5063f99aca8b9e267e86a6a177 Mon Sep 17 00:00:00 2001 From: colin-sentry <161344340+colin-sentry@users.noreply.github.com> Date: Mon, 11 Mar 2024 10:06:02 -0400 Subject: [PATCH 3/9] OpenAI integration (#2791) * OpenAI integration * Fix linting errors * Fix CI * Fix lint * Fix more CI issues * Run tests on version pinned OpenAI too * Fix pydantic issue in test * Import type in TYPE_CHECKING gate * PR feedback fixes * Fix tiktoken 
test variant * PII gate the request and response * Rename set_data tags * Move doc location * Add "exclude prompts" flag as optional * Change prompts to be excluded by default * Set flag in tests * Fix tiktoken tox.ini extra dash * Change strip PII semantics * More test coverage for PII * notiktoken --------- Co-authored-by: Anton Pirker --- .../test-integrations-data-processing.yml | 14 +- mypy.ini | 2 + .../split-tox-gh-actions.py | 1 + sentry_sdk/consts.py | 2 + sentry_sdk/integrations/__init__.py | 1 + sentry_sdk/integrations/openai.py | 279 ++++++++++++++++++ setup.py | 1 + tests/integrations/openai/__init__.py | 3 + tests/integrations/openai/test_openai.py | 231 +++++++++++++++ tox.ini | 13 + 10 files changed, 546 insertions(+), 1 deletion(-) create mode 100644 sentry_sdk/integrations/openai.py create mode 100644 tests/integrations/openai/__init__.py create mode 100644 tests/integrations/openai/test_openai.py diff --git a/.github/workflows/test-integrations-data-processing.yml b/.github/workflows/test-integrations-data-processing.yml index ddac93d1e5..c40d45845d 100644 --- a/.github/workflows/test-integrations-data-processing.yml +++ b/.github/workflows/test-integrations-data-processing.yml @@ -25,7 +25,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.5","3.7","3.8","3.11","3.12"] + python-version: ["3.5","3.7","3.8","3.9","3.11","3.12"] # python3.6 reached EOL and is no longer being supported on # new versions of hosted runners on Github Actions # ubuntu-20.04 is the last version that supported python3.6 @@ -58,6 +58,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh "py${{ matrix.python-version }}-huey-latest" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch + - name: Test openai latest + run: | + set -x # print commands that are executed + ./scripts/runtox.sh "py${{ matrix.python-version }}-openai-latest" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch - name: Test rq latest run: | set -x # 
print commands that are executed @@ -110,6 +114,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh --exclude-latest "py${{ matrix.python-version }}-huey" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch + - name: Test openai pinned + run: | + set -x # print commands that are executed + ./scripts/runtox.sh --exclude-latest "py${{ matrix.python-version }}-openai" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch - name: Test rq pinned run: | set -x # print commands that are executed @@ -151,6 +159,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh --exclude-latest "py2.7-huey" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch + - name: Test openai py27 + run: | + set -x # print commands that are executed + ./scripts/runtox.sh --exclude-latest "py2.7-openai" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch - name: Test rq py27 run: | set -x # print commands that are executed diff --git a/mypy.ini b/mypy.ini index fef90c867e..c1444d61e5 100644 --- a/mypy.ini +++ b/mypy.ini @@ -67,6 +67,8 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-huey.*] ignore_missing_imports = True +[mypy-openai.*] +ignore_missing_imports = True [mypy-arq.*] ignore_missing_imports = True [mypy-grpc.*] diff --git a/scripts/split-tox-gh-actions/split-tox-gh-actions.py b/scripts/split-tox-gh-actions/split-tox-gh-actions.py index f8beffc219..13b81283ca 100755 --- a/scripts/split-tox-gh-actions/split-tox-gh-actions.py +++ b/scripts/split-tox-gh-actions/split-tox-gh-actions.py @@ -70,6 +70,7 @@ "beam", "celery", "huey", + "openai", "rq", ], "Databases": [ diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 2b58aecc24..e4edfddef1 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -219,6 +219,8 @@ class OP: MIDDLEWARE_STARLITE = "middleware.starlite" MIDDLEWARE_STARLITE_RECEIVE = "middleware.starlite.receive" MIDDLEWARE_STARLITE_SEND = "middleware.starlite.send" + 
OPENAI_CHAT_COMPLETIONS_CREATE = "ai.chat_completions.create.openai" + OPENAI_EMBEDDINGS_CREATE = "ai.embeddings.create.openai" QUEUE_SUBMIT_ARQ = "queue.submit.arq" QUEUE_TASK_ARQ = "queue.task.arq" QUEUE_SUBMIT_CELERY = "queue.submit.celery" diff --git a/sentry_sdk/integrations/__init__.py b/sentry_sdk/integrations/__init__.py index 21f7188ff1..c9737ae589 100644 --- a/sentry_sdk/integrations/__init__.py +++ b/sentry_sdk/integrations/__init__.py @@ -78,6 +78,7 @@ def iter_default_integrations(with_auto_enabling_integrations): "sentry_sdk.integrations.fastapi.FastApiIntegration", "sentry_sdk.integrations.flask.FlaskIntegration", "sentry_sdk.integrations.httpx.HttpxIntegration", + "sentry_sdk.integrations.openai.OpenAIIntegration", "sentry_sdk.integrations.pyramid.PyramidIntegration", "sentry_sdk.integrations.redis.RedisIntegration", "sentry_sdk.integrations.rq.RqIntegration", diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py new file mode 100644 index 0000000000..5c05a43916 --- /dev/null +++ b/sentry_sdk/integrations/openai.py @@ -0,0 +1,279 @@ +from sentry_sdk import consts +from sentry_sdk._types import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any, Iterable, List, Optional, Callable, Iterator + from sentry_sdk.tracing import Span + +import sentry_sdk +from sentry_sdk._functools import wraps +from sentry_sdk.hub import Hub, _should_send_default_pii +from sentry_sdk.integrations import DidNotEnable, Integration +from sentry_sdk.utils import logger, capture_internal_exceptions, event_from_exception + +try: + from openai.resources.chat.completions import Completions + from openai.resources import Embeddings + + if TYPE_CHECKING: + from openai.types.chat import ChatCompletionMessageParam, ChatCompletionChunk +except ImportError: + raise DidNotEnable("OpenAI not installed") + +try: + import tiktoken # type: ignore + + enc = tiktoken.get_encoding("cl100k_base") + + def count_tokens(s): + # type: (str) -> int + return 
len(enc.encode_ordinary(s)) + + logger.debug("[OpenAI] using tiktoken to count tokens") +except ImportError: + logger.info( + "The Sentry Python SDK requires 'tiktoken' in order to measure token usage from some OpenAI APIs. " + "Please install 'tiktoken' if you aren't receiving token usage in Sentry. " + "See https://docs.sentry.io/platforms/python/integrations/openai/ for more information." + ) + + def count_tokens(s): + # type: (str) -> int + return 0 + + +COMPLETION_TOKENS_USED = "ai.completion_tokens.used" +PROMPT_TOKENS_USED = "ai.prompt_tokens.used" +TOTAL_TOKENS_USED = "ai.total_tokens.used" + + +class OpenAIIntegration(Integration): + identifier = "openai" + + def __init__(self, include_prompts=True): + # type: (OpenAIIntegration, bool) -> None + self.include_prompts = include_prompts + + @staticmethod + def setup_once(): + # type: () -> None + Completions.create = _wrap_chat_completion_create(Completions.create) + Embeddings.create = _wrap_embeddings_create(Embeddings.create) + + +def _capture_exception(hub, exc): + # type: (Hub, Any) -> None + + if hub.client is not None: + event, hint = event_from_exception( + exc, + client_options=hub.client.options, + mechanism={"type": "openai", "handled": False}, + ) + hub.capture_event(event, hint=hint) + + +def _calculate_chat_completion_usage( + messages, response, span, streaming_message_responses=None +): + # type: (Iterable[ChatCompletionMessageParam], Any, Span, Optional[List[str]]) -> None + completion_tokens = 0 + prompt_tokens = 0 + total_tokens = 0 + if hasattr(response, "usage"): + if hasattr(response.usage, "completion_tokens") and isinstance( + response.usage.completion_tokens, int + ): + completion_tokens = response.usage.completion_tokens + if hasattr(response.usage, "prompt_tokens") and isinstance( + response.usage.prompt_tokens, int + ): + prompt_tokens = response.usage.prompt_tokens + if hasattr(response.usage, "total_tokens") and isinstance( + response.usage.total_tokens, int + ): + total_tokens = 
response.usage.total_tokens + + if prompt_tokens == 0: + for message in messages: + if "content" in message: + prompt_tokens += count_tokens(message["content"]) + + if completion_tokens == 0: + if streaming_message_responses is not None: + for message in streaming_message_responses: + completion_tokens += count_tokens(message) + elif hasattr(response, "choices"): + for choice in response.choices: + if hasattr(choice, "message"): + completion_tokens += count_tokens(choice.message) + + if total_tokens == 0: + total_tokens = prompt_tokens + completion_tokens + + if completion_tokens != 0: + span.set_data(COMPLETION_TOKENS_USED, completion_tokens) + if prompt_tokens != 0: + span.set_data(PROMPT_TOKENS_USED, prompt_tokens) + if total_tokens != 0: + span.set_data(TOTAL_TOKENS_USED, total_tokens) + + +def _wrap_chat_completion_create(f): + # type: (Callable[..., Any]) -> Callable[..., Any] + @wraps(f) + def new_chat_completion(*args, **kwargs): + # type: (*Any, **Any) -> Any + hub = Hub.current + if not hub: + return f(*args, **kwargs) + + integration = hub.get_integration(OpenAIIntegration) # type: OpenAIIntegration + if not integration: + return f(*args, **kwargs) + + if "messages" not in kwargs: + # invalid call (in all versions of openai), let it return error + return f(*args, **kwargs) + + try: + iter(kwargs["messages"]) + except TypeError: + # invalid call (in all versions), messages must be iterable + return f(*args, **kwargs) + + kwargs["messages"] = list(kwargs["messages"]) + messages = kwargs["messages"] + model = kwargs.get("model") + streaming = kwargs.get("stream") + + span = sentry_sdk.start_span( + op=consts.OP.OPENAI_CHAT_COMPLETIONS_CREATE, description="Chat Completion" + ) + span.__enter__() + try: + res = f(*args, **kwargs) + except Exception as e: + _capture_exception(Hub.current, e) + span.__exit__(None, None, None) + raise e from None + + with capture_internal_exceptions(): + if _should_send_default_pii() and integration.include_prompts: + 
span.set_data("ai.input_messages", messages) + span.set_data("ai.model_id", model) + span.set_data("ai.streaming", streaming) + + if hasattr(res, "choices"): + if _should_send_default_pii() and integration.include_prompts: + span.set_data( + "ai.responses", list(map(lambda x: x.message, res.choices)) + ) + _calculate_chat_completion_usage(messages, res, span) + span.__exit__(None, None, None) + elif hasattr(res, "_iterator"): + data_buf: list[list[str]] = [] # one for each choice + + old_iterator = res._iterator # type: Iterator[ChatCompletionChunk] + + def new_iterator(): + # type: () -> Iterator[ChatCompletionChunk] + with capture_internal_exceptions(): + for x in old_iterator: + if hasattr(x, "choices"): + choice_index = 0 + for choice in x.choices: + if hasattr(choice, "delta") and hasattr( + choice.delta, "content" + ): + content = choice.delta.content + if len(data_buf) <= choice_index: + data_buf.append([]) + data_buf[choice_index].append(content or "") + choice_index += 1 + yield x + if len(data_buf) > 0: + all_responses = list( + map(lambda chunk: "".join(chunk), data_buf) + ) + if ( + _should_send_default_pii() + and integration.include_prompts + ): + span.set_data("ai.responses", all_responses) + _calculate_chat_completion_usage( + messages, res, span, all_responses + ) + span.__exit__(None, None, None) + + res._iterator = new_iterator() + else: + span.set_data("unknown_response", True) + span.__exit__(None, None, None) + return res + + return new_chat_completion + + +def _wrap_embeddings_create(f): + # type: (Callable[..., Any]) -> Callable[..., Any] + + @wraps(f) + def new_embeddings_create(*args, **kwargs): + # type: (*Any, **Any) -> Any + + hub = Hub.current + if not hub: + return f(*args, **kwargs) + + integration = hub.get_integration(OpenAIIntegration) # type: OpenAIIntegration + if not integration: + return f(*args, **kwargs) + + with sentry_sdk.start_span( + op=consts.OP.OPENAI_EMBEDDINGS_CREATE, + description="OpenAI Embedding Creation", + ) as 
span: + if "input" in kwargs and ( + _should_send_default_pii() and integration.include_prompts + ): + if isinstance(kwargs["input"], str): + span.set_data("ai.input_messages", [kwargs["input"]]) + elif ( + isinstance(kwargs["input"], list) + and len(kwargs["input"]) > 0 + and isinstance(kwargs["input"][0], str) + ): + span.set_data("ai.input_messages", kwargs["input"]) + if "model" in kwargs: + span.set_data("ai.model_id", kwargs["model"]) + try: + response = f(*args, **kwargs) + except Exception as e: + _capture_exception(Hub.current, e) + raise e from None + + prompt_tokens = 0 + total_tokens = 0 + if hasattr(response, "usage"): + if hasattr(response.usage, "prompt_tokens") and isinstance( + response.usage.prompt_tokens, int + ): + prompt_tokens = response.usage.prompt_tokens + if hasattr(response.usage, "total_tokens") and isinstance( + response.usage.total_tokens, int + ): + total_tokens = response.usage.total_tokens + + if prompt_tokens == 0: + prompt_tokens = count_tokens(kwargs["input"] or "") + + if total_tokens == 0: + total_tokens = prompt_tokens + + span.set_data(PROMPT_TOKENS_USED, prompt_tokens) + span.set_data(TOTAL_TOKENS_USED, total_tokens) + + return response + + return new_embeddings_create diff --git a/setup.py b/setup.py index 0af275d6af..0299bf91fb 100644 --- a/setup.py +++ b/setup.py @@ -60,6 +60,7 @@ def get_file_text(file_name): "httpx": ["httpx>=0.16.0"], "huey": ["huey>=2"], "loguru": ["loguru>=0.5"], + "openai": ["openai>=1.0.0", "tiktoken>=0.3.0"], "opentelemetry": ["opentelemetry-distro>=0.35b0"], "opentelemetry-experimental": [ "opentelemetry-distro~=0.40b0", diff --git a/tests/integrations/openai/__init__.py b/tests/integrations/openai/__init__.py new file mode 100644 index 0000000000..d6cc3d5505 --- /dev/null +++ b/tests/integrations/openai/__init__.py @@ -0,0 +1,3 @@ +import pytest + +pytest.importorskip("openai") diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py new file mode 100644 
index 0000000000..ecdedd2694 --- /dev/null +++ b/tests/integrations/openai/test_openai.py @@ -0,0 +1,231 @@ +import pytest +from openai import OpenAI, Stream, OpenAIError +from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding +from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionChunk +from openai.types.chat.chat_completion import Choice +from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice +from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage + +from sentry_sdk import start_transaction +from sentry_sdk.integrations.openai import ( + OpenAIIntegration, + COMPLETION_TOKENS_USED, + PROMPT_TOKENS_USED, + TOTAL_TOKENS_USED, +) + +from unittest import mock # python 3.3 and above + + +EXAMPLE_CHAT_COMPLETION = ChatCompletion( + id="chat-id", + choices=[ + Choice( + index=0, + finish_reason="stop", + message=ChatCompletionMessage( + role="assistant", content="the model response" + ), + ) + ], + created=10000000, + model="model-id", + object="chat.completion", + usage=CompletionUsage( + completion_tokens=10, + prompt_tokens=20, + total_tokens=30, + ), +) + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_nonstreaming_chat_completion( + sentry_init, capture_events, send_default_pii, include_prompts +): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + + with start_transaction(name="openai tx"): + response = ( + client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + .choices[0] + .message.content + ) + + assert response == "the model response" + tx = events[0] + assert 
tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "ai.chat_completions.create.openai" + + if send_default_pii and include_prompts: + assert "hello" in span["data"]["ai.input_messages"][0]["content"] + assert "the model response" in span["data"]["ai.responses"][0] + else: + assert "ai.input_messages" not in span["data"] + assert "ai.responses" not in span["data"] + + assert span["data"][COMPLETION_TOKENS_USED] == 10 + assert span["data"][PROMPT_TOKENS_USED] == 20 + assert span["data"][TOTAL_TOKENS_USED] == 30 + + +# noinspection PyTypeChecker +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_streaming_chat_completion( + sentry_init, capture_events, send_default_pii, include_prompts +): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = OpenAI(api_key="z") + returned_stream = Stream(cast_to=None, response=None, client=None) + returned_stream._iterator = [ + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=0, delta=ChoiceDelta(content="hel"), finish_reason=None + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=1, delta=ChoiceDelta(content="lo "), finish_reason=None + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ChatCompletionChunk( + id="1", + choices=[ + DeltaChoice( + index=2, delta=ChoiceDelta(content="world"), finish_reason="stop" + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ] + + client.chat.completions._post = mock.Mock(return_value=returned_stream) + with start_transaction(name="openai tx"): + response_stream = client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": 
"hello"}] + ) + response_string = "".join( + map(lambda x: x.choices[0].delta.content, response_stream) + ) + assert response_string == "hello world" + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "ai.chat_completions.create.openai" + + if send_default_pii and include_prompts: + assert "hello" in span["data"]["ai.input_messages"][0]["content"] + assert "hello world" in span["data"]["ai.responses"][0] + else: + assert "ai.input_messages" not in span["data"] + assert "ai.responses" not in span["data"] + + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import + + assert span["data"][COMPLETION_TOKENS_USED] == 2 + assert span["data"][PROMPT_TOKENS_USED] == 1 + assert span["data"][TOTAL_TOKENS_USED] == 3 + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly + + +def test_bad_chat_completion(sentry_init, capture_events): + sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock( + side_effect=OpenAIError("API rate limit reached") + ) + with pytest.raises(OpenAIError): + client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + (event,) = events + assert event["level"] == "error" + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_embeddings_create( + sentry_init, capture_events, send_default_pii, include_prompts +): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = OpenAI(api_key="z") + + returned_embedding = CreateEmbeddingResponse( + data=[Embedding(object="embedding", index=0, embedding=[1.0, 2.0, 3.0])], + 
model="some-model", + object="list", + usage=EmbeddingTokenUsage( + prompt_tokens=20, + total_tokens=30, + ), + ) + + client.embeddings._post = mock.Mock(return_value=returned_embedding) + with start_transaction(name="openai tx"): + response = client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) + + assert len(response.data[0].embedding) == 3 + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "ai.embeddings.create.openai" + if send_default_pii and include_prompts: + assert "hello" in span["data"]["ai.input_messages"][0] + else: + assert "ai.input_messages" not in span["data"] + + assert span["data"][PROMPT_TOKENS_USED] == 20 + assert span["data"][TOTAL_TOKENS_USED] == 30 diff --git a/tox.ini b/tox.ini index a23251f186..1e7ba06a00 100644 --- a/tox.ini +++ b/tox.ini @@ -146,6 +146,11 @@ envlist = {py3.5,py3.11,py3.12}-loguru-v{0.5} {py3.5,py3.11,py3.12}-loguru-latest + # OpenAI + {py3.9,py3.11,py3.12}-openai-v1 + {py3.9,py3.11,py3.12}-openai-latest + {py3.9,py3.11,py3.12}-openai-notiktoken + # OpenTelemetry (OTel) {py3.7,py3.9,py3.11,py3.12}-opentelemetry @@ -439,6 +444,13 @@ deps = loguru-v0.5: loguru~=0.5.0 loguru-latest: loguru + # OpenAI + openai-v1: openai~=1.0.0 + openai-v1: tiktoken~=0.6.0 + openai-latest: openai + openai-latest: tiktoken~=0.6.0 + openai-notiktoken: openai + # OpenTelemetry (OTel) opentelemetry: opentelemetry-distro @@ -597,6 +609,7 @@ setenv = httpx: TESTPATH=tests/integrations/httpx huey: TESTPATH=tests/integrations/huey loguru: TESTPATH=tests/integrations/loguru + openai: TESTPATH=tests/integrations/openai opentelemetry: TESTPATH=tests/integrations/opentelemetry pure_eval: TESTPATH=tests/integrations/pure_eval pymongo: TESTPATH=tests/integrations/pymongo From f40e27f16ef4285563a52f1889808e669126a381 Mon Sep 17 00:00:00 2001 From: colin-sentry <161344340+colin-sentry@users.noreply.github.com> Date: Tue, 12 Mar 2024 07:13:16 -0400 Subject: [PATCH 4/9] Add a method 
for normalizing data passed to set_data (#2800) --- sentry_sdk/integrations/openai.py | 55 +++++++++++++++++------- tests/integrations/openai/test_openai.py | 2 +- 2 files changed, 41 insertions(+), 16 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 5c05a43916..0e71029b60 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -73,6 +73,28 @@ def _capture_exception(hub, exc): hub.capture_event(event, hint=hint) +def _normalize_data(data): + # type: (Any) -> Any + + # convert pydantic data (e.g. OpenAI v1+) to json compatible format + if hasattr(data, "model_dump"): + try: + return data.model_dump() + except Exception as e: + logger.warning("Could not convert pydantic data to JSON: %s", e) + return data + if isinstance(data, list): + return list(_normalize_data(x) for x in data) + if isinstance(data, dict): + return {k: _normalize_data(v) for (k, v) in data.items()} + return data + + +def set_data_normalized(span, key, value): + # type: (Span, str, Any) -> None + span.set_data(key, _normalize_data(value)) + + def _calculate_chat_completion_usage( messages, response, span, streaming_message_responses=None ): @@ -112,11 +134,11 @@ def _calculate_chat_completion_usage( total_tokens = prompt_tokens + completion_tokens if completion_tokens != 0: - span.set_data(COMPLETION_TOKENS_USED, completion_tokens) + set_data_normalized(span, COMPLETION_TOKENS_USED, completion_tokens) if prompt_tokens != 0: - span.set_data(PROMPT_TOKENS_USED, prompt_tokens) + set_data_normalized(span, PROMPT_TOKENS_USED, prompt_tokens) if total_tokens != 0: - span.set_data(TOTAL_TOKENS_USED, total_tokens) + set_data_normalized(span, TOTAL_TOKENS_USED, total_tokens) def _wrap_chat_completion_create(f): @@ -160,14 +182,17 @@ def new_chat_completion(*args, **kwargs): with capture_internal_exceptions(): if _should_send_default_pii() and integration.include_prompts: - span.set_data("ai.input_messages", messages) - 
span.set_data("ai.model_id", model) - span.set_data("ai.streaming", streaming) + set_data_normalized(span, "ai.input_messages", messages) + + set_data_normalized(span, "ai.model_id", model) + set_data_normalized(span, "ai.streaming", streaming) if hasattr(res, "choices"): if _should_send_default_pii() and integration.include_prompts: - span.set_data( - "ai.responses", list(map(lambda x: x.message, res.choices)) + set_data_normalized( + span, + "ai.responses", + list(map(lambda x: x.message, res.choices)), ) _calculate_chat_completion_usage(messages, res, span) span.__exit__(None, None, None) @@ -200,7 +225,7 @@ def new_iterator(): _should_send_default_pii() and integration.include_prompts ): - span.set_data("ai.responses", all_responses) + set_data_normalized(span, "ai.responses", all_responses) _calculate_chat_completion_usage( messages, res, span, all_responses ) @@ -208,7 +233,7 @@ def new_iterator(): res._iterator = new_iterator() else: - span.set_data("unknown_response", True) + set_data_normalized(span, "unknown_response", True) span.__exit__(None, None, None) return res @@ -238,15 +263,15 @@ def new_embeddings_create(*args, **kwargs): _should_send_default_pii() and integration.include_prompts ): if isinstance(kwargs["input"], str): - span.set_data("ai.input_messages", [kwargs["input"]]) + set_data_normalized(span, "ai.input_messages", [kwargs["input"]]) elif ( isinstance(kwargs["input"], list) and len(kwargs["input"]) > 0 and isinstance(kwargs["input"][0], str) ): - span.set_data("ai.input_messages", kwargs["input"]) + set_data_normalized(span, "ai.input_messages", kwargs["input"]) if "model" in kwargs: - span.set_data("ai.model_id", kwargs["model"]) + set_data_normalized(span, "ai.model_id", kwargs["model"]) try: response = f(*args, **kwargs) except Exception as e: @@ -271,8 +296,8 @@ def new_embeddings_create(*args, **kwargs): if total_tokens == 0: total_tokens = prompt_tokens - span.set_data(PROMPT_TOKENS_USED, prompt_tokens) - 
span.set_data(TOTAL_TOKENS_USED, total_tokens) + set_data_normalized(span, PROMPT_TOKENS_USED, prompt_tokens) + set_data_normalized(span, TOTAL_TOKENS_USED, total_tokens) return response diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index ecdedd2694..d9a239e004 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -73,7 +73,7 @@ def test_nonstreaming_chat_completion( if send_default_pii and include_prompts: assert "hello" in span["data"]["ai.input_messages"][0]["content"] - assert "the model response" in span["data"]["ai.responses"][0] + assert "the model response" in span["data"]["ai.responses"][0]["content"] else: assert "ai.input_messages" not in span["data"] assert "ai.responses" not in span["data"] From 1a8db5e99e54265b7bd7c176de10d3f202388bc7 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 12 Mar 2024 15:23:56 +0100 Subject: [PATCH 5/9] Discard open spans after 10 minutes (#2801) OTel spans that are handled in the Sentry span processor can never be finished/closed. This leads to a memory leak. This change makes sure that open spans will be removed from memory after 10 minutes to prevent memory usage from growing constantly. 
Fixes #2722 --------- Co-authored-by: Daniel Szoke --- .../opentelemetry/span_processor.py | 50 +++++++++- .../opentelemetry/test_span_processor.py | 92 +++++++++++++++++++ 2 files changed, 139 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/opentelemetry/span_processor.py b/sentry_sdk/integrations/opentelemetry/span_processor.py index 0ed4e7f709..0db698e239 100644 --- a/sentry_sdk/integrations/opentelemetry/span_processor.py +++ b/sentry_sdk/integrations/opentelemetry/span_processor.py @@ -1,3 +1,5 @@ +from time import time + from opentelemetry.context import get_value # type: ignore from opentelemetry.sdk.trace import SpanProcessor # type: ignore from opentelemetry.semconv.trace import SpanAttributes # type: ignore @@ -33,6 +35,7 @@ from sentry_sdk._types import Event, Hint OPEN_TELEMETRY_CONTEXT = "otel" +SPAN_MAX_TIME_OPEN_MINUTES = 10 def link_trace_context_to_error_event(event, otel_span_map): @@ -76,6 +79,9 @@ class SentrySpanProcessor(SpanProcessor): # type: ignore # The mapping from otel span ids to sentry spans otel_span_map = {} # type: Dict[str, Union[Transaction, SentrySpan]] + # The currently open spans. Elements will be discarded after SPAN_MAX_TIME_OPEN_MINUTES + open_spans = {} # type: dict[int, set[str]] + def __new__(cls): # type: () -> SentrySpanProcessor if not hasattr(cls, "instance"): @@ -90,6 +96,24 @@ def global_event_processor(event, hint): # type: (Event, Hint) -> Event return link_trace_context_to_error_event(event, self.otel_span_map) + def _prune_old_spans(self): + # type: (SentrySpanProcessor) -> None + """ + Prune spans that have been open for too long. 
+ """ + current_time_minutes = int(time() / 60) + for span_start_minutes in list( + self.open_spans.keys() + ): # making a list because we change the dict + # prune empty open spans buckets + if self.open_spans[span_start_minutes] == set(): + self.open_spans.pop(span_start_minutes) + + # prune old buckets + elif current_time_minutes - span_start_minutes > SPAN_MAX_TIME_OPEN_MINUTES: + for span_id in self.open_spans.pop(span_start_minutes): + self.otel_span_map.pop(span_id, None) + def on_start(self, otel_span, parent_context=None): # type: (OTelSpan, Optional[SpanContext]) -> None hub = Hub.current @@ -125,7 +149,9 @@ def on_start(self, otel_span, parent_context=None): sentry_span = sentry_parent_span.start_child( span_id=trace_data["span_id"], description=otel_span.name, - start_timestamp=utc_from_timestamp(otel_span.start_time / 1e9), + start_timestamp=utc_from_timestamp( + otel_span.start_time / 1e9 + ), # OTel spans have nanosecond precision instrumenter=INSTRUMENTER.OTEL, ) else: @@ -135,12 +161,22 @@ def on_start(self, otel_span, parent_context=None): parent_span_id=parent_span_id, trace_id=trace_data["trace_id"], baggage=trace_data["baggage"], - start_timestamp=utc_from_timestamp(otel_span.start_time / 1e9), + start_timestamp=utc_from_timestamp( + otel_span.start_time / 1e9 + ), # OTel spans have nanosecond precision instrumenter=INSTRUMENTER.OTEL, ) self.otel_span_map[trace_data["span_id"]] = sentry_span + span_start_in_minutes = int( + otel_span.start_time / 1e9 / 60 + ) # OTel spans have nanosecond precision + self.open_spans.setdefault(span_start_in_minutes, set()).add( + trace_data["span_id"] + ) + self._prune_old_spans() + def on_end(self, otel_span): # type: (OTelSpan) -> None hub = Hub.current @@ -173,7 +209,15 @@ def on_end(self, otel_span): else: self._update_span_with_otel_data(sentry_span, otel_span) - sentry_span.finish(end_timestamp=utc_from_timestamp(otel_span.end_time / 1e9)) + sentry_span.finish( + 
end_timestamp=utc_from_timestamp(otel_span.end_time / 1e9) + ) # OTel spans have nanosecond precision + + span_start_in_minutes = int( + otel_span.start_time / 1e9 / 60 + ) # OTel spans have nanosecond precision + self.open_spans.setdefault(span_start_in_minutes, set()).discard(span_id) + self._prune_old_spans() def _is_sentry_span(self, hub, otel_span): # type: (Hub, OTelSpan) -> bool diff --git a/tests/integrations/opentelemetry/test_span_processor.py b/tests/integrations/opentelemetry/test_span_processor.py index b7e5a7928d..02e3059ca8 100644 --- a/tests/integrations/opentelemetry/test_span_processor.py +++ b/tests/integrations/opentelemetry/test_span_processor.py @@ -531,3 +531,95 @@ def test_link_trace_context_to_error_event(): assert "contexts" in event assert "trace" in event["contexts"] assert event["contexts"]["trace"] == fake_trace_context + + +def test_pruning_old_spans_on_start(): + otel_span = MagicMock() + otel_span.name = "Sample OTel Span" + otel_span.start_time = time.time_ns() + span_context = SpanContext( + trace_id=int("1234567890abcdef1234567890abcdef", 16), + span_id=int("1234567890abcdef", 16), + is_remote=True, + ) + otel_span.get_span_context.return_value = span_context + otel_span.parent = MagicMock() + otel_span.parent.span_id = int("abcdef1234567890", 16) + + parent_context = {} + fake_client = MagicMock() + fake_client.options = {"instrumenter": "otel"} + fake_client.dsn = "https://1234567890abcdef@o123456.ingest.sentry.io/123456" + + current_hub = MagicMock() + current_hub.client = fake_client + + fake_hub = MagicMock() + fake_hub.current = current_hub + + with mock.patch( + "sentry_sdk.integrations.opentelemetry.span_processor.Hub", fake_hub + ): + span_processor = SentrySpanProcessor() + + span_processor.otel_span_map = { + "111111111abcdef": MagicMock(), # should stay + "2222222222abcdef": MagicMock(), # should go + "3333333333abcdef": MagicMock(), # should go + } + current_time_minutes = int(time.time() / 60) + 
span_processor.open_spans = { + current_time_minutes - 3: {"111111111abcdef"}, # should stay + current_time_minutes + - 11: {"2222222222abcdef", "3333333333abcdef"}, # should go + } + + span_processor.on_start(otel_span, parent_context) + assert sorted(list(span_processor.otel_span_map.keys())) == [ + "111111111abcdef", + "1234567890abcdef", + ] + assert sorted(list(span_processor.open_spans.values())) == [ + {"111111111abcdef"}, + {"1234567890abcdef"}, + ] + + +def test_pruning_old_spans_on_end(): + otel_span = MagicMock() + otel_span.name = "Sample OTel Span" + otel_span.start_time = time.time_ns() + span_context = SpanContext( + trace_id=int("1234567890abcdef1234567890abcdef", 16), + span_id=int("1234567890abcdef", 16), + is_remote=True, + ) + otel_span.get_span_context.return_value = span_context + otel_span.parent = MagicMock() + otel_span.parent.span_id = int("abcdef1234567890", 16) + + fake_sentry_span = MagicMock(spec=Span) + fake_sentry_span.set_context = MagicMock() + fake_sentry_span.finish = MagicMock() + + span_processor = SentrySpanProcessor() + span_processor._get_otel_context = MagicMock() + span_processor._update_span_with_otel_data = MagicMock() + + span_processor.otel_span_map = { + "111111111abcdef": MagicMock(), # should stay + "2222222222abcdef": MagicMock(), # should go + "3333333333abcdef": MagicMock(), # should go + "1234567890abcdef": fake_sentry_span, # should go (because it is closed) + } + current_time_minutes = int(time.time() / 60) + span_processor.open_spans = { + current_time_minutes: {"1234567890abcdef"}, # should go (because it is closed) + current_time_minutes - 3: {"111111111abcdef"}, # should stay + current_time_minutes + - 11: {"2222222222abcdef", "3333333333abcdef"}, # should go + } + + span_processor.on_end(otel_span) + assert sorted(list(span_processor.otel_span_map.keys())) == ["111111111abcdef"] + assert sorted(list(span_processor.open_spans.values())) == [{"111111111abcdef"}] From 5717f1b17e363cc4e3af6b4bfd886158125300ab 
Mon Sep 17 00:00:00 2001 From: Daniel Szoke Date: Tue, 12 Mar 2024 16:21:24 +0100 Subject: [PATCH 6/9] ref: Event Type (#2753) Implements type hinting for Event via a TypedDict. This commit mainly adjusts type hints; however, there are also some minor code changes to make the code type-safe following the new changes. Some items in the Event could have their types expanded by being defined as TypedDicts themselves. These items have been indicated with TODO comments. Fixes GH-2357 --- sentry_sdk/_types.py | 64 ++++++++++++++++++- sentry_sdk/api.py | 5 +- sentry_sdk/client.py | 15 +++-- sentry_sdk/crons/api.py | 5 +- sentry_sdk/hub.py | 3 +- sentry_sdk/integrations/_wsgi_common.py | 3 +- sentry_sdk/integrations/aiohttp.py | 9 ++- sentry_sdk/integrations/ariadne.py | 6 +- sentry_sdk/integrations/bottle.py | 2 +- sentry_sdk/integrations/django/__init__.py | 4 +- sentry_sdk/integrations/django/asgi.py | 4 +- sentry_sdk/integrations/falcon.py | 6 +- sentry_sdk/integrations/fastapi.py | 5 +- sentry_sdk/integrations/flask.py | 6 +- sentry_sdk/integrations/gnu_backtrace.py | 6 +- sentry_sdk/integrations/gql.py | 4 +- sentry_sdk/integrations/graphene.py | 3 +- sentry_sdk/integrations/logging.py | 7 +- sentry_sdk/integrations/modules.py | 4 +- sentry_sdk/integrations/pyramid.py | 4 +- sentry_sdk/integrations/quart.py | 7 +- sentry_sdk/integrations/rq.py | 14 ++-- sentry_sdk/integrations/spark/spark_worker.py | 2 +- sentry_sdk/integrations/starlette.py | 9 +-- sentry_sdk/integrations/starlite.py | 6 +- sentry_sdk/integrations/stdlib.py | 2 +- sentry_sdk/integrations/strawberry.py | 18 +++--- sentry_sdk/integrations/tornado.py | 6 +- sentry_sdk/integrations/wsgi.py | 4 +- sentry_sdk/profiler.py | 4 +- sentry_sdk/scope.py | 21 +++--- sentry_sdk/tracing.py | 4 +- sentry_sdk/utils.py | 10 +-- 33 files changed, 176 insertions(+), 96 deletions(-) diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py index 2536541072..49bffb3416 100644 --- a/sentry_sdk/_types.py +++ 
b/sentry_sdk/_types.py @@ -9,6 +9,10 @@ if TYPE_CHECKING: + from collections.abc import MutableMapping + + from datetime import datetime + from types import TracebackType from typing import Any from typing import Callable @@ -19,13 +23,69 @@ from typing import Tuple from typing import Type from typing import Union - from typing_extensions import Literal + from typing_extensions import Literal, TypedDict + + # "critical" is an alias of "fatal" recognized by Relay + LogLevelStr = Literal["fatal", "critical", "error", "warning", "info", "debug"] + + Event = TypedDict( + "Event", + { + "breadcrumbs": dict[ + Literal["values"], list[dict[str, Any]] + ], # TODO: We can expand on this type + "check_in_id": str, + "contexts": dict[str, dict[str, object]], + "dist": str, + "duration": Optional[float], + "environment": str, + "errors": list[dict[str, Any]], # TODO: We can expand on this type + "event_id": str, + "exception": dict[ + Literal["values"], list[dict[str, Any]] + ], # TODO: We can expand on this type + "extra": MutableMapping[str, object], + "fingerprint": list[str], + "level": LogLevelStr, + "logentry": Mapping[str, object], + "logger": str, + "measurements": dict[str, object], + "message": str, + "modules": dict[str, str], + "monitor_config": Mapping[str, object], + "monitor_slug": Optional[str], + "platform": Literal["python"], + "profile": object, # Should be sentry_sdk.profiler.Profile, but we can't import that here due to circular imports + "release": str, + "request": dict[str, object], + "sdk": Mapping[str, object], + "server_name": str, + "spans": list[dict[str, object]], + "stacktrace": dict[ + str, object + ], # We access this key in the code, but I am unsure whether we ever set it + "start_timestamp": datetime, + "status": Optional[str], + "tags": MutableMapping[ + str, str + ], # Tags must be less than 200 characters each + "threads": dict[ + Literal["values"], list[dict[str, Any]] + ], # TODO: We can expand on this type + "timestamp": 
Optional[datetime], # Must be set before sending the event + "transaction": str, + "transaction_info": Mapping[str, Any], # TODO: We can expand on this type + "type": Literal["check_in", "transaction"], + "user": dict[str, object], + "_metrics_summary": dict[str, object], + }, + total=False, + ) ExcInfo = Tuple[ Optional[Type[BaseException]], Optional[BaseException], Optional[TracebackType] ] - Event = Dict[str, Any] Hint = Dict[str, Any] Breadcrumb = Dict[str, Any] diff --git a/sentry_sdk/api.py b/sentry_sdk/api.py index 1b56571bfa..3148c43f1a 100644 --- a/sentry_sdk/api.py +++ b/sentry_sdk/api.py @@ -22,6 +22,7 @@ BreadcrumbHint, ExcInfo, MeasurementUnit, + LogLevelStr, ) from sentry_sdk.tracing import Span @@ -91,7 +92,7 @@ def capture_event( @hubmethod def capture_message( message, # type: str - level=None, # type: Optional[str] + level=None, # type: Optional[LogLevelStr] scope=None, # type: Optional[Any] **scope_kwargs # type: Any ): @@ -189,7 +190,7 @@ def set_user(value): @scopemethod def set_level(value): - # type: (str) -> None + # type: (LogLevelStr) -> None return Hub.current.scope.set_level(value) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 64e65a8cb6..296de71804 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -1,3 +1,8 @@ +try: + from collections.abc import Mapping +except ImportError: + from collections import Mapping # type: ignore[attr-defined] + from importlib import import_module import os import uuid @@ -38,7 +43,7 @@ from sentry_sdk.utils import ContextVar from sentry_sdk.sessions import SessionFlusher from sentry_sdk.envelope import Envelope -from sentry_sdk.profiler import has_profiling_enabled, setup_profiler +from sentry_sdk.profiler import has_profiling_enabled, Profile, setup_profiler from sentry_sdk.scrubber import EventScrubber from sentry_sdk.monitor import Monitor from sentry_sdk.spotlight import setup_spotlight @@ -393,7 +398,7 @@ def _prepare_event( for key in "release", "environment", 
"server_name", "dist": if event.get(key) is None and self.options[key] is not None: - event[key] = text_type(self.options[key]).strip() + event[key] = text_type(self.options[key]).strip() # type: ignore[literal-required] if event.get("sdk") is None: sdk_info = dict(SDK_INFO) sdk_info["integrations"] = sorted(self.integrations.keys()) @@ -567,7 +572,7 @@ def _update_session_from_event( errored = True for error in exceptions: mechanism = error.get("mechanism") - if mechanism and mechanism.get("handled") is False: + if isinstance(mechanism, Mapping) and mechanism.get("handled") is False: crashed = True break @@ -659,7 +664,7 @@ def capture_event( headers = { "event_id": event_opt["event_id"], "sent_at": format_timestamp(datetime_utcnow()), - } + } # type: dict[str, object] if dynamic_sampling_context: headers["trace"] = dynamic_sampling_context @@ -667,7 +672,7 @@ def capture_event( envelope = Envelope(headers=headers) if is_transaction: - if profile is not None: + if isinstance(profile, Profile): envelope.add_profile(profile.to_json(event_opt, self.options)) envelope.add_transaction(event_opt) elif is_checkin: diff --git a/sentry_sdk/crons/api.py b/sentry_sdk/crons/api.py index cd240a7dcd..92d113a924 100644 --- a/sentry_sdk/crons/api.py +++ b/sentry_sdk/crons/api.py @@ -6,6 +6,7 @@ if TYPE_CHECKING: from typing import Any, Dict, Optional + from sentry_sdk._types import Event def _create_check_in_event( @@ -15,7 +16,7 @@ def _create_check_in_event( duration_s=None, monitor_config=None, ): - # type: (Optional[str], Optional[str], Optional[str], Optional[float], Optional[Dict[str, Any]]) -> Dict[str, Any] + # type: (Optional[str], Optional[str], Optional[str], Optional[float], Optional[Dict[str, Any]]) -> Event options = Hub.current.client.options if Hub.current.client else {} check_in_id = check_in_id or uuid.uuid4().hex # type: str @@ -27,7 +28,7 @@ def _create_check_in_event( "duration": duration_s, "environment": options.get("environment", None), "release": 
options.get("release", None), - } + } # type: Event if monitor_config: check_in["monitor_config"] = monitor_config diff --git a/sentry_sdk/hub.py b/sentry_sdk/hub.py index c339528821..a716d33433 100644 --- a/sentry_sdk/hub.py +++ b/sentry_sdk/hub.py @@ -40,6 +40,7 @@ Breadcrumb, BreadcrumbHint, ExcInfo, + LogLevelStr, ) from sentry_sdk.consts import ClientConstructor @@ -335,7 +336,7 @@ def capture_event(self, event, hint=None, scope=None, **scope_kwargs): return last_event_id def capture_message(self, message, level=None, scope=None, **scope_kwargs): - # type: (str, Optional[str], Optional[Scope], Any) -> Optional[str] + # type: (str, Optional[LogLevelStr], Optional[Scope], Any) -> Optional[str] """ Captures a message. diff --git a/sentry_sdk/integrations/_wsgi_common.py b/sentry_sdk/integrations/_wsgi_common.py index 5a41654498..b72ebde126 100644 --- a/sentry_sdk/integrations/_wsgi_common.py +++ b/sentry_sdk/integrations/_wsgi_common.py @@ -22,6 +22,7 @@ from typing import Dict from typing import Optional from typing import Union + from sentry_sdk._types import Event SENSITIVE_ENV_KEYS = ( @@ -59,7 +60,7 @@ def __init__(self, request): self.request = request def extract_into_event(self, event): - # type: (Dict[str, Any]) -> None + # type: (Event) -> None client = Hub.current.client if client is None: return diff --git a/sentry_sdk/integrations/aiohttp.py b/sentry_sdk/integrations/aiohttp.py index e51bdeeac3..19974030ed 100644 --- a/sentry_sdk/integrations/aiohttp.py +++ b/sentry_sdk/integrations/aiohttp.py @@ -48,13 +48,12 @@ from aiohttp import TraceRequestStartParams, TraceRequestEndParams from types import SimpleNamespace from typing import Any - from typing import Dict from typing import Optional from typing import Tuple from typing import Union from sentry_sdk.utils import ExcInfo - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor TRANSACTION_STYLE_VALUES = ("handler_name", "method_and_path_pattern") @@ 
-256,10 +255,10 @@ async def on_request_end(session, trace_config_ctx, params): def _make_request_processor(weak_request): # type: (weakref.ReferenceType[Request]) -> EventProcessor def aiohttp_processor( - event, # type: Dict[str, Any] - hint, # type: Dict[str, Tuple[type, BaseException, Any]] + event, # type: Event + hint, # type: dict[str, Tuple[type, BaseException, Any]] ): - # type: (...) -> Dict[str, Any] + # type: (...) -> Event request = weak_request() if request is None: return event diff --git a/sentry_sdk/integrations/ariadne.py b/sentry_sdk/integrations/ariadne.py index 86d6b5e28e..5b98a88443 100644 --- a/sentry_sdk/integrations/ariadne.py +++ b/sentry_sdk/integrations/ariadne.py @@ -23,7 +23,7 @@ from typing import Any, Dict, List, Optional from ariadne.types import GraphQLError, GraphQLResult, GraphQLSchema, QueryParser # type: ignore from graphql.language.ast import DocumentNode # type: ignore - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor class AriadneIntegration(Integration): @@ -131,7 +131,7 @@ def _make_request_event_processor(data): """Add request data and api_target to events.""" def inner(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event if not isinstance(data, dict): return event @@ -163,7 +163,7 @@ def _make_response_event_processor(response): """Add response data to the event's response context.""" def inner(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event with capture_internal_exceptions(): if _should_send_default_pii() and response.get("errors"): contexts = event.setdefault("contexts", {}) diff --git a/sentry_sdk/integrations/bottle.py b/sentry_sdk/integrations/bottle.py index cc6360daa3..6f3678466e 100644 --- a/sentry_sdk/integrations/bottle.py +++ b/sentry_sdk/integrations/bottle.py @@ -200,7 +200,7 @@ def _make_request_event_processor(app, request, 
integration): # type: (Bottle, LocalRequest, BottleIntegration) -> EventProcessor def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event _set_transaction_name_and_source(event, integration.transaction_style, request) with capture_internal_exceptions(): diff --git a/sentry_sdk/integrations/django/__init__.py b/sentry_sdk/integrations/django/__init__.py index 426565e645..98834a4693 100644 --- a/sentry_sdk/integrations/django/__init__.py +++ b/sentry_sdk/integrations/django/__init__.py @@ -472,7 +472,7 @@ def sentry_patched_get_response(self, request): def _make_wsgi_request_event_processor(weak_request, integration): # type: (Callable[[], WSGIRequest], DjangoIntegration) -> EventProcessor def wsgi_request_event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event # if the request is gone we are fine not logging the data from # it. This might happen if the processor is pushed away to # another thread. 
@@ -570,7 +570,7 @@ def parsed_body(self): def _set_user_info(request, event): - # type: (WSGIRequest, Dict[str, Any]) -> None + # type: (WSGIRequest, Event) -> None user_info = event.setdefault("user", {}) user = getattr(request, "user", None) diff --git a/sentry_sdk/integrations/django/asgi.py b/sentry_sdk/integrations/django/asgi.py index 18f6a58811..e1ba678011 100644 --- a/sentry_sdk/integrations/django/asgi.py +++ b/sentry_sdk/integrations/django/asgi.py @@ -26,13 +26,13 @@ from django.core.handlers.asgi import ASGIRequest from django.http.response import HttpResponse - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor def _make_asgi_request_event_processor(request): # type: (ASGIRequest) -> EventProcessor def asgi_request_event_processor(event, hint): - # type: (dict[str, Any], dict[str, Any]) -> dict[str, Any] + # type: (Event, dict[str, Any]) -> Event # if the request is gone we are fine not logging the data from # it. This might happen if the processor is pushed away to # another thread. 
diff --git a/sentry_sdk/integrations/falcon.py b/sentry_sdk/integrations/falcon.py index 3fab11cfeb..d5e2480485 100644 --- a/sentry_sdk/integrations/falcon.py +++ b/sentry_sdk/integrations/falcon.py @@ -18,7 +18,7 @@ from typing import Dict from typing import Optional - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor # In Falcon 3.0 `falcon.api_helpers` is renamed to `falcon.app_helpers` # and `falcon.API` to `falcon.App` @@ -258,7 +258,7 @@ def _has_http_5xx_status(response): def _set_transaction_name_and_source(event, transaction_style, request): - # type: (Dict[str, Any], str, falcon.Request) -> None + # type: (Event, str, falcon.Request) -> None name_for_style = { "uri_template": request.uri_template, "path": request.path, @@ -271,7 +271,7 @@ def _make_request_event_processor(req, integration): # type: (falcon.Request, FalconIntegration) -> EventProcessor def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event _set_transaction_name_and_source(event, integration.transaction_style, req) with capture_internal_exceptions(): diff --git a/sentry_sdk/integrations/fastapi.py b/sentry_sdk/integrations/fastapi.py index 6fbe53b92b..33a5591cc4 100644 --- a/sentry_sdk/integrations/fastapi.py +++ b/sentry_sdk/integrations/fastapi.py @@ -11,6 +11,7 @@ if TYPE_CHECKING: from typing import Any, Callable, Dict from sentry_sdk.scope import Scope + from sentry_sdk._types import Event try: from sentry_sdk.integrations.starlette import ( @@ -111,9 +112,9 @@ async def _sentry_app(*args, **kwargs): info = await extractor.extract_request_info() def _make_request_event_processor(req, integration): - # type: (Any, Any) -> Callable[[Dict[str, Any], Dict[str, Any]], Dict[str, Any]] + # type: (Any, Any) -> Callable[[Event, Dict[str, Any]], Event] def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, 
Dict[str, Any]) -> Event # Extract information from request request_info = event.get("request", {}) diff --git a/sentry_sdk/integrations/flask.py b/sentry_sdk/integrations/flask.py index 453ab48ce3..f0bc3d7750 100644 --- a/sentry_sdk/integrations/flask.py +++ b/sentry_sdk/integrations/flask.py @@ -16,7 +16,7 @@ if TYPE_CHECKING: from typing import Any, Callable, Dict, Union - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor from sentry_sdk.integrations.wsgi import _ScopedResponse from werkzeug.datastructures import FileStorage, ImmutableMultiDict @@ -172,7 +172,7 @@ def _make_request_event_processor(app, request, integration): # type: (Flask, Callable[[], Request], FlaskIntegration) -> EventProcessor def inner(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event # if the request is gone we are fine not logging the data from # it. This might happen if the processor is pushed away to @@ -211,7 +211,7 @@ def _capture_exception(sender, exception, **kwargs): def _add_user_to_event(event): - # type: (Dict[str, Any]) -> None + # type: (Event) -> None if flask_login is None: return diff --git a/sentry_sdk/integrations/gnu_backtrace.py b/sentry_sdk/integrations/gnu_backtrace.py index ad9c437878..f8321a6cd7 100644 --- a/sentry_sdk/integrations/gnu_backtrace.py +++ b/sentry_sdk/integrations/gnu_backtrace.py @@ -9,7 +9,7 @@ if TYPE_CHECKING: from typing import Any - from typing import Dict + from sentry_sdk._types import Event MODULE_RE = r"[a-zA-Z0-9/._:\\-]+" @@ -42,13 +42,13 @@ def setup_once(): # type: () -> None @add_global_event_processor def process_gnu_backtrace(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event with capture_internal_exceptions(): return _process_gnu_backtrace(event, hint) def _process_gnu_backtrace(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] 
+ # type: (Event, dict[str, Any]) -> Event if Hub.current.get_integration(GnuBacktraceIntegration) is None: return event diff --git a/sentry_sdk/integrations/gql.py b/sentry_sdk/integrations/gql.py index 79fc8d022f..9db6632a4a 100644 --- a/sentry_sdk/integrations/gql.py +++ b/sentry_sdk/integrations/gql.py @@ -14,7 +14,7 @@ if TYPE_CHECKING: from typing import Any, Dict, Tuple, Union - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor EventDataType = Dict[str, Union[str, Tuple[VariableDefinitionNode, ...]]] @@ -112,7 +112,7 @@ def sentry_patched_execute(self, document, *args, **kwargs): def _make_gql_event_processor(client, document): # type: (gql.Client, DocumentNode) -> EventProcessor def processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event try: errors = hint["exc_info"][1].errors except (AttributeError, KeyError): diff --git a/sentry_sdk/integrations/graphene.py b/sentry_sdk/integrations/graphene.py index fa753d0812..b9c3b26018 100644 --- a/sentry_sdk/integrations/graphene.py +++ b/sentry_sdk/integrations/graphene.py @@ -19,6 +19,7 @@ from graphene.language.source import Source # type: ignore from graphql.execution import ExecutionResult # type: ignore from graphql.type import GraphQLSchema # type: ignore + from sentry_sdk._types import Event class GrapheneIntegration(Integration): @@ -100,7 +101,7 @@ async def _sentry_patched_graphql_async(schema, source, *args, **kwargs): def _event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, Dict[str, Any]) -> Event if _should_send_default_pii(): request_info = event.setdefault("request", {}) request_info["api_target"] = "graphql" diff --git a/sentry_sdk/integrations/logging.py b/sentry_sdk/integrations/logging.py index ee6bb8e1d1..d455983fc5 100644 --- a/sentry_sdk/integrations/logging.py +++ b/sentry_sdk/integrations/logging.py @@ -16,6 +16,7 
@@ from sentry_sdk._types import TYPE_CHECKING if TYPE_CHECKING: + from collections.abc import MutableMapping from logging import LogRecord from typing import Any from typing import Dict @@ -156,7 +157,7 @@ def _logging_to_event_level(self, record): ) def _extra_from_record(self, record): - # type: (LogRecord) -> Dict[str, None] + # type: (LogRecord) -> MutableMapping[str, object] return { k: v for k, v in iteritems(vars(record)) @@ -225,7 +226,9 @@ def _emit(self, record): hint["log_record"] = record - event["level"] = self._logging_to_event_level(record) + level = self._logging_to_event_level(record) + if level in {"debug", "info", "warning", "error", "critical", "fatal"}: + event["level"] = level # type: ignore[typeddict-item] event["logger"] = record.name # Log records from `warnings` module as separate issues diff --git a/sentry_sdk/integrations/modules.py b/sentry_sdk/integrations/modules.py index 5b595b4032..fa0fbf8936 100644 --- a/sentry_sdk/integrations/modules.py +++ b/sentry_sdk/integrations/modules.py @@ -9,8 +9,6 @@ if TYPE_CHECKING: from typing import Any - from typing import Dict - from sentry_sdk._types import Event @@ -22,7 +20,7 @@ def setup_once(): # type: () -> None @add_global_event_processor def processor(event, hint): - # type: (Event, Any) -> Dict[str, Any] + # type: (Event, Any) -> Event if event.get("type") == "transaction": return event diff --git a/sentry_sdk/integrations/pyramid.py b/sentry_sdk/integrations/pyramid.py index 80750f0268..3b9b2fdb96 100644 --- a/sentry_sdk/integrations/pyramid.py +++ b/sentry_sdk/integrations/pyramid.py @@ -36,7 +36,7 @@ from webob.compat import cgi_FieldStorage # type: ignore from sentry_sdk.utils import ExcInfo - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor if getattr(Request, "authenticated_userid", None): @@ -216,7 +216,7 @@ def size_of_file(self, postdata): def _make_event_processor(weak_request, integration): # type: (Callable[[], Request], 
PyramidIntegration) -> EventProcessor def pyramid_event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, Dict[str, Any]) -> Event request = weak_request() if request is None: return event diff --git a/sentry_sdk/integrations/quart.py b/sentry_sdk/integrations/quart.py index 4dee751d65..8803fa7cea 100644 --- a/sentry_sdk/integrations/quart.py +++ b/sentry_sdk/integrations/quart.py @@ -20,10 +20,9 @@ if TYPE_CHECKING: from typing import Any - from typing import Dict from typing import Union - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor try: import quart_auth # type: ignore @@ -186,7 +185,7 @@ async def _request_websocket_started(app, **kwargs): def _make_request_event_processor(app, request, integration): # type: (Quart, Request, QuartIntegration) -> EventProcessor def inner(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event # if the request is gone we are fine not logging the data from # it. This might happen if the processor is pushed away to # another thread. 
@@ -231,7 +230,7 @@ async def _capture_exception(sender, exception, **kwargs): def _add_user_to_event(event): - # type: (Dict[str, Any]) -> None + # type: (Event) -> None if quart_auth is None: return diff --git a/sentry_sdk/integrations/rq.py b/sentry_sdk/integrations/rq.py index b5eeb0be85..2b32e59880 100644 --- a/sentry_sdk/integrations/rq.py +++ b/sentry_sdk/integrations/rq.py @@ -27,9 +27,9 @@ from sentry_sdk._types import TYPE_CHECKING if TYPE_CHECKING: - from typing import Any, Callable, Dict + from typing import Any, Callable - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor from sentry_sdk.utils import ExcInfo from rq.job import Job @@ -126,12 +126,12 @@ def sentry_patched_enqueue_job(self, job, **kwargs): def _make_event_processor(weak_job): # type: (Callable[[], Job]) -> EventProcessor def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event job = weak_job() if job is not None: with capture_internal_exceptions(): extra = event.setdefault("extra", {}) - extra["rq-job"] = { + rq_job = { "job_id": job.id, "func": job.func_name, "args": job.args, @@ -140,9 +140,11 @@ def event_processor(event, hint): } if job.enqueued_at: - extra["rq-job"]["enqueued_at"] = format_timestamp(job.enqueued_at) + rq_job["enqueued_at"] = format_timestamp(job.enqueued_at) if job.started_at: - extra["rq-job"]["started_at"] = format_timestamp(job.started_at) + rq_job["started_at"] = format_timestamp(job.started_at) + + extra["rq-job"] = rq_job if "exc_info" in hint: with capture_internal_exceptions(): diff --git a/sentry_sdk/integrations/spark/spark_worker.py b/sentry_sdk/integrations/spark/spark_worker.py index cd4eb0f28b..632e870973 100644 --- a/sentry_sdk/integrations/spark/spark_worker.py +++ b/sentry_sdk/integrations/spark/spark_worker.py @@ -58,7 +58,7 @@ def _capture_exception(exc_info, hub): if rv: rv.reverse() hint = 
event_hint_with_exc_info(exc_info) - event = {"level": "error", "exception": {"values": rv}} + event = {"level": "error", "exception": {"values": rv}} # type: Event _tag_task_context() diff --git a/sentry_sdk/integrations/starlette.py b/sentry_sdk/integrations/starlette.py index ed95c757f1..79bb18aa78 100644 --- a/sentry_sdk/integrations/starlette.py +++ b/sentry_sdk/integrations/starlette.py @@ -32,6 +32,7 @@ from typing import Any, Awaitable, Callable, Dict, Optional, Tuple from sentry_sdk.scope import Scope as SentryScope + from sentry_sdk._types import Event try: import starlette # type: ignore @@ -407,9 +408,9 @@ async def _sentry_async_func(*args, **kwargs): info = await extractor.extract_request_info() def _make_request_event_processor(req, integration): - # type: (Any, Any) -> Callable[[Dict[str, Any], Dict[str, Any]], Dict[str, Any]] + # type: (Any, Any) -> Callable[[Event, dict[str, Any]], Event] def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, Dict[str, Any]) -> Event # Add info from request to event request_info = event.get("request", {}) @@ -455,9 +456,9 @@ def _sentry_sync_func(*args, **kwargs): cookies = extractor.extract_cookies_from_request() def _make_request_event_processor(req, integration): - # type: (Any, Any) -> Callable[[Dict[str, Any], Dict[str, Any]], Dict[str, Any]] + # type: (Any, Any) -> Callable[[Event, dict[str, Any]], Event] def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event # Extract information from request request_info = event.get("request", {}) diff --git a/sentry_sdk/integrations/starlite.py b/sentry_sdk/integrations/starlite.py index 3900ce8c8a..070675c2e7 100644 --- a/sentry_sdk/integrations/starlite.py +++ b/sentry_sdk/integrations/starlite.py @@ -219,7 +219,11 @@ def event_processor(event: "Event", _: "Dict[str, Any]") -> "Event": tx_info = {"source": TRANSACTION_SOURCE_ROUTE} 
event.update( - request=request_info, transaction=tx_name, transaction_info=tx_info + { + "request": request_info, + "transaction": tx_name, + "transaction_info": tx_info, + } ) return event diff --git a/sentry_sdk/integrations/stdlib.py b/sentry_sdk/integrations/stdlib.py index a5c3bfb2ae..0a17834a40 100644 --- a/sentry_sdk/integrations/stdlib.py +++ b/sentry_sdk/integrations/stdlib.py @@ -39,7 +39,7 @@ "name": platform.python_implementation(), "version": "%s.%s.%s" % (sys.version_info[:3]), "build": sys.version, -} +} # type: dict[str, object] class StdlibIntegration(Integration): diff --git a/sentry_sdk/integrations/strawberry.py b/sentry_sdk/integrations/strawberry.py index 8f4314f663..3d450e0692 100644 --- a/sentry_sdk/integrations/strawberry.py +++ b/sentry_sdk/integrations/strawberry.py @@ -29,11 +29,11 @@ raise DidNotEnable("strawberry-graphql is not installed") if TYPE_CHECKING: - from typing import Any, Callable, Dict, Generator, List, Optional + from typing import Any, Callable, Generator, List, Optional from graphql import GraphQLError, GraphQLResolveInfo # type: ignore from strawberry.http import GraphQLHTTPResponse from strawberry.types import ExecutionContext, ExecutionResult # type: ignore - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor ignore_logger("strawberry.execution") @@ -349,21 +349,21 @@ def _make_request_event_processor(execution_context): # type: (ExecutionContext) -> EventProcessor def inner(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event with capture_internal_exceptions(): if _should_send_default_pii(): request_data = event.setdefault("request", {}) request_data["api_target"] = "graphql" if not request_data.get("data"): - request_data["data"] = {"query": execution_context.query} + data = {"query": execution_context.query} if execution_context.variables: - request_data["data"]["variables"] = 
execution_context.variables + data["variables"] = execution_context.variables if execution_context.operation_name: - request_data["data"][ - "operationName" - ] = execution_context.operation_name + data["operationName"] = execution_context.operation_name + + request_data["data"] = data else: try: @@ -380,7 +380,7 @@ def _make_response_event_processor(response_data): # type: (GraphQLHTTPResponse) -> EventProcessor def inner(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event with capture_internal_exceptions(): if _should_send_default_pii(): contexts = event.setdefault("contexts", {}) diff --git a/sentry_sdk/integrations/tornado.py b/sentry_sdk/integrations/tornado.py index 8af93c47f3..c6f7700f12 100644 --- a/sentry_sdk/integrations/tornado.py +++ b/sentry_sdk/integrations/tornado.py @@ -41,7 +41,7 @@ from typing import Callable from typing import Generator - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor class TornadoIntegration(Integration): @@ -155,7 +155,7 @@ def _capture_exception(ty, value, tb): def _make_event_processor(weak_handler): # type: (Callable[[], RequestHandler]) -> EventProcessor def tornado_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, dict[str, Any]) -> Event handler = weak_handler() if handler is None: return event @@ -164,7 +164,7 @@ def tornado_processor(event, hint): with capture_internal_exceptions(): method = getattr(handler, handler.request.method.lower()) - event["transaction"] = transaction_from_function(method) + event["transaction"] = transaction_from_function(method) or "" event["transaction_info"] = {"source": TRANSACTION_SOURCE_COMPONENT} with capture_internal_exceptions(): diff --git a/sentry_sdk/integrations/wsgi.py b/sentry_sdk/integrations/wsgi.py index 0d53766efb..e7fd0da66d 100644 --- a/sentry_sdk/integrations/wsgi.py +++ b/sentry_sdk/integrations/wsgi.py 
@@ -27,7 +27,7 @@ from typing import Protocol from sentry_sdk.utils import ExcInfo - from sentry_sdk._types import EventProcessor + from sentry_sdk._types import Event, EventProcessor WsgiResponseIter = TypeVar("WsgiResponseIter") WsgiResponseHeaders = TypeVar("WsgiResponseHeaders") @@ -254,7 +254,7 @@ def _make_wsgi_event_processor(environ, use_x_forwarded_for): headers = _filter_headers(dict(_get_headers(environ))) def event_processor(event, hint): - # type: (Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, Dict[str, Any]) -> Event with capture_internal_exceptions(): # if the code below fails halfway through we at least have some data request_info = event.setdefault("request", {}) diff --git a/sentry_sdk/profiler.py b/sentry_sdk/profiler.py index be954b2a2c..ef4868f745 100644 --- a/sentry_sdk/profiler.py +++ b/sentry_sdk/profiler.py @@ -62,7 +62,7 @@ from typing_extensions import TypedDict import sentry_sdk.tracing - from sentry_sdk._types import SamplingContext, ProfilerMode + from sentry_sdk._types import Event, SamplingContext, ProfilerMode ThreadId = str @@ -673,7 +673,7 @@ def process(self): } def to_json(self, event_opt, options): - # type: (Any, Dict[str, Any], Dict[str, Any]) -> Dict[str, Any] + # type: (Event, Dict[str, Any]) -> Dict[str, Any] profile = self.process() set_in_app_in_frames( diff --git a/sentry_sdk/scope.py b/sentry_sdk/scope.py index b0dcca8b15..80537cd8bf 100644 --- a/sentry_sdk/scope.py +++ b/sentry_sdk/scope.py @@ -33,6 +33,8 @@ ) if TYPE_CHECKING: + from collections.abc import MutableMapping + from typing import Any from typing import Callable from typing import Deque @@ -53,6 +55,7 @@ EventProcessor, ExcInfo, Hint, + LogLevelStr, Type, ) @@ -414,15 +417,15 @@ def iter_trace_propagation_headers(self, *args, **kwargs): def clear(self): # type: () -> None """Clears the entire scope.""" - self._level = None # type: Optional[str] + self._level = None # type: Optional[LogLevelStr] self._fingerprint = None # type: 
Optional[List[str]] self._transaction = None # type: Optional[str] - self._transaction_info = {} # type: Dict[str, str] + self._transaction_info = {} # type: MutableMapping[str, str] self._user = None # type: Optional[Dict[str, Any]] self._tags = {} # type: Dict[str, Any] self._contexts = {} # type: Dict[str, Dict[str, Any]] - self._extras = {} # type: Dict[str, Any] + self._extras = {} # type: MutableMapping[str, Any] self._attachments = [] # type: List[Attachment] self.clear_breadcrumbs() @@ -438,12 +441,12 @@ def clear(self): @_attr_setter def level(self, value): - # type: (Optional[str]) -> None + # type: (Optional[LogLevelStr]) -> None """When set this overrides the level. Deprecated in favor of set_level.""" self._level = value def set_level(self, value): - # type: (Optional[str]) -> None + # type: (Optional[LogLevelStr]) -> None """Sets the level for the scope.""" self._level = value @@ -848,7 +851,7 @@ def capture_event(self, event, hint=None, client=None, scope=None, **scope_kwarg def capture_message( self, message, level=None, client=None, scope=None, **scope_kwargs ): - # type: (str, Optional[str], Optional[sentry_sdk.Client], Optional[Scope], Any) -> Optional[str] + # type: (str, Optional[LogLevelStr], Optional[sentry_sdk.Client], Optional[Scope], Any) -> Optional[str] """ Captures a message. 
@@ -876,7 +879,7 @@ def capture_message( event = { "message": message, "level": level, - } + } # type: Event return self.capture_event(event, client=client, scope=scope, **scope_kwargs) @@ -1079,7 +1082,7 @@ def _apply_contexts_to_event(self, event, hint, options): # Add "reply_id" context try: - replay_id = contexts["trace"]["dynamic_sampling_context"]["replay_id"] + replay_id = contexts["trace"]["dynamic_sampling_context"]["replay_id"] # type: ignore except (KeyError, TypeError): replay_id = None @@ -1192,7 +1195,7 @@ def update_from_scope(self, scope): def update_from_kwargs( self, user=None, # type: Optional[Any] - level=None, # type: Optional[str] + level=None, # type: Optional[LogLevelStr] extras=None, # type: Optional[Dict[str, Any]] contexts=None, # type: Optional[Dict[str, Any]] tags=None, # type: Optional[Dict[str, str]] diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 80e9ace939..bac1ceaa60 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -14,7 +14,7 @@ if TYPE_CHECKING: import typing - from collections.abc import Callable + from collections.abc import Callable, MutableMapping from typing import Any from typing import Dict from typing import Iterator @@ -151,7 +151,7 @@ def __init__( self.description = description self.status = status self.hub = hub - self._tags = {} # type: Dict[str, str] + self._tags = {} # type: MutableMapping[str, str] self._data = {} # type: Dict[str, Any] self._containing_transaction = containing_transaction if start_timestamp is None: diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index 7c10d7cf43..150130a057 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -75,7 +75,7 @@ Union, ) - from sentry_sdk._types import EndpointType, ExcInfo + from sentry_sdk._types import EndpointType, Event, ExcInfo epoch = datetime(1970, 1, 1) @@ -975,7 +975,7 @@ def to_string(value): def iter_event_stacktraces(event): - # type: (Dict[str, Any]) -> Iterator[Dict[str, Any]] + # type: (Event) -> 
Iterator[Dict[str, Any]] if "stacktrace" in event: yield event["stacktrace"] if "threads" in event: @@ -989,14 +989,14 @@ def iter_event_stacktraces(event): def iter_event_frames(event): - # type: (Dict[str, Any]) -> Iterator[Dict[str, Any]] + # type: (Event) -> Iterator[Dict[str, Any]] for stacktrace in iter_event_stacktraces(event): for frame in stacktrace.get("frames") or (): yield frame def handle_in_app(event, in_app_exclude=None, in_app_include=None, project_root=None): - # type: (Dict[str, Any], Optional[List[str]], Optional[List[str]], Optional[str]) -> Dict[str, Any] + # type: (Event, Optional[List[str]], Optional[List[str]], Optional[str]) -> Event for stacktrace in iter_event_stacktraces(event): set_in_app_in_frames( stacktrace.get("frames"), @@ -1074,7 +1074,7 @@ def event_from_exception( client_options=None, # type: Optional[Dict[str, Any]] mechanism=None, # type: Optional[Dict[str, Any]] ): - # type: (...) -> Tuple[Dict[str, Any], Dict[str, Any]] + # type: (...) -> Tuple[Event, Dict[str, Any]] exc_info = exc_info_from_error(exc_info) hint = event_hint_with_exc_info(exc_info) return ( From 586d59d34b80e4782dc2c35b6e3ebd854014c2ca Mon Sep 17 00:00:00 2001 From: Daniel Szoke Date: Tue, 12 Mar 2024 16:51:55 +0100 Subject: [PATCH 7/9] Fix mypy in `client.py` --- sentry_sdk/client.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 33093e7a42..33c00cb256 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -647,7 +647,8 @@ def _update_session_from_event( if session.user_agent is None: headers = (event.get("request") or {}).get("headers") - for k, v in (headers or {}).items(): + headers_dict = headers if isinstance(headers, dict) else {} + for k, v in headers_dict.items(): if k.lower() == "user-agent": user_agent = v break From c326bf4f0b05537a7d75acea935e2f7030ae27ce Mon Sep 17 00:00:00 2001 From: Daniel Szoke Date: Tue, 12 Mar 2024 16:53:40 +0100 Subject: [PATCH 8/9] Fix 
functools import --- sentry_sdk/integrations/openai.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 0e71029b60..a787c54cee 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -1,3 +1,5 @@ +from functools import wraps + from sentry_sdk import consts from sentry_sdk._types import TYPE_CHECKING @@ -6,7 +8,6 @@ from sentry_sdk.tracing import Span import sentry_sdk -from sentry_sdk._functools import wraps from sentry_sdk.hub import Hub, _should_send_default_pii from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.utils import logger, capture_internal_exceptions, event_from_exception From ce145ff8f4b1ad6a72b1a058abc9b2b3ac8d717c Mon Sep 17 00:00:00 2001 From: Daniel Szoke Date: Tue, 12 Mar 2024 16:57:21 +0100 Subject: [PATCH 9/9] Fix CI config problem ... by running `python scripts/split-tox-gh-actions/split-tox-gh-actions.py` --- .github/workflows/test-integrations-data-processing.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-integrations-data-processing.yml b/.github/workflows/test-integrations-data-processing.yml index add0d664e2..ed2e261d07 100644 --- a/.github/workflows/test-integrations-data-processing.yml +++ b/.github/workflows/test-integrations-data-processing.yml @@ -25,7 +25,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.6", "3.7", "3.8", "3.9", "3.11", "3.12"] + python-version: ["3.6","3.7","3.8","3.9","3.11","3.12"] # python3.6 reached EOL and is no longer being supported on # new versions of hosted runners on Github Actions # ubuntu-20.04 is the last version that supported python3.6 @@ -81,7 +81,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.6","3.7","3.8","3.9","3.10","3.11","3.12"] # python3.6 reached EOL and is no longer being 
supported on # new versions of hosted runners on Github Actions # ubuntu-20.04 is the last version that supported python3.6