From 6d79ebf3b97f124cc96557c87cfa62af4ee593bd Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 6 Mar 2024 16:25:51 -0500 Subject: [PATCH 01/20] OpenAI integration --- sentry_sdk/integrations/__init__.py | 1 + sentry_sdk/integrations/openai.py | 224 +++++++++++++++++++++++ tests/integrations/openai/test_openai.py | 148 +++++++++++++++ tox.ini | 9 + 4 files changed, 382 insertions(+) create mode 100644 sentry_sdk/integrations/openai.py create mode 100644 tests/integrations/openai/test_openai.py diff --git a/sentry_sdk/integrations/__init__.py b/sentry_sdk/integrations/__init__.py index 21f7188ff1..c9737ae589 100644 --- a/sentry_sdk/integrations/__init__.py +++ b/sentry_sdk/integrations/__init__.py @@ -78,6 +78,7 @@ def iter_default_integrations(with_auto_enabling_integrations): "sentry_sdk.integrations.fastapi.FastApiIntegration", "sentry_sdk.integrations.flask.FlaskIntegration", "sentry_sdk.integrations.httpx.HttpxIntegration", + "sentry_sdk.integrations.openai.OpenAIIntegration", "sentry_sdk.integrations.pyramid.PyramidIntegration", "sentry_sdk.integrations.redis.RedisIntegration", "sentry_sdk.integrations.rq.RqIntegration", diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py new file mode 100644 index 0000000000..9f66df35ee --- /dev/null +++ b/sentry_sdk/integrations/openai.py @@ -0,0 +1,224 @@ +from __future__ import absolute_import + +from sentry_sdk._types import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Iterator, Any, TypeVar, Callable + + F = TypeVar("F", bound=Callable[..., Any]) + +from sentry_sdk._functools import wraps +from sentry_sdk.hub import Hub +from sentry_sdk.integrations import DidNotEnable, Integration +from sentry_sdk.utils import logger, capture_internal_exceptions + +try: + from openai.types.chat import ChatCompletionChunk + from openai.resources.chat.completions import Completions + from openai.resources import Embeddings +except ImportError: + raise DidNotEnable("OpenAI not installed") + 
+try:
+    import tiktoken
+
+    enc = tiktoken.get_encoding("cl100k_base")
+
+    def count_tokens(s):
+        # type: (str) -> int
+        return len(enc.encode_ordinary(s))
+
+    logger.debug("[OpenAI] using tiktoken to count tokens")
+except ImportError:
+    logger.info(
+        "The Sentry Python SDK requires 'tiktoken' in order to measure token usage from some OpenAI APIs"
+        "Please install 'tiktoken' if you aren't receiving token usage in Sentry."
+        "See https://docs.sentry.io/platforms/python/guides/openai/ for more information."
+    )
+
+    def count_tokens(s):
+        # type: (str) -> int
+        return 0
+
+
+COMPLETION_TOKENS = "completion_tokens"
+PROMPT_TOKENS = "prompt_tokens"
+TOTAL_TOKENS = "total_tokens"
+
+
+class OpenAIIntegration(Integration):
+    identifier = "openai"
+
+    @staticmethod
+    def setup_once():
+        # TODO minimum version
+        Completions.create = _wrap_chat_completion_create(Completions.create)
+        Embeddings.create = _wrap_enbeddings_create(Embeddings.create)
+
+
+def _calculate_chat_completion_usage(
+    messages, response, span, streaming_message_responses=None
+):
+    completion_tokens = 0
+    prompt_tokens = 0
+    total_tokens = 0
+    if hasattr(response, "usage"):
+        if hasattr(response.usage, "completion_tokens") and isinstance(
+            response.usage.completion_tokens, int
+        ):
+            completion_tokens = response.usage.completion_tokens
+        if hasattr(response.usage, "prompt_tokens") and isinstance(
+            response.usage.prompt_tokens, int
+        ):
+            prompt_tokens = response.usage.prompt_tokens
+        if hasattr(response.usage, "total_tokens") and isinstance(
+            response.usage.total_tokens, int
+        ):
+            total_tokens = response.usage.total_tokens
+
+    if prompt_tokens == 0:
+        for message in messages:
+            if hasattr(message, "content"):
+                prompt_tokens += count_tokens(message.content)
+            elif "content" in message:
+                prompt_tokens += count_tokens(message["content"])
+
+    if completion_tokens == 0:
+        if streaming_message_responses is not None:
+            for message in streaming_message_responses:
+                completion_tokens +=
count_tokens(message) + elif hasattr(response, "choices"): + for choice in response.choices: + if hasattr(choice, "message"): + completion_tokens += count_tokens(choice.message) + + if total_tokens == 0: + total_tokens = prompt_tokens + completion_tokens + + if completion_tokens != 0: + span.set_data(COMPLETION_TOKENS, completion_tokens) + if prompt_tokens != 0: + span.set_data(PROMPT_TOKENS, prompt_tokens) + if total_tokens != 0: + span.set_data(TOTAL_TOKENS, total_tokens) + + +def _wrap_chat_completion_create(f): + # type: (F) -> F + @wraps(f) + def new_chat_completion(*args, **kwargs): + # type: (*Any, **Any) -> Any + hub = Hub.current + integration = hub.get_integration(OpenAIIntegration) + if integration is None: + return f(*args, **kwargs) + + if "messages" not in kwargs: + # invalid call (in all versions of openai), let it return error + return f(*args, **kwargs) + + try: + iter(kwargs["messages"]) + except TypeError: + # invalid call (in all versions), messages must be iterable + return f(*args, **kwargs) + + kwargs["messages"] = list(kwargs["messages"]) + messages = kwargs["messages"] + model = kwargs.get("model") + streaming = kwargs.get("stream") # TODO handle streaming + + span = hub.start_span(op="openai", description="Chat Completion") + span.__enter__() + res = f(*args, **kwargs) + with capture_internal_exceptions(): + span.set_data("messages", messages) + span.set_tag("model", model) + span.set_tag("streaming", streaming) + + if hasattr(res, "choices"): + span.set_data("response", res.choices[0].message) + _calculate_chat_completion_usage(messages, res, span) + span.__exit__(None, None, None) + elif hasattr(res, "_iterator"): + data_buf: list[list[str]] = [] # one for each choice + + old_iterator: Iterator[ChatCompletionChunk] = res._iterator + + def new_iterator() -> Iterator[ChatCompletionChunk]: + with capture_internal_exceptions(): + for x in old_iterator: + if hasattr(x, "choices"): + choice_index = 0 + for choice in x.choices: + if 
hasattr(choice, "delta") and hasattr( + choice.delta, "content" + ): + content = choice.delta.content + if len(data_buf) <= choice_index: + data_buf.append([]) + data_buf[choice_index].append(content or "") + choice_index += 1 + yield x + if len(data_buf) > 0: + all_responses = list( + map(lambda chunk: "".join(chunk), data_buf) + ) + span.set_data("responses", all_responses) + _calculate_chat_completion_usage( + messages, res, span, all_responses + ) + span.__exit__(None, None, None) + + res._iterator = new_iterator() + else: + span.set_tag("unknown_response", True) + span.__exit__(None, None, None) + return res + + return new_chat_completion + + +def _wrap_enbeddings_create(f): + # type: (F) -> F + + @wraps(f) + def new_embeddings_create(*args, **kwargs): + hub = Hub.current + integration = hub.get_integration(OpenAIIntegration) + if integration is None: + return f(*args, **kwargs) + + with hub.start_span(op="openai", description="Embeddings Creation") as span: + if "input" in kwargs and isinstance(kwargs["input"], str): + span.set_data("input", kwargs["input"]) + if "model" in kwargs: + span.set_tag("model", kwargs["model"]) + if "dimensions" in kwargs: + span.set_tag("dimensions", kwargs["dimensions"]) + response = f(*args, **kwargs) + + prompt_tokens = 0 + total_tokens = 0 + if hasattr(response, "usage"): + if hasattr(response.usage, "prompt_tokens") and isinstance( + response.usage.prompt_tokens, int + ): + prompt_tokens = response.usage.prompt_tokens + if hasattr(response.usage, "total_tokens") and isinstance( + response.usage.total_tokens, int + ): + total_tokens = response.usage.total_tokens + + if prompt_tokens == 0: + prompt_tokens = count_tokens(kwargs["input"] or "") + + if total_tokens == 0: + total_tokens = prompt_tokens + + span.set_data(PROMPT_TOKENS, prompt_tokens) + span.set_data(TOTAL_TOKENS, total_tokens) + + return response + + return new_embeddings_create diff --git a/tests/integrations/openai/test_openai.py 
b/tests/integrations/openai/test_openai.py
new file mode 100644
index 0000000000..c9ecace759
--- /dev/null
+++ b/tests/integrations/openai/test_openai.py
@@ -0,0 +1,148 @@
+from openai import OpenAI, Stream
+from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding
+from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionChunk
+from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice
+from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage
+
+from sentry_sdk import start_transaction
+from sentry_sdk.integrations.openai import OpenAIIntegration
+
+try:
+    from unittest import mock  # python 3.3 and above
+except ImportError:
+    import mock  # python < 3.3
+
+COMPLETION_TOKENS = "completion_tokens"
+PROMPT_TOKENS = "prompt_tokens"
+TOTAL_TOKENS = "total_tokens"
+
+
+def test_nonstreaming_chat_completion(sentry_init, capture_events):
+    sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0)
+    events = capture_events()
+
+    client = OpenAI(api_key="z")
+    returned_chat = ChatCompletion(
+        id="chat-id",
+        choices=[
+            Choice(
+                index=0,
+                finish_reason="stop",
+                message=ChatCompletionMessage(role="assistant", content="response"),
+            )
+        ],
+        created=10000000,
+        model="model-id",
+        object="chat.completion",
+        usage=CompletionUsage(
+            completion_tokens=10,
+            prompt_tokens=20,
+            total_tokens=30,
+        ),
+    )
+
+    client.chat.completions._post = mock.Mock(return_value=returned_chat)
+    with start_transaction(name="openai tx"):
+        response = (
+            client.chat.completions.create(
+                model="some-model", messages=[{"role": "system", "content": "hello"}]
+            )
+            .choices[0]
+            .message.content
+        )
+
+    assert response == "response"
+    tx = events[0]
+    assert tx["type"] == "transaction"
+    span = tx["spans"][0]
+    assert span["op"] == "openai"
+
+    assert span["data"][COMPLETION_TOKENS] == 10
+    assert span["data"][PROMPT_TOKENS] == 20
+    assert span["data"][TOTAL_TOKENS] == 30
+
+
+# noinspection
PyTypeChecker +def test_streaming_chat_completion(sentry_init, capture_events): + sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + events = capture_events() + + client = OpenAI(api_key="z") + returned_stream = Stream(cast_to=None, response=None, client=None) + returned_stream._iterator = [ + ChatCompletionChunk( + id="1", + choices=[Choice(index=0, delta=ChoiceDelta(content="hel"))], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ChatCompletionChunk( + id="1", + choices=[Choice(index=1, delta=ChoiceDelta(content="lo "))], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ChatCompletionChunk( + id="1", + choices=[ + Choice( + index=2, delta=ChoiceDelta(content="world"), finish_reason="stop" + ) + ], + created=100000, + model="model-id", + object="chat.completion.chunk", + ), + ] + + client.chat.completions._post = mock.Mock(return_value=returned_stream) + with start_transaction(name="openai tx"): + response_stream = client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + response_string = "".join( + map(lambda x: x.choices[0].delta.content, response_stream) + ) + assert response_string == "hello world" + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "openai" + assert span["data"][COMPLETION_TOKENS] == 2 + assert span["data"][PROMPT_TOKENS] == 1 + assert span["data"][TOTAL_TOKENS] == 3 + + +def test_embeddings_create(sentry_init, capture_events): + sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + events = capture_events() + + client = OpenAI(api_key="z") + + returned_embedding = CreateEmbeddingResponse( + data=[Embedding(object="embedding", index=0, embedding=[1.0, 2.0, 3.0])], + model="some-model", + object="list", + usage=EmbeddingTokenUsage( + prompt_tokens=20, + total_tokens=30, + ), + ) + + client.embeddings._post = 
mock.Mock(return_value=returned_embedding) + with start_transaction(name="openai tx"): + response = client.embeddings.create( + input="test", model="text-embedding-3-large" + ) + + assert len(response.data[0].embedding) == 3 + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "openai" + + assert span["data"][PROMPT_TOKENS] == 20 + assert span["data"][TOTAL_TOKENS] == 30 diff --git a/tox.ini b/tox.ini index a23251f186..9598c52521 100644 --- a/tox.ini +++ b/tox.ini @@ -146,6 +146,9 @@ envlist = {py3.5,py3.11,py3.12}-loguru-v{0.5} {py3.5,py3.11,py3.12}-loguru-latest + # OpenAI + {py3.7,py3.9,py3.11,py3.12}-openai + # OpenTelemetry (OTel) {py3.7,py3.9,py3.11,py3.12}-opentelemetry @@ -439,6 +442,11 @@ deps = loguru-v0.5: loguru~=0.5.0 loguru-latest: loguru + # OpenAI + openai-v1: openai~=1.0.0 + openai-v1: tiktoken + openai-latest: openai + # OpenTelemetry (OTel) opentelemetry: opentelemetry-distro @@ -597,6 +605,7 @@ setenv = httpx: TESTPATH=tests/integrations/httpx huey: TESTPATH=tests/integrations/huey loguru: TESTPATH=tests/integrations/loguru + openai: TESTPATH=tests/integrations/openai opentelemetry: TESTPATH=tests/integrations/opentelemetry pure_eval: TESTPATH=tests/integrations/pure_eval pymongo: TESTPATH=tests/integrations/pymongo From edf396c3920795e83344715818df08cf1a06dc69 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 6 Mar 2024 16:45:12 -0500 Subject: [PATCH 02/20] Fix linting errors --- sentry_sdk/integrations/openai.py | 24 ++++++++++++++---------- tox.ini | 2 +- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 9f66df35ee..21946b5631 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -3,9 +3,8 @@ from sentry_sdk._types import TYPE_CHECKING if TYPE_CHECKING: - from typing import Iterator, Any, TypeVar, Callable - - F = TypeVar("F", bound=Callable[..., Any]) + from 
typing import Iterator, Any, Iterable, List, Optional, Callable + from sentry_sdk.tracing import Span from sentry_sdk._functools import wraps from sentry_sdk.hub import Hub @@ -13,14 +12,17 @@ from sentry_sdk.utils import logger, capture_internal_exceptions try: - from openai.types.chat import ChatCompletionChunk - from openai.resources.chat.completions import Completions - from openai.resources import Embeddings + from openai.types.chat import ChatCompletionChunk # type: ignore + from openai.resources.chat.completions import Completions # type: ignore + from openai.resources import Embeddings # type: ignore + + if TYPE_CHECKING: + from openai.types.chat import ChatCompletionMessageParam except ImportError: raise DidNotEnable("OpenAI not installed") try: - import tiktoken + import tiktoken # type: ignore enc = tiktoken.get_encoding("cl100k_base") @@ -51,7 +53,7 @@ class OpenAIIntegration(Integration): @staticmethod def setup_once(): - # TODO minimum version + # type: () -> None Completions.create = _wrap_chat_completion_create(Completions.create) Embeddings.create = _wrap_enbeddings_create(Embeddings.create) @@ -59,6 +61,7 @@ def setup_once(): def _calculate_chat_completion_usage( messages, response, span, streaming_message_responses=None ): + # type: (Iterable[ChatCompletionMessageParam], Any, Span, Optional[List[str]]) -> None completion_tokens = 0 prompt_tokens = 0 total_tokens = 0 @@ -104,7 +107,7 @@ def _calculate_chat_completion_usage( def _wrap_chat_completion_create(f): - # type: (F) -> F + # type: (Callable[..., Any]) -> Callable[..., Any] @wraps(f) def new_chat_completion(*args, **kwargs): # type: (*Any, **Any) -> Any @@ -180,10 +183,11 @@ def new_iterator() -> Iterator[ChatCompletionChunk]: def _wrap_enbeddings_create(f): - # type: (F) -> F + # type: (Callable[..., Any]) -> Callable[..., Any] @wraps(f) def new_embeddings_create(*args, **kwargs): + # type: (*Any, **Any) -> Any hub = Hub.current integration = hub.get_integration(OpenAIIntegration) if 
integration is None: diff --git a/tox.ini b/tox.ini index 9598c52521..3226d9d0d7 100644 --- a/tox.ini +++ b/tox.ini @@ -443,8 +443,8 @@ deps = loguru-latest: loguru # OpenAI + openai: tiktoken openai-v1: openai~=1.0.0 - openai-v1: tiktoken openai-latest: openai # OpenTelemetry (OTel) From 05ecefa78a30fdaa72d79054ae08ea5ce2b27215 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 6 Mar 2024 16:58:10 -0500 Subject: [PATCH 03/20] Fix CI --- .../test-integrations-data-processing.yml | 14 +++++++++++++- mypy.ini | 2 ++ .../split-tox-gh-actions/split-tox-gh-actions.py | 1 + tests/integrations/openai/__init__.py | 3 +++ tox.ini | 2 +- 5 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 tests/integrations/openai/__init__.py diff --git a/.github/workflows/test-integrations-data-processing.yml b/.github/workflows/test-integrations-data-processing.yml index ddac93d1e5..c40d45845d 100644 --- a/.github/workflows/test-integrations-data-processing.yml +++ b/.github/workflows/test-integrations-data-processing.yml @@ -25,7 +25,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.5","3.7","3.8","3.11","3.12"] + python-version: ["3.5","3.7","3.8","3.9","3.11","3.12"] # python3.6 reached EOL and is no longer being supported on # new versions of hosted runners on Github Actions # ubuntu-20.04 is the last version that supported python3.6 @@ -58,6 +58,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh "py${{ matrix.python-version }}-huey-latest" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch + - name: Test openai latest + run: | + set -x # print commands that are executed + ./scripts/runtox.sh "py${{ matrix.python-version }}-openai-latest" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch - name: Test rq latest run: | set -x # print commands that are executed @@ -110,6 +114,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh --exclude-latest "py${{ matrix.python-version 
}}-huey" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch + - name: Test openai pinned + run: | + set -x # print commands that are executed + ./scripts/runtox.sh --exclude-latest "py${{ matrix.python-version }}-openai" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch - name: Test rq pinned run: | set -x # print commands that are executed @@ -151,6 +159,10 @@ jobs: run: | set -x # print commands that are executed ./scripts/runtox.sh --exclude-latest "py2.7-huey" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch + - name: Test openai py27 + run: | + set -x # print commands that are executed + ./scripts/runtox.sh --exclude-latest "py2.7-openai" --cov=tests --cov=sentry_sdk --cov-report= --cov-branch - name: Test rq py27 run: | set -x # print commands that are executed diff --git a/mypy.ini b/mypy.ini index fef90c867e..c1444d61e5 100644 --- a/mypy.ini +++ b/mypy.ini @@ -67,6 +67,8 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-huey.*] ignore_missing_imports = True +[mypy-openai.*] +ignore_missing_imports = True [mypy-arq.*] ignore_missing_imports = True [mypy-grpc.*] diff --git a/scripts/split-tox-gh-actions/split-tox-gh-actions.py b/scripts/split-tox-gh-actions/split-tox-gh-actions.py index f8beffc219..13b81283ca 100755 --- a/scripts/split-tox-gh-actions/split-tox-gh-actions.py +++ b/scripts/split-tox-gh-actions/split-tox-gh-actions.py @@ -70,6 +70,7 @@ "beam", "celery", "huey", + "openai", "rq", ], "Databases": [ diff --git a/tests/integrations/openai/__init__.py b/tests/integrations/openai/__init__.py new file mode 100644 index 0000000000..d6cc3d5505 --- /dev/null +++ b/tests/integrations/openai/__init__.py @@ -0,0 +1,3 @@ +import pytest + +pytest.importorskip("openai") diff --git a/tox.ini b/tox.ini index 3226d9d0d7..2c7beb6df9 100644 --- a/tox.ini +++ b/tox.ini @@ -147,7 +147,7 @@ envlist = {py3.5,py3.11,py3.12}-loguru-latest # OpenAI - {py3.7,py3.9,py3.11,py3.12}-openai + {py3.5,py3.7,py3.9,py3.11,py3.12}-openai-latest # 
OpenTelemetry (OTel) {py3.7,py3.9,py3.11,py3.12}-opentelemetry From a3ccbcd704a211de0f57f653ae88a36480af8148 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 6 Mar 2024 17:03:31 -0500 Subject: [PATCH 04/20] Fix lint --- sentry_sdk/integrations/openai.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 21946b5631..44aab84cef 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -12,9 +12,9 @@ from sentry_sdk.utils import logger, capture_internal_exceptions try: - from openai.types.chat import ChatCompletionChunk # type: ignore - from openai.resources.chat.completions import Completions # type: ignore - from openai.resources import Embeddings # type: ignore + from openai.types.chat import ChatCompletionChunk + from openai.resources.chat.completions import Completions + from openai.resources import Embeddings if TYPE_CHECKING: from openai.types.chat import ChatCompletionMessageParam From 6003902c8b687b326d218d33529d56e08a086659 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 6 Mar 2024 17:16:50 -0500 Subject: [PATCH 05/20] Fix more CI issues --- sentry_sdk/integrations/openai.py | 8 +++++--- tests/integrations/openai/test_openai.py | 9 +++++---- tox.ini | 2 +- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 44aab84cef..30b9ed9fae 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -3,7 +3,7 @@ from sentry_sdk._types import TYPE_CHECKING if TYPE_CHECKING: - from typing import Iterator, Any, Iterable, List, Optional, Callable + from typing import Any, Iterable, List, Optional, Callable, Iterator from sentry_sdk.tracing import Span from sentry_sdk._functools import wraps @@ -146,9 +146,11 @@ def new_chat_completion(*args, **kwargs): elif hasattr(res, "_iterator"): data_buf: list[list[str]] = [] 
# one for each choice - old_iterator: Iterator[ChatCompletionChunk] = res._iterator + # type: Iterator[ChatCompletionChunk] + old_iterator = res._iterator - def new_iterator() -> Iterator[ChatCompletionChunk]: + def new_iterator(): + # type: () -> Iterator[ChatCompletionChunk] with capture_internal_exceptions(): for x in old_iterator: if hasattr(x, "choices"): diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index c9ecace759..167c3c97ff 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1,7 +1,8 @@ from openai import OpenAI, Stream from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionChunk -from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice +from openai.types.chat.chat_completion import Choice +from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage from sentry_sdk import start_transaction @@ -72,14 +73,14 @@ def test_streaming_chat_completion(sentry_init, capture_events): returned_stream._iterator = [ ChatCompletionChunk( id="1", - choices=[Choice(index=0, delta=ChoiceDelta(content="hel"))], + choices=[DeltaChoice(index=0, delta=ChoiceDelta(content="hel"))], created=100000, model="model-id", object="chat.completion.chunk", ), ChatCompletionChunk( id="1", - choices=[Choice(index=1, delta=ChoiceDelta(content="lo "))], + choices=[DeltaChoice(index=1, delta=ChoiceDelta(content="lo "))], created=100000, model="model-id", object="chat.completion.chunk", @@ -87,7 +88,7 @@ def test_streaming_chat_completion(sentry_init, capture_events): ChatCompletionChunk( id="1", choices=[ - Choice( + DeltaChoice( index=2, delta=ChoiceDelta(content="world"), finish_reason="stop" ) ], diff --git a/tox.ini b/tox.ini index 2c7beb6df9..f3e623af76 100644 
--- a/tox.ini +++ b/tox.ini @@ -147,7 +147,7 @@ envlist = {py3.5,py3.11,py3.12}-loguru-latest # OpenAI - {py3.5,py3.7,py3.9,py3.11,py3.12}-openai-latest + {py3.9,py3.11,py3.12}-openai-latest # OpenTelemetry (OTel) {py3.7,py3.9,py3.11,py3.12}-opentelemetry From 09013f286b637c11cb211e7ff1aab57a10ce751d Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 6 Mar 2024 17:19:22 -0500 Subject: [PATCH 06/20] Run tests on version pinned OpenAI too --- tox.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/tox.ini b/tox.ini index f3e623af76..c9e476ed24 100644 --- a/tox.ini +++ b/tox.ini @@ -147,6 +147,7 @@ envlist = {py3.5,py3.11,py3.12}-loguru-latest # OpenAI + {py3.9,py3.11,py3.12}-openai-v1 {py3.9,py3.11,py3.12}-openai-latest # OpenTelemetry (OTel) From 2f9667a5afaf67ac1d42ce287a0d719d1cdc1240 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 6 Mar 2024 17:26:52 -0500 Subject: [PATCH 07/20] Fix pydantic issue in test --- sentry_sdk/integrations/openai.py | 3 +-- tests/integrations/openai/test_openai.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 30b9ed9fae..1606224030 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -146,8 +146,7 @@ def new_chat_completion(*args, **kwargs): elif hasattr(res, "_iterator"): data_buf: list[list[str]] = [] # one for each choice - # type: Iterator[ChatCompletionChunk] - old_iterator = res._iterator + old_iterator = res._iterator # type: Iterator[ChatCompletionChunk] def new_iterator(): # type: () -> Iterator[ChatCompletionChunk] diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 167c3c97ff..b44c3151ec 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -73,14 +73,22 @@ def test_streaming_chat_completion(sentry_init, capture_events): returned_stream._iterator = [ 
ChatCompletionChunk( id="1", - choices=[DeltaChoice(index=0, delta=ChoiceDelta(content="hel"))], + choices=[ + DeltaChoice( + index=0, delta=ChoiceDelta(content="hel"), finish_reason=None + ) + ], created=100000, model="model-id", object="chat.completion.chunk", ), ChatCompletionChunk( id="1", - choices=[DeltaChoice(index=1, delta=ChoiceDelta(content="lo "))], + choices=[ + DeltaChoice( + index=1, delta=ChoiceDelta(content="lo "), finish_reason=None + ) + ], created=100000, model="model-id", object="chat.completion.chunk", From ec84c041abec9b80fd089b517f8bf30361017d79 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Wed, 6 Mar 2024 17:28:29 -0500 Subject: [PATCH 08/20] Import type in TYPE_CHECKING gate --- sentry_sdk/integrations/openai.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 1606224030..ba41e1289c 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -12,12 +12,11 @@ from sentry_sdk.utils import logger, capture_internal_exceptions try: - from openai.types.chat import ChatCompletionChunk from openai.resources.chat.completions import Completions from openai.resources import Embeddings if TYPE_CHECKING: - from openai.types.chat import ChatCompletionMessageParam + from openai.types.chat import ChatCompletionMessageParam, ChatCompletionChunk except ImportError: raise DidNotEnable("OpenAI not installed") @@ -129,7 +128,7 @@ def new_chat_completion(*args, **kwargs): kwargs["messages"] = list(kwargs["messages"]) messages = kwargs["messages"] model = kwargs.get("model") - streaming = kwargs.get("stream") # TODO handle streaming + streaming = kwargs.get("stream") span = hub.start_span(op="openai", description="Chat Completion") span.__enter__() From e45420eef8fb0a74f1a9f0841cf80eb688d24a5a Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 7 Mar 2024 11:02:15 -0500 Subject: [PATCH 09/20] PR feedback fixes --- 
sentry_sdk/consts.py                     |  2 +
 sentry_sdk/integrations/openai.py        | 81 ++++++++++++++----------
 tests/integrations/openai/test_openai.py | 58 +++++++++++------
 tox.ini                                  |  5 +-
 4 files changed, 92 insertions(+), 54 deletions(-)

diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py
index 2b58aecc24..ce38ebeef9 100644
--- a/sentry_sdk/consts.py
+++ b/sentry_sdk/consts.py
@@ -219,6 +219,8 @@ class OP:
     MIDDLEWARE_STARLITE = "middleware.starlite"
     MIDDLEWARE_STARLITE_RECEIVE = "middleware.starlite.receive"
     MIDDLEWARE_STARLITE_SEND = "middleware.starlite.send"
+    OPENAI_CHAT_COMPLETIONS_CREATE = "openai.chat_completions.create"
+    OPENAI_EMBEDDINGS_CREATE = "openai.embeddings.create"
     QUEUE_SUBMIT_ARQ = "queue.submit.arq"
     QUEUE_TASK_ARQ = "queue.task.arq"
     QUEUE_SUBMIT_CELERY = "queue.submit.celery"
diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index ba41e1289c..f3bee1c674 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -1,15 +1,15 @@
-from __future__ import absolute_import
-
+from sentry_sdk import consts
 from sentry_sdk._types import TYPE_CHECKING
 
 if TYPE_CHECKING:
     from typing import Any, Iterable, List, Optional, Callable, Iterator
     from sentry_sdk.tracing import Span
 
+import sentry_sdk
 from sentry_sdk._functools import wraps
 from sentry_sdk.hub import Hub
 from sentry_sdk.integrations import DidNotEnable, Integration
-from sentry_sdk.utils import logger, capture_internal_exceptions
+from sentry_sdk.utils import logger, capture_internal_exceptions, event_from_exception
 
 try:
     from openai.resources.chat.completions import Completions
@@ -42,9 +42,9 @@ def count_tokens(s):
         return 0
 
 
-COMPLETION_TOKENS = "completion_tokens"
-PROMPT_TOKENS = "prompt_tokens"
-TOTAL_TOKENS = "total_tokens"
+COMPLETION_TOKENS_USED = "ai.completion_tokens.used"
+PROMPT_TOKENS_USED = "ai.prompt_tokens.used"
+TOTAL_TOKENS_USED = "ai.total_tokens.used"
 
 
 class OpenAIIntegration(Integration):
@@ -54,7 +54,19 @@ class
OpenAIIntegration(Integration): def setup_once(): # type: () -> None Completions.create = _wrap_chat_completion_create(Completions.create) - Embeddings.create = _wrap_enbeddings_create(Embeddings.create) + Embeddings.create = _wrap_embeddings_create(Embeddings.create) + + +def _capture_exception(hub, exc): + # type: (Hub, Any) -> None + + if hub.client is not None: + event, hint = event_from_exception( + exc, + client_options=hub.client.options, + mechanism={"type": "openai", "handled": False}, + ) + hub.capture_event(event, hint=hint) def _calculate_chat_completion_usage( @@ -98,11 +110,11 @@ def _calculate_chat_completion_usage( total_tokens = prompt_tokens + completion_tokens if completion_tokens != 0: - span.set_data(COMPLETION_TOKENS, completion_tokens) + span.set_data(COMPLETION_TOKENS_USED, completion_tokens) if prompt_tokens != 0: - span.set_data(PROMPT_TOKENS, prompt_tokens) + span.set_data(PROMPT_TOKENS_USED, prompt_tokens) if total_tokens != 0: - span.set_data(TOTAL_TOKENS, total_tokens) + span.set_data(TOTAL_TOKENS_USED, total_tokens) def _wrap_chat_completion_create(f): @@ -110,11 +122,6 @@ def _wrap_chat_completion_create(f): @wraps(f) def new_chat_completion(*args, **kwargs): # type: (*Any, **Any) -> Any - hub = Hub.current - integration = hub.get_integration(OpenAIIntegration) - if integration is None: - return f(*args, **kwargs) - if "messages" not in kwargs: # invalid call (in all versions of openai), let it return error return f(*args, **kwargs) @@ -130,13 +137,21 @@ def new_chat_completion(*args, **kwargs): model = kwargs.get("model") streaming = kwargs.get("stream") - span = hub.start_span(op="openai", description="Chat Completion") + span = sentry_sdk.start_span( + op=consts.OP.OPENAI_CHAT_COMPLETIONS_CREATE, description="Chat Completion" + ) span.__enter__() - res = f(*args, **kwargs) + try: + res = f(*args, **kwargs) + except Exception as e: + _capture_exception(Hub.current, e) + span.__exit__(None, None, None) + raise e from None + with 
capture_internal_exceptions(): span.set_data("messages", messages) - span.set_tag("model", model) - span.set_tag("streaming", streaming) + span.set_data("model", model) + span.set_data("streaming", streaming) if hasattr(res, "choices"): span.set_data("response", res.choices[0].message) @@ -175,32 +190,34 @@ def new_iterator(): res._iterator = new_iterator() else: - span.set_tag("unknown_response", True) + span.set_data("unknown_response", True) span.__exit__(None, None, None) return res return new_chat_completion -def _wrap_enbeddings_create(f): +def _wrap_embeddings_create(f): # type: (Callable[..., Any]) -> Callable[..., Any] @wraps(f) def new_embeddings_create(*args, **kwargs): # type: (*Any, **Any) -> Any - hub = Hub.current - integration = hub.get_integration(OpenAIIntegration) - if integration is None: - return f(*args, **kwargs) - - with hub.start_span(op="openai", description="Embeddings Creation") as span: + with sentry_sdk.start_span( + op=consts.OP.OPENAI_EMBEDDINGS_CREATE, + description="OpenAI Embedding Creation", + ) as span: if "input" in kwargs and isinstance(kwargs["input"], str): span.set_data("input", kwargs["input"]) if "model" in kwargs: - span.set_tag("model", kwargs["model"]) + span.set_data("model", kwargs["model"]) if "dimensions" in kwargs: - span.set_tag("dimensions", kwargs["dimensions"]) - response = f(*args, **kwargs) + span.set_data("dimensions", kwargs["dimensions"]) + try: + response = f(*args, **kwargs) + except Exception as e: + _capture_exception(Hub.current, e) + raise e from None prompt_tokens = 0 total_tokens = 0 @@ -220,8 +237,8 @@ def new_embeddings_create(*args, **kwargs): if total_tokens == 0: total_tokens = prompt_tokens - span.set_data(PROMPT_TOKENS, prompt_tokens) - span.set_data(TOTAL_TOKENS, total_tokens) + span.set_data(PROMPT_TOKENS_USED, prompt_tokens) + span.set_data(TOTAL_TOKENS_USED, total_tokens) return response diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py 
index b44c3151ec..edd37a5b95 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1,4 +1,5 @@ -from openai import OpenAI, Stream +import pytest +from openai import OpenAI, Stream, OpenAIError from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionChunk from openai.types.chat.chat_completion import Choice @@ -6,16 +7,14 @@ from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage from sentry_sdk import start_transaction -from sentry_sdk.integrations.openai import OpenAIIntegration +from sentry_sdk.integrations.openai import ( + OpenAIIntegration, + COMPLETION_TOKENS_USED, + PROMPT_TOKENS_USED, + TOTAL_TOKENS_USED, +) -try: - from unittest import mock # python 3.3 and above -except ImportError: - import mock # python < 3.3 - -COMPLETION_TOKENS = "completion_tоkens" -PROMPT_TOKENS = "prompt_tоkens" -TOTAL_TOKENS = "total_tоkens" +from unittest import mock # python 3.3 and above def test_nonstreaming_chat_completion(sentry_init, capture_events): @@ -56,11 +55,11 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events): tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] - assert span["op"] == "openai" + assert span["op"] == "openai.chat_completions.create" - assert span["data"][COMPLETION_TOKENS] == 10 - assert span["data"][PROMPT_TOKENS] == 20 - assert span["data"][TOTAL_TOKENS] == 30 + assert span["data"][COMPLETION_TOKENS_USED] == 10 + assert span["data"][PROMPT_TOKENS_USED] == 20 + assert span["data"][TOTAL_TOKENS_USED] == 30 # noinspection PyTypeChecker @@ -118,10 +117,27 @@ def test_streaming_chat_completion(sentry_init, capture_events): tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] - assert span["op"] == "openai" - assert span["data"][COMPLETION_TOKENS] == 2 - assert span["data"][PROMPT_TOKENS] == 1 - assert 
span["data"][TOTAL_TOKENS] == 3 + assert span["op"] == "openai.chat_completions.create" + assert span["data"][COMPLETION_TOKENS_USED] == 2 + assert span["data"][PROMPT_TOKENS_USED] == 1 + assert span["data"][TOTAL_TOKENS_USED] == 3 + + +def test_bad_chat_completion(sentry_init, capture_events): + sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock( + side_effect=OpenAIError("API rate limit reached") + ) + with pytest.raises(OpenAIError): + client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + (event,) = events + assert event["level"] == "error" def test_embeddings_create(sentry_init, capture_events): @@ -151,7 +167,7 @@ def test_embeddings_create(sentry_init, capture_events): tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] - assert span["op"] == "openai" + assert span["op"] == "openai.embeddings.create" - assert span["data"][PROMPT_TOKENS] == 20 - assert span["data"][TOTAL_TOKENS] == 30 + assert span["data"][PROMPT_TOKENS_USED] == 20 + assert span["data"][TOTAL_TOKENS_USED] == 30 diff --git a/tox.ini b/tox.ini index c9e476ed24..32a9b748df 100644 --- a/tox.ini +++ b/tox.ini @@ -149,6 +149,7 @@ envlist = # OpenAI {py3.9,py3.11,py3.12}-openai-v1 {py3.9,py3.11,py3.12}-openai-latest + {py3.9,py3.11,py3.12}-openai-without-tiktoken # OpenTelemetry (OTel) {py3.7,py3.9,py3.11,py3.12}-opentelemetry @@ -444,9 +445,11 @@ deps = loguru-latest: loguru # OpenAI - openai: tiktoken openai-v1: openai~=1.0.0 + openai-v1: tiktoken~=0.6.0 openai-latest: openai + openai-latest: tiktoken~=0.6.0 + openai-without-tiktoken: openai # OpenTelemetry (OTel) opentelemetry: opentelemetry-distro From 0e319f1d3808ceacaa5b620b068cf924224033a0 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 7 Mar 2024 11:10:32 -0500 Subject: [PATCH 10/20] Fix tiktoken test variant --- setup.py | 1 + 
tests/integrations/openai/test_openai.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 0af275d6af..0299bf91fb 100644 --- a/setup.py +++ b/setup.py @@ -60,6 +60,7 @@ def get_file_text(file_name): "httpx": ["httpx>=0.16.0"], "huey": ["huey>=2"], "loguru": ["loguru>=0.5"], + "openai": ["openai>=1.0.0", "tiktoken>=0.3.0"], "opentelemetry": ["opentelemetry-distro>=0.35b0"], "opentelemetry-experimental": [ "opentelemetry-distro~=0.40b0", diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index edd37a5b95..a4d0de2b76 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -118,9 +118,15 @@ def test_streaming_chat_completion(sentry_init, capture_events): assert tx["type"] == "transaction" span = tx["spans"][0] assert span["op"] == "openai.chat_completions.create" - assert span["data"][COMPLETION_TOKENS_USED] == 2 - assert span["data"][PROMPT_TOKENS_USED] == 1 - assert span["data"][TOTAL_TOKENS_USED] == 3 + + try: + import tiktoken # type: ignore # noqa # pylint: disable=unused-import + + assert span["data"][COMPLETION_TOKENS_USED] == 2 + assert span["data"][PROMPT_TOKENS_USED] == 1 + assert span["data"][TOTAL_TOKENS_USED] == 3 + except ImportError: + pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly def test_bad_chat_completion(sentry_init, capture_events): From 3acc9ab5f9f50c0b45d235535e7cb9f97faa5ee8 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 7 Mar 2024 11:12:44 -0500 Subject: [PATCH 11/20] PII gate the request and response --- sentry_sdk/integrations/openai.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index f3bee1c674..eef584de9f 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -7,7 +7,7 @@ import sentry_sdk from 
sentry_sdk._functools import wraps -from sentry_sdk.hub import Hub +from sentry_sdk.hub import Hub, _should_send_default_pii from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.utils import logger, capture_internal_exceptions, event_from_exception @@ -149,12 +149,14 @@ def new_chat_completion(*args, **kwargs): raise e from None with capture_internal_exceptions(): - span.set_data("messages", messages) + if _should_send_default_pii(): + span.set_data("messages", messages) span.set_data("model", model) span.set_data("streaming", streaming) if hasattr(res, "choices"): - span.set_data("response", res.choices[0].message) + if _should_send_default_pii(): + span.set_data("response", res.choices[0].message) _calculate_chat_completion_usage(messages, res, span) span.__exit__(None, None, None) elif hasattr(res, "_iterator"): @@ -182,7 +184,8 @@ def new_iterator(): all_responses = list( map(lambda chunk: "".join(chunk), data_buf) ) - span.set_data("responses", all_responses) + if _should_send_default_pii(): + span.set_data("responses", all_responses) _calculate_chat_completion_usage( messages, res, span, all_responses ) From 65c311dea597681456ef587e890852b6a28a85ea Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 7 Mar 2024 11:44:37 -0500 Subject: [PATCH 12/20] Rename set_data tags --- sentry_sdk/integrations/openai.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index eef584de9f..7ba4112dbd 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -150,13 +150,15 @@ def new_chat_completion(*args, **kwargs): with capture_internal_exceptions(): if _should_send_default_pii(): - span.set_data("messages", messages) - span.set_data("model", model) - span.set_data("streaming", streaming) + span.set_data("ai.input_messages", messages) + span.set_data("ai.model_id", model) + 
span.set_data("ai.streaming", streaming) if hasattr(res, "choices"): if _should_send_default_pii(): - span.set_data("response", res.choices[0].message) + span.set_data( + "ai.responses", list(map(lambda x: x.message, res.choices)) + ) _calculate_chat_completion_usage(messages, res, span) span.__exit__(None, None, None) elif hasattr(res, "_iterator"): @@ -185,7 +187,7 @@ def new_iterator(): map(lambda chunk: "".join(chunk), data_buf) ) if _should_send_default_pii(): - span.set_data("responses", all_responses) + span.set_data("ai.responses", all_responses) _calculate_chat_completion_usage( messages, res, span, all_responses ) @@ -210,12 +212,17 @@ def new_embeddings_create(*args, **kwargs): op=consts.OP.OPENAI_EMBEDDINGS_CREATE, description="OpenAI Embedding Creation", ) as span: - if "input" in kwargs and isinstance(kwargs["input"], str): - span.set_data("input", kwargs["input"]) + if "input" in kwargs: + if isinstance(kwargs["input"], str): + span.set_data("ai.input_messages", [kwargs["input"]]) + elif ( + isinstance(kwargs["input"], list) + and len(kwargs["input"]) > 0 + and isinstance(kwargs["input"][0], str) + ): + span.set_data("ai.input_messages", kwargs["input"]) if "model" in kwargs: - span.set_data("model", kwargs["model"]) - if "dimensions" in kwargs: - span.set_data("dimensions", kwargs["dimensions"]) + span.set_data("ai.model_id", kwargs["model"]) try: response = f(*args, **kwargs) except Exception as e: From 4b33a4e0a85a1922f6c929197880c1fe9e00b194 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 7 Mar 2024 12:00:07 -0500 Subject: [PATCH 13/20] Move doc location --- sentry_sdk/integrations/openai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 7ba4112dbd..e669fd852f 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -34,7 +34,7 @@ def count_tokens(s): logger.info( "The Sentry Python SDK requires 'tiktoken' in 
order to measure token usage from some OpenAI APIs" "Please install 'tiktoken' if you aren't receiving token usage in Sentry." - "See https://docs.sentry.io/platforms/python/guides/openai/ for more information." + "See https://docs.sentry.io/platforms/python/integrations/openai/ for more information." ) def count_tokens(s): From 56d267944eb1da7ce0d84f05f5722704e2714f4f Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 7 Mar 2024 13:00:59 -0500 Subject: [PATCH 14/20] Add "exclude prompts" flag as optional --- sentry_sdk/integrations/openai.py | 34 +++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index e669fd852f..be0f8c3812 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -50,6 +50,10 @@ def count_tokens(s): class OpenAIIntegration(Integration): identifier = "openai" + def __init__(self, exclude_prompts=False): + # type: (OpenAIIntegration, bool) -> None + self.exclude_prompts = exclude_prompts + @staticmethod def setup_once(): # type: () -> None @@ -122,6 +126,14 @@ def _wrap_chat_completion_create(f): @wraps(f) def new_chat_completion(*args, **kwargs): # type: (*Any, **Any) -> Any + hub = Hub.current + if not hub: + return f(*args, **kwargs) + + integration = hub.get_integration(OpenAIIntegration) # type: OpenAIIntegration + if not integration: + return f(*args, **kwargs) + if "messages" not in kwargs: # invalid call (in all versions of openai), let it return error return f(*args, **kwargs) @@ -149,13 +161,13 @@ def new_chat_completion(*args, **kwargs): raise e from None with capture_internal_exceptions(): - if _should_send_default_pii(): + if _should_send_default_pii() or not integration.exclude_prompts: span.set_data("ai.input_messages", messages) span.set_data("ai.model_id", model) span.set_data("ai.streaming", streaming) if hasattr(res, "choices"): - if _should_send_default_pii(): + if 
_should_send_default_pii() or not integration.exclude_prompts: span.set_data( "ai.responses", list(map(lambda x: x.message, res.choices)) ) @@ -186,7 +198,10 @@ def new_iterator(): all_responses = list( map(lambda chunk: "".join(chunk), data_buf) ) - if _should_send_default_pii(): + if ( + _should_send_default_pii() + or not integration.exclude_prompts + ): span.set_data("ai.responses", all_responses) _calculate_chat_completion_usage( messages, res, span, all_responses @@ -208,11 +223,22 @@ def _wrap_embeddings_create(f): @wraps(f) def new_embeddings_create(*args, **kwargs): # type: (*Any, **Any) -> Any + + hub = Hub.current + if not hub: + return f(*args, **kwargs) + + integration = hub.get_integration(OpenAIIntegration) # type: OpenAIIntegration + if not integration: + return f(*args, **kwargs) + with sentry_sdk.start_span( op=consts.OP.OPENAI_EMBEDDINGS_CREATE, description="OpenAI Embedding Creation", ) as span: - if "input" in kwargs: + if "input" in kwargs and ( + _should_send_default_pii() or not integration.exclude_prompts + ): if isinstance(kwargs["input"], str): span.set_data("ai.input_messages", [kwargs["input"]]) elif ( From 72d4b5a17a8ccc565203672ba5474dae33e93074 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 7 Mar 2024 13:07:23 -0500 Subject: [PATCH 15/20] Change prompts to be excluded by default --- sentry_sdk/integrations/openai.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index be0f8c3812..4428f76b5c 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -50,9 +50,9 @@ def count_tokens(s): class OpenAIIntegration(Integration): identifier = "openai" - def __init__(self, exclude_prompts=False): + def __init__(self, include_prompts=False): # type: (OpenAIIntegration, bool) -> None - self.exclude_prompts = exclude_prompts + self.include_prompts = include_prompts @staticmethod def setup_once(): @@ 
-161,13 +161,13 @@ def new_chat_completion(*args, **kwargs): raise e from None with capture_internal_exceptions(): - if _should_send_default_pii() or not integration.exclude_prompts: + if _should_send_default_pii() or integration.include_prompts: span.set_data("ai.input_messages", messages) span.set_data("ai.model_id", model) span.set_data("ai.streaming", streaming) if hasattr(res, "choices"): - if _should_send_default_pii() or not integration.exclude_prompts: + if _should_send_default_pii() or integration.include_prompts: span.set_data( "ai.responses", list(map(lambda x: x.message, res.choices)) ) @@ -200,7 +200,7 @@ def new_iterator(): ) if ( _should_send_default_pii() - or not integration.exclude_prompts + or integration.include_prompts ): span.set_data("ai.responses", all_responses) _calculate_chat_completion_usage( @@ -237,7 +237,7 @@ def new_embeddings_create(*args, **kwargs): description="OpenAI Embedding Creation", ) as span: if "input" in kwargs and ( - _should_send_default_pii() or not integration.exclude_prompts + _should_send_default_pii() or integration.include_prompts ): if isinstance(kwargs["input"], str): span.set_data("ai.input_messages", [kwargs["input"]]) From b206e4c1e4fbea57012a386103d80ff3f9bd01b6 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 7 Mar 2024 13:16:57 -0500 Subject: [PATCH 16/20] Set flag in tests --- sentry_sdk/integrations/openai.py | 4 +--- tests/integrations/openai/test_openai.py | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 4428f76b5c..56d20613db 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -96,9 +96,7 @@ def _calculate_chat_completion_usage( if prompt_tokens == 0: for message in messages: - if hasattr(message, "content"): - prompt_tokens += count_tokens(message.content) - elif "content" in message: + if "content" in message: prompt_tokens += 
count_tokens(message["content"]) if completion_tokens == 0: diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index a4d0de2b76..761f605168 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -18,7 +18,9 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0 + ) events = capture_events() client = OpenAI(api_key="z") @@ -64,7 +66,9 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events): # noinspection PyTypeChecker def test_streaming_chat_completion(sentry_init, capture_events): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0 + ) events = capture_events() client = OpenAI(api_key="z") @@ -130,7 +134,9 @@ def test_streaming_chat_completion(sentry_init, capture_events): def test_bad_chat_completion(sentry_init, capture_events): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0 + ) events = capture_events() client = OpenAI(api_key="z") @@ -147,7 +153,9 @@ def test_bad_chat_completion(sentry_init, capture_events): def test_embeddings_create(sentry_init, capture_events): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0 + ) events = capture_events() client = OpenAI(api_key="z") From 4bc43107f7eb86aeebc78425948be2595353bf6b Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 7 Mar 2024 15:07:43 -0500 Subject: [PATCH 17/20] Fix tiktoken tox.ini extra dash --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/tox.ini b/tox.ini index 32a9b748df..dcb61f5ddb 100644 --- a/tox.ini +++ b/tox.ini @@ -149,7 +149,7 @@ envlist = # OpenAI {py3.9,py3.11,py3.12}-openai-v1 {py3.9,py3.11,py3.12}-openai-latest - {py3.9,py3.11,py3.12}-openai-without-tiktoken + {py3.9,py3.11,py3.12}-openai-notiktoken # OpenTelemetry (OTel) {py3.7,py3.9,py3.11,py3.12}-opentelemetry From c6f5cd2accbc76043116802a4dc965b1d443e403 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Fri, 8 Mar 2024 10:16:12 -0500 Subject: [PATCH 18/20] Change strip PII semantics --- sentry_sdk/consts.py | 4 +- sentry_sdk/integrations/openai.py | 10 +-- tests/integrations/openai/test_openai.py | 108 ++++++++++++++++------- 3 files changed, 85 insertions(+), 37 deletions(-) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index ce38ebeef9..e4edfddef1 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -219,8 +219,8 @@ class OP: MIDDLEWARE_STARLITE = "middleware.starlite" MIDDLEWARE_STARLITE_RECEIVE = "middleware.starlite.receive" MIDDLEWARE_STARLITE_SEND = "middleware.starlite.send" - OPENAI_CHAT_COMPLETIONS_CREATE = "openai.chat_completions.create" - OPENAI_EMBEDDINGS_CREATE = "openai.embeddings.create" + OPENAI_CHAT_COMPLETIONS_CREATE = "ai.chat_completions.create.openai" + OPENAI_EMBEDDINGS_CREATE = "ai.embeddings.create.openai" QUEUE_SUBMIT_ARQ = "queue.submit.arq" QUEUE_TASK_ARQ = "queue.task.arq" QUEUE_SUBMIT_CELERY = "queue.submit.celery" diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 56d20613db..5c05a43916 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -50,7 +50,7 @@ def count_tokens(s): class OpenAIIntegration(Integration): identifier = "openai" - def __init__(self, include_prompts=False): + def __init__(self, include_prompts=True): # type: (OpenAIIntegration, bool) -> None self.include_prompts = include_prompts @@ -159,13 +159,13 @@ def new_chat_completion(*args, **kwargs): raise e from None with 
capture_internal_exceptions(): - if _should_send_default_pii() or integration.include_prompts: + if _should_send_default_pii() and integration.include_prompts: span.set_data("ai.input_messages", messages) span.set_data("ai.model_id", model) span.set_data("ai.streaming", streaming) if hasattr(res, "choices"): - if _should_send_default_pii() or integration.include_prompts: + if _should_send_default_pii() and integration.include_prompts: span.set_data( "ai.responses", list(map(lambda x: x.message, res.choices)) ) @@ -198,7 +198,7 @@ def new_iterator(): ) if ( _should_send_default_pii() - or integration.include_prompts + and integration.include_prompts ): span.set_data("ai.responses", all_responses) _calculate_chat_completion_usage( @@ -235,7 +235,7 @@ def new_embeddings_create(*args, **kwargs): description="OpenAI Embedding Creation", ) as span: if "input" in kwargs and ( - _should_send_default_pii() or integration.include_prompts + _should_send_default_pii() and integration.include_prompts ): if isinstance(kwargs["input"], str): span.set_data("ai.input_messages", [kwargs["input"]]) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 761f605168..d710d2208a 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -17,33 +17,39 @@ from unittest import mock # python 3.3 and above +EXAMPLE_CHAT_COMPLETION = ChatCompletion( + id="chat-id", + choices=[ + Choice( + index=0, + finish_reason="stop", + message=ChatCompletionMessage( + role="assistant", content="the model response" + ), + ) + ], + created=10000000, + model="model-id", + object="chat.completion", + usage=CompletionUsage( + completion_tokens=10, + prompt_tokens=20, + total_tokens=30, + ), +) + + def test_nonstreaming_chat_completion(sentry_init, capture_events): sentry_init( - integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0 + integrations=[OpenAIIntegration()], + traces_sample_rate=1.0, 
+ send_default_pii=True, ) events = capture_events() client = OpenAI(api_key="z") - returned_chat = ChatCompletion( - id="chat-id", - choices=[ - Choice( - index=0, - finish_reason="stop", - message=ChatCompletionMessage(role="assistant", content="response"), - ) - ], - created=10000000, - model="model-id", - object="chat.completion", - usage=CompletionUsage( - completion_tokens=10, - prompt_tokens=20, - total_tokens=30, - ), - ) + client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) - client.chat.completions._post = mock.Mock(return_value=returned_chat) with start_transaction(name="openai tx"): response = ( client.chat.completions.create( @@ -53,17 +59,63 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events): .message.content ) - assert response == "response" + assert response == "the model response" tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] - assert span["op"] == "openai.chat_completions.create" + assert span["op"] == "ai.chat_completions.create.openai" + assert "the model response" in span["data"]["ai.responses"][0] assert span["data"][COMPLETION_TOKENS_USED] == 10 assert span["data"][PROMPT_TOKENS_USED] == 20 assert span["data"][TOTAL_TOKENS_USED] == 30 +def test_stripped_pii_without_send_default_pii(sentry_init, capture_events): + sentry_init( + integrations=[OpenAIIntegration()], + traces_sample_rate=1.0, + ) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert "ai.input_messages" not in span["data"] + assert "ai.responses" not in span["data"] + + +def test_stripped_pii_without_send_prompts(sentry_init, capture_events): + sentry_init( + 
integrations=[OpenAIIntegration(include_prompts=False)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert "ai.input_messages" not in span["data"] + assert "ai.responses" not in span["data"] + + # noinspection PyTypeChecker def test_streaming_chat_completion(sentry_init, capture_events): sentry_init( @@ -121,7 +173,7 @@ def test_streaming_chat_completion(sentry_init, capture_events): tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] - assert span["op"] == "openai.chat_completions.create" + assert span["op"] == "ai.chat_completions.create.openai" try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import @@ -134,9 +186,7 @@ def test_streaming_chat_completion(sentry_init, capture_events): def test_bad_chat_completion(sentry_init, capture_events): - sentry_init( - integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0 - ) + sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) events = capture_events() client = OpenAI(api_key="z") @@ -153,9 +203,7 @@ def test_bad_chat_completion(sentry_init, capture_events): def test_embeddings_create(sentry_init, capture_events): - sentry_init( - integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0 - ) + sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) events = capture_events() client = OpenAI(api_key="z") @@ -181,7 +229,7 @@ def test_embeddings_create(sentry_init, capture_events): tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] - assert span["op"] == "openai.embeddings.create" + assert span["op"] 
== "ai.embeddings.create.openai" assert span["data"][PROMPT_TOKENS_USED] == 20 assert span["data"][TOTAL_TOKENS_USED] == 30 From d1eae09e9cf951e65700c3106df53478e422d940 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Fri, 8 Mar 2024 10:34:41 -0500 Subject: [PATCH 19/20] More test coverage for PII --- tests/integrations/openai/test_openai.py | 104 +++++++++++------------ 1 file changed, 50 insertions(+), 54 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index d710d2208a..ecdedd2694 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -39,11 +39,17 @@ ) -def test_nonstreaming_chat_completion(sentry_init, capture_events): +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_nonstreaming_chat_completion( + sentry_init, capture_events, send_default_pii, include_prompts +): sentry_init( - integrations=[OpenAIIntegration()], + integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, - send_default_pii=True, + send_default_pii=send_default_pii, ) events = capture_events() @@ -64,62 +70,31 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events): assert tx["type"] == "transaction" span = tx["spans"][0] assert span["op"] == "ai.chat_completions.create.openai" - assert "the model response" in span["data"]["ai.responses"][0] + + if send_default_pii and include_prompts: + assert "hello" in span["data"]["ai.input_messages"][0]["content"] + assert "the model response" in span["data"]["ai.responses"][0] + else: + assert "ai.input_messages" not in span["data"] + assert "ai.responses" not in span["data"] assert span["data"][COMPLETION_TOKENS_USED] == 10 assert span["data"][PROMPT_TOKENS_USED] == 20 assert span["data"][TOTAL_TOKENS_USED] == 30 -def test_stripped_pii_without_send_default_pii(sentry_init, capture_events): - 
sentry_init( - integrations=[OpenAIIntegration()], - traces_sample_rate=1.0, - ) - events = capture_events() - - client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) - - with start_transaction(name="openai tx"): - client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] - ) - - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert "ai.input_messages" not in span["data"] - assert "ai.responses" not in span["data"] - - -def test_stripped_pii_without_send_prompts(sentry_init, capture_events): - sentry_init( - integrations=[OpenAIIntegration(include_prompts=False)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - events = capture_events() - - client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) - - with start_transaction(name="openai tx"): - client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] - ) - - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert "ai.input_messages" not in span["data"] - assert "ai.responses" not in span["data"] - - # noinspection PyTypeChecker -def test_streaming_chat_completion(sentry_init, capture_events): +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_streaming_chat_completion( + sentry_init, capture_events, send_default_pii, include_prompts +): sentry_init( - integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0 + integrations=[OpenAIIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, ) events = capture_events() @@ -175,6 +150,13 @@ def test_streaming_chat_completion(sentry_init, capture_events): span = tx["spans"][0] assert span["op"] == "ai.chat_completions.create.openai" + if 
send_default_pii and include_prompts: + assert "hello" in span["data"]["ai.input_messages"][0]["content"] + assert "hello world" in span["data"]["ai.responses"][0] + else: + assert "ai.input_messages" not in span["data"] + assert "ai.responses" not in span["data"] + try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import @@ -202,8 +184,18 @@ def test_bad_chat_completion(sentry_init, capture_events): assert event["level"] == "error" -def test_embeddings_create(sentry_init, capture_events): - sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_embeddings_create( + sentry_init, capture_events, send_default_pii, include_prompts +): + sentry_init( + integrations=[OpenAIIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) events = capture_events() client = OpenAI(api_key="z") @@ -221,7 +213,7 @@ def test_embeddings_create(sentry_init, capture_events): client.embeddings._post = mock.Mock(return_value=returned_embedding) with start_transaction(name="openai tx"): response = client.embeddings.create( - input="test", model="text-embedding-3-large" + input="hello", model="text-embedding-3-large" ) assert len(response.data[0].embedding) == 3 @@ -230,6 +222,10 @@ def test_embeddings_create(sentry_init, capture_events): assert tx["type"] == "transaction" span = tx["spans"][0] assert span["op"] == "ai.embeddings.create.openai" + if send_default_pii and include_prompts: + assert "hello" in span["data"]["ai.input_messages"][0] + else: + assert "ai.input_messages" not in span["data"] assert span["data"][PROMPT_TOKENS_USED] == 20 assert span["data"][TOTAL_TOKENS_USED] == 30 From c96e0e59ab3744073f9348c7a4671baf3a8a3a3d Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Fri, 8 Mar 2024 10:39:26 -0500 Subject: [PATCH 20/20] notiktoken --- tox.ini 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index dcb61f5ddb..1e7ba06a00 100644 --- a/tox.ini +++ b/tox.ini @@ -449,7 +449,7 @@ deps = openai-v1: tiktoken~=0.6.0 openai-latest: openai openai-latest: tiktoken~=0.6.0 - openai-without-tiktoken: openai + openai-notiktoken: openai # OpenTelemetry (OTel) opentelemetry: opentelemetry-distro