Skip to content

Commit 7168d1f

Browse files
committed
fix(anthropic): fix token accounting
1 parent 28d9288 commit 7168d1f

File tree

2 files changed

+165
-2
lines changed

2 files changed

+165
-2
lines changed

sentry_sdk/integrations/anthropic.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,11 @@ def _get_token_usage(result: "Messages") -> "tuple[int, int, int, int]":
108108
):
109109
cache_write_input_tokens = usage.cache_creation_input_tokens
110110

111+
# Anthropic's input_tokens excludes cached/cache_write tokens.
112+
# Normalize to total input tokens so downstream cost calculations
113+
# (input_tokens - cached) don't produce negative values.
114+
input_tokens += cache_read_input_tokens + cache_write_input_tokens
115+
111116
return (
112117
input_tokens,
113118
output_tokens,
@@ -466,11 +471,15 @@ def new_iterator() -> "Iterator[MessageStreamEvent]":
466471
)
467472
yield event
468473

474+
# Anthropic's input_tokens excludes cached/cache_write tokens.
475+
# Normalize to total input tokens for correct cost calculations.
476+
total_input = usage.input_tokens + (usage.cache_read_input_tokens or 0) + (usage.cache_write_input_tokens or 0)
477+
469478
_set_output_data(
470479
span=span,
471480
integration=integration,
472481
model=model,
473-
input_tokens=usage.input_tokens,
482+
input_tokens=total_input,
474483
output_tokens=usage.output_tokens,
475484
cache_read_input_tokens=usage.cache_read_input_tokens,
476485
cache_write_input_tokens=usage.cache_write_input_tokens,
@@ -496,11 +505,15 @@ async def new_iterator_async() -> "AsyncIterator[MessageStreamEvent]":
496505
)
497506
yield event
498507

508+
# Anthropic's input_tokens excludes cached/cache_write tokens.
509+
# Normalize to total input tokens for correct cost calculations.
510+
total_input = usage.input_tokens + (usage.cache_read_input_tokens or 0) + (usage.cache_write_input_tokens or 0)
511+
499512
_set_output_data(
500513
span=span,
501514
integration=integration,
502515
model=model,
503-
input_tokens=usage.input_tokens,
516+
input_tokens=total_input,
504517
output_tokens=usage.output_tokens,
505518
cache_read_input_tokens=usage.cache_read_input_tokens,
506519
cache_write_input_tokens=usage.cache_write_input_tokens,

tests/integrations/anthropic/test_anthropic.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2265,6 +2265,156 @@ def test_cache_tokens_nonstreaming(sentry_init, capture_events):
22652265
assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20
22662266

22672267

2268+
def test_input_tokens_include_cached_nonstreaming(sentry_init, capture_events):
    """
    Verify gen_ai.usage.input_tokens reports the TOTAL input token count.

    Anthropic's ``usage.input_tokens`` excludes cache-read and cache-write
    tokens. The integration must add them back in, otherwise downstream cost
    math of the form ``input_tokens - cached`` can go negative whenever
    ``cache_read > input_tokens``.
    """
    sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
    events = capture_events()
    client = Anthropic(api_key="z")

    # Fake an Anthropic reply in which input_tokens=100 does NOT include the
    # cached tokens: cache_read=80 and cache_write=20 come in separate fields,
    # so the true number of input tokens processed is 100 + 80 + 20 = 200.
    fake_usage = Usage(
        input_tokens=100,
        output_tokens=50,
        cache_read_input_tokens=80,
        cache_creation_input_tokens=20,
    )
    fake_message = Message(
        id="id",
        model="claude-3-5-sonnet-20241022",
        role="assistant",
        content=[TextBlock(type="text", text="Response")],
        type="message",
        usage=fake_usage,
    )
    client.messages._post = mock.Mock(return_value=fake_message)

    with start_transaction(name="anthropic"):
        client.messages.create(
            max_tokens=1024,
            messages=[{"role": "user", "content": "Hello"}],
            model="claude-3-5-sonnet-20241022",
        )

    (span,) = events[0]["spans"]
    span_data = span["data"]

    # Total input tokens: 100 non-cached + 80 cache-read + 20 cache-write.
    assert span_data[SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200
    # Total tokens cover the full input plus the 50 output tokens.
    assert span_data[SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250
    # The dedicated cache fields keep their raw values.
    assert span_data[SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80
    assert span_data[SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20
2320+
2321+
2322+
def test_input_tokens_include_cached_streaming(sentry_init, capture_events):
    """
    Verify total input tokens are reported for streaming responses too.

    Same issue as the non-streaming case: Anthropic's ``input_tokens``
    excludes cached tokens, which would make cost calculations negative
    whenever ``cache_read > input_tokens``.
    """
    client = Anthropic(api_key="z")

    # Build a two-event stream: message_start carries the input-side usage
    # (100 non-cached + 80 cache-read + 20 cache-write), message_delta
    # finishes the turn and reports the 50 output tokens.
    start_event = MessageStartEvent(
        type="message_start",
        message=Message(
            id="id",
            model="claude-3-5-sonnet-20241022",
            role="assistant",
            content=[],
            type="message",
            usage=Usage(
                input_tokens=100,
                output_tokens=0,
                cache_read_input_tokens=80,
                cache_creation_input_tokens=20,
            ),
        ),
    )
    delta_event = MessageDeltaEvent(
        type="message_delta",
        delta=Delta(stop_reason="end_turn"),
        usage=MessageDeltaUsage(output_tokens=50),
    )
    returned_stream = Stream(cast_to=None, response=None, client=client)
    returned_stream._iterator = [start_event, delta_event]

    sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
    events = capture_events()
    client.messages._post = mock.Mock(return_value=returned_stream)

    with start_transaction(name="anthropic"):
        stream = client.messages.create(
            max_tokens=1024,
            messages=[{"role": "user", "content": "Hello"}],
            model="claude-3-5-sonnet-20241022",
            stream=True,
        )
        # Drain the stream so the integration records usage at the end.
        for _ in stream:
            pass

    (span,) = events[0]["spans"]
    span_data = span["data"]

    # Total input tokens: 100 + 80 + 20.
    assert span_data[SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200
    # Full input count plus the 50 output tokens.
    assert span_data[SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250
    # The dedicated cache fields keep their raw values.
    assert span_data[SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80
    assert span_data[SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20
2379+
2380+
2381+
def test_input_tokens_unchanged_without_caching(sentry_init, capture_events):
    """
    Regression guard: with no cache tokens present, input_tokens passes
    through untouched — the cache normalization must not alter the plain,
    cache-free case.
    """
    sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
    events = capture_events()
    client = Anthropic(api_key="z")

    # Response with plain usage only — no cache_read / cache_write fields.
    client.messages._post = mock.Mock(
        return_value=Message(
            id="id",
            model="claude-3-5-sonnet-20241022",
            role="assistant",
            content=[TextBlock(type="text", text="Response")],
            type="message",
            usage=Usage(input_tokens=100, output_tokens=50),
        )
    )

    with start_transaction(name="anthropic"):
        client.messages.create(
            max_tokens=1024,
            messages=[{"role": "user", "content": "Hello"}],
            model="claude-3-5-sonnet-20241022",
        )

    (span,) = events[0]["spans"]
    span_data = span["data"]

    # No caching involved, so the raw counts are reported as-is.
    assert span_data[SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 100
    assert span_data[SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 150
2416+
2417+
22682418
def test_cache_tokens_streaming(sentry_init, capture_events):
22692419
"""Test cache tokens are tracked for streaming responses."""
22702420
client = Anthropic(api_key="z")

0 commit comments

Comments
 (0)