@@ -2265,66 +2265,107 @@ def test_cache_tokens_nonstreaming(sentry_init, capture_events):
     assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20


-def test_input_tokens_include_cached_nonstreaming(sentry_init, capture_events):
+def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_events):
     """
-    Test that gen_ai.usage.input_tokens includes cached tokens.
+    Test that gen_ai.usage.input_tokens includes cache_write tokens (non-streaming).

-    Anthropic's usage.input_tokens excludes cached/cache_write tokens,
-    but gen_ai.usage.input_tokens should be the TOTAL input tokens
-    (including cached + cache_write) so that downstream cost calculations
-    don't produce negative values.
+    Reproduces a real Anthropic cache-write response. Anthropic's usage.input_tokens
+    only counts non-cached tokens, but gen_ai.usage.input_tokens should be the TOTAL
+    so downstream cost calculations don't produce negative values.

-    See: negative gen_ai.cost.input_tokens bug when cache_read > input_tokens.
+    Real Anthropic response (from E2E test):
+        Usage(input_tokens=19, output_tokens=14,
+              cache_creation_input_tokens=2846, cache_read_input_tokens=0)
     """
     sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
     events = capture_events()
     client = Anthropic(api_key="z")

-    # Simulate Anthropic response where input_tokens=100 EXCLUDES cached tokens
-    # cache_read=80 and cache_write=20 are separate
-    # Total input tokens processed = 100 + 80 + 20 = 200
     client.messages._post = mock.Mock(
         return_value=Message(
             id="id",
-            model="claude-3-5-sonnet-20241022",
+            model="claude-sonnet-4-20250514",
             role="assistant",
-            content=[TextBlock(type="text", text="Response")],
+            content=[TextBlock(type="text", text="3 + 3 equals 6.")],
             type="message",
             usage=Usage(
-                input_tokens=100,
-                output_tokens=50,
-                cache_read_input_tokens=80,
-                cache_creation_input_tokens=20,
+                input_tokens=19,
+                output_tokens=14,
+                cache_read_input_tokens=0,
+                cache_creation_input_tokens=2846,
             ),
         )
     )

     with start_transaction(name="anthropic"):
         client.messages.create(
             max_tokens=1024,
-            messages=[{"role": "user", "content": "Hello"}],
-            model="claude-3-5-sonnet-20241022",
+            messages=[{"role": "user", "content": "What is 3+3?"}],
+            model="claude-sonnet-4-20250514",
         )

     (span,) = events[0]["spans"]

-    # input_tokens should be total: 100 (non-cached) + 80 (cache_read) + 20 (cache_write) = 200
-    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200
+    # input_tokens should be total: 19 (non-cached) + 2846 (cache_write) = 2865
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865
+    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879  # 2865 + 14
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 0
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 2846

-    # total_tokens should include the full input count
-    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250  # 200 + 50

-    # Cache fields should still be reported correctly
-    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80
-    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20
+def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_events):
+    """
+    Test that gen_ai.usage.input_tokens includes cache_read tokens (non-streaming).
+
+    Reproduces a real Anthropic cache-hit response. This is the scenario that
+    caused negative gen_ai.cost.input_tokens: input_tokens=19 but cached=2846,
+    so the backend computed 19 - 2846 = -2827 "regular" tokens.
+
+    Real Anthropic response (from E2E test):
+        Usage(input_tokens=19, output_tokens=14,
+              cache_creation_input_tokens=0, cache_read_input_tokens=2846)
+    """
+    sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
+    events = capture_events()
+    client = Anthropic(api_key="z")
+
+    client.messages._post = mock.Mock(
+        return_value=Message(
+            id="id",
+            model="claude-sonnet-4-20250514",
+            role="assistant",
+            content=[TextBlock(type="text", text="5 + 5 = 10.")],
+            type="message",
+            usage=Usage(
+                input_tokens=19,
+                output_tokens=14,
+                cache_read_input_tokens=2846,
+                cache_creation_input_tokens=0,
+            ),
+        )
+    )

+    with start_transaction(name="anthropic"):
+        client.messages.create(
+            max_tokens=1024,
+            messages=[{"role": "user", "content": "What is 5+5?"}],
+            model="claude-sonnet-4-20250514",
+        )
+
+    (span,) = events[0]["spans"]

-def test_input_tokens_include_cached_streaming(sentry_init, capture_events):
+    # input_tokens should be total: 19 (non-cached) + 2846 (cache_read) = 2865
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865
+    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879  # 2865 + 14
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0
+
+
+def test_input_tokens_include_cache_read_streaming(sentry_init, capture_events):
     """
-    Test that gen_ai.usage.input_tokens includes cached tokens for streaming responses.
+    Test that gen_ai.usage.input_tokens includes cache_read tokens (streaming).

-    Same bug as non-streaming: Anthropic's input_tokens excludes cached tokens,
-    leading to negative cost calculations when cache_read > input_tokens.
+    Same cache-hit scenario as non-streaming, using realistic streaming events.
     """
     client = Anthropic(api_key="z")
     returned_stream = Stream(cast_to=None, response=None, client=client)
@@ -2333,22 +2374,22 @@ def test_input_tokens_include_cached_streaming(sentry_init, capture_events):
             type="message_start",
             message=Message(
                 id="id",
-                model="claude-3-5-sonnet-20241022",
+                model="claude-sonnet-4-20250514",
                 role="assistant",
                 content=[],
                 type="message",
                 usage=Usage(
-                    input_tokens=100,
+                    input_tokens=19,
                     output_tokens=0,
-                    cache_read_input_tokens=80,
-                    cache_creation_input_tokens=20,
+                    cache_read_input_tokens=2846,
+                    cache_creation_input_tokens=0,
                 ),
             ),
         ),
         MessageDeltaEvent(
             type="message_delta",
             delta=Delta(stop_reason="end_turn"),
-            usage=MessageDeltaUsage(output_tokens=50),
+            usage=MessageDeltaUsage(output_tokens=14),
         ),
     ]
@@ -2359,29 +2400,27 @@ def test_input_tokens_include_cached_streaming(sentry_init, capture_events):
     with start_transaction(name="anthropic"):
         for _ in client.messages.create(
             max_tokens=1024,
-            messages=[{"role": "user", "content": "Hello"}],
-            model="claude-3-5-sonnet-20241022",
+            messages=[{"role": "user", "content": "What is 5+5?"}],
+            model="claude-sonnet-4-20250514",
             stream=True,
         ):
             pass

     (span,) = events[0]["spans"]

-    # input_tokens should be total: 100 + 80 + 20 = 200
-    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200
-
-    # total_tokens should include the full input count
-    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250  # 200 + 50
-
-    # Cache fields should still be reported correctly
-    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80
-    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20
+    # input_tokens should be total: 19 + 2846 = 2865
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865
+    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879  # 2865 + 14
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0


 def test_input_tokens_unchanged_without_caching(sentry_init, capture_events):
     """
     Test that input_tokens is unchanged when there are no cached tokens.
-    Ensures the fix doesn't break the non-caching case.
+
+    Real Anthropic response (from E2E test, simple call without caching):
+        Usage(input_tokens=20, output_tokens=12)
     """
     sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
     events = capture_events()
@@ -2390,29 +2429,28 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_events):
     client.messages._post = mock.Mock(
         return_value=Message(
             id="id",
-            model="claude-3-5-sonnet-20241022",
+            model="claude-sonnet-4-20250514",
             role="assistant",
-            content=[TextBlock(type="text", text="Response")],
+            content=[TextBlock(type="text", text="2+2 equals 4.")],
             type="message",
             usage=Usage(
-                input_tokens=100,
-                output_tokens=50,
+                input_tokens=20,
+                output_tokens=12,
             ),
         )
     )

     with start_transaction(name="anthropic"):
         client.messages.create(
             max_tokens=1024,
-            messages=[{"role": "user", "content": "Hello"}],
-            model="claude-3-5-sonnet-20241022",
+            messages=[{"role": "user", "content": "What is 2+2?"}],
+            model="claude-sonnet-4-20250514",
         )

     (span,) = events[0]["spans"]

-    # Without caching, input_tokens should remain as-is
-    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 100
-    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 150  # 100 + 50
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 20
+    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 32  # 20 + 12


 def test_cache_tokens_streaming(sentry_init, capture_events):
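For reference, a minimal sketch of the totaling rule these tests pin down. The helper name and the SimpleNamespace stand-in below are illustrative assumptions, not the SDK's actual implementation; only the arithmetic is taken from the assertions above (Anthropic's usage.input_tokens excludes cached tokens, so both cache components are added back):

from types import SimpleNamespace


def total_input_tokens(usage):
    # Hypothetical helper (illustrative only): Anthropic's usage.input_tokens
    # counts only non-cached tokens, so cache_read and cache_write are added
    # back to get the TOTAL reported as gen_ai.usage.input_tokens.
    cache_read = getattr(usage, "cache_read_input_tokens", 0) or 0
    cache_write = getattr(usage, "cache_creation_input_tokens", 0) or 0
    return usage.input_tokens + cache_read + cache_write


# Cache-hit case from the tests above: 19 + 2846 + 0 == 2865, so a backend
# computing "regular = total - cached" gets 2865 - 2846 = 19, never negative.
usage = SimpleNamespace(
    input_tokens=19, cache_read_input_tokens=2846, cache_creation_input_tokens=0
)
assert total_input_tokens(usage) == 2865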