Skip to content

Commit ffdc42b

Browse files
author
SentienceDEV
committed
fix cloud tracing index issue
1 parent 4132af4 commit ffdc42b

File tree

2 files changed

+152
-9
lines changed

2 files changed

+152
-9
lines changed

sentience/cloud_tracing.py

Lines changed: 66 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,56 @@ def _complete_trace(self) -> None:
581581
if self.logger:
582582
self.logger.warning(f"Error reporting trace completion: {e}")
583583

584+
def _normalize_screenshot_data(
585+
self, screenshot_raw: str, default_format: str = "jpeg"
586+
) -> tuple[str, str]:
587+
"""
588+
Normalize screenshot data by extracting base64 from data URL if needed.
589+
590+
Handles both formats:
591+
- Data URL: "data:image/jpeg;base64,/9j/4AAQ..."
592+
- Pure base64: "/9j/4AAQ..."
593+
594+
Args:
595+
screenshot_raw: Raw screenshot data (data URL or base64)
596+
default_format: Default format if not detected from data URL
597+
598+
Returns:
599+
Tuple of (base64_string, format_string)
600+
"""
601+
if not screenshot_raw:
602+
return "", default_format
603+
604+
# Check if it's a data URL
605+
if screenshot_raw.startswith("data:image"):
606+
# Extract format from "data:image/jpeg;base64,..." or "data:image/png;base64,..."
607+
try:
608+
# Split on comma to get the base64 part
609+
if "," in screenshot_raw:
610+
header, base64_data = screenshot_raw.split(",", 1)
611+
# Extract format from header: "data:image/jpeg;base64"
612+
if "/" in header and ";" in header:
613+
format_part = header.split("/")[1].split(";")[0]
614+
if format_part in ("jpeg", "jpg"):
615+
return base64_data, "jpeg"
616+
elif format_part == "png":
617+
return base64_data, "png"
618+
return base64_data, default_format
619+
else:
620+
# Malformed data URL - return as-is with warning
621+
if self.logger:
622+
self.logger.warning(
623+
"Malformed data URL in screenshot_base64 (missing comma)"
624+
)
625+
return screenshot_raw, default_format
626+
except Exception as e:
627+
if self.logger:
628+
self.logger.warning(f"Error parsing screenshot data URL: {e}")
629+
return screenshot_raw, default_format
630+
631+
# Already pure base64
632+
return screenshot_raw, default_format
633+
584634
def _extract_screenshots_from_trace(self) -> dict[int, dict[str, Any]]:
585635
"""
586636
Extract screenshots from trace events.
@@ -604,15 +654,22 @@ def _extract_screenshots_from_trace(self) -> dict[int, dict[str, Any]]:
604654
# Check if this is a snapshot event with screenshot
605655
if event.get("type") == "snapshot":
606656
data = event.get("data", {})
607-
screenshot_base64 = data.get("screenshot_base64")
608-
609-
if screenshot_base64:
610-
sequence += 1
611-
screenshots[sequence] = {
612-
"base64": screenshot_base64,
613-
"format": data.get("screenshot_format", "jpeg"),
614-
"step_id": event.get("step_id"),
615-
}
657+
screenshot_raw = data.get("screenshot_base64")
658+
659+
if screenshot_raw:
660+
# Normalize: extract base64 from data URL if needed
661+
# Handles both "data:image/jpeg;base64,..." and pure base64
662+
screenshot_base64, screenshot_format = self._normalize_screenshot_data(
663+
screenshot_raw,
664+
data.get("screenshot_format", "jpeg"),
665+
)
666+
if screenshot_base64:
667+
sequence += 1
668+
screenshots[sequence] = {
669+
"base64": screenshot_base64,
670+
"format": screenshot_format,
671+
"step_id": event.get("step_id"),
672+
}
616673
except Exception as e:
617674
if self.logger:
618675
self.logger.error(f"Error extracting screenshots: {e}")

tests/test_cloud_tracing.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,92 @@ def post_side_effect(*args, **kwargs):
394394
if cleaned_trace_path.exists():
395395
os.remove(cleaned_trace_path)
396396

397+
def test_normalize_screenshot_data_handles_data_url(self):
    """Check _normalize_screenshot_data on data URLs, pure base64 and empty input."""
    upload_url = "https://sentience.nyc3.digitaloceanspaces.com/user123/run456/trace.jsonl.gz"
    run_id = f"test-run-{uuid.uuid4().hex[:8]}"

    sink = CloudTraceSink(upload_url, run_id=run_id)

    # Each case: (positional args for the call, expected (base64, format)).
    cases = [
        # JPEG data URL
        (("data:image/jpeg;base64,/9j/4AAQSkZJRg...",), ("/9j/4AAQSkZJRg...", "jpeg")),
        # PNG data URL
        (("data:image/png;base64,iVBORw0KGgoAAAA...",), ("iVBORw0KGgoAAAA...", "png")),
        # Pure base64 passes through unchanged
        (("/9j/4AAQSkZJRg...", "jpeg"), ("/9j/4AAQSkZJRg...", "jpeg")),
        # Empty string
        (("",), ("", "jpeg")),
    ]
    for call_args, (expected_base64, expected_format) in cases:
        base64_str, fmt = sink._normalize_screenshot_data(*call_args)
        assert base64_str == expected_base64
        assert fmt == expected_format

    # Remove the pending trace file, if one exists for this run
    pending_trace = Path.home() / ".sentience" / "traces" / "pending" / f"{run_id}.jsonl"
    if pending_trace.exists():
        os.remove(pending_trace)
432+
433+
def test_cloud_trace_sink_handles_data_url_in_screenshot(self):
    """Check that a snapshot event carrying a data URL yields a pure-base64 screenshot."""
    upload_url = "https://sentience.nyc3.digitaloceanspaces.com/user123/run456/trace.jsonl.gz"
    run_id = f"test-run-{uuid.uuid4().hex[:8]}"
    api_key = "sk_test_123"

    # Screenshot wrapped in a data URL (how langchain-debugging was sending it)
    test_image_base64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
    data_url = f"data:image/png;base64,{test_image_base64}"

    sink = CloudTraceSink(upload_url, run_id=run_id, api_key=api_key)

    # Snapshot event whose screenshot field holds the data URL, not pure base64
    snapshot_event = {
        "v": 1,
        "type": "snapshot",
        "ts": "2026-01-01T00:00:00.000Z",
        "run_id": run_id,
        "seq": 1,
        "step_id": "step-1",
        "data": {
            "url": "https://example.com",
            "element_count": 10,
            "screenshot_base64": data_url,
            "screenshot_format": "png",
        },
    }
    sink.emit(snapshot_event)

    # Extraction should strip the data URL header and keep only the base64 payload
    screenshots = sink._extract_screenshots_from_trace()

    assert len(screenshots) == 1
    assert 1 in screenshots
    extracted = screenshots[1]
    assert extracted["base64"] == test_image_base64
    assert not extracted["base64"].startswith("data:")
    assert extracted["format"] == "png"

    # Close the sink, then remove the raw and cleaned trace files if present
    sink.close()
    cache_dir = Path.home() / ".sentience" / "traces" / "pending"
    for leftover in (
        cache_dir / f"{run_id}.jsonl",
        cache_dir / f"{run_id}.cleaned.jsonl",
    ):
        if leftover.exists():
            os.remove(leftover)
482+
397483

398484
class TestTracerFactory:
399485
"""Test create_tracer factory function."""

0 commit comments

Comments
 (0)