From 6de3adf511874c796a9f6573999c1ee0166c454a Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 31 Jul 2025 19:34:02 +0000 Subject: [PATCH] Fix file upload serialization by removing unnecessary array notation Co-authored-by: chris.freeman --- FILE_UPLOAD_FIX.md | 84 +++++++++++++++++++++++++++++ file_upload_example.py | 67 +++++++++++++++++++++++ src/glean/api_client/utils/forms.py | 4 +- test_file_upload.py | 74 +++++++++++++++++++++++++ 4 files changed, 227 insertions(+), 2 deletions(-) create mode 100644 FILE_UPLOAD_FIX.md create mode 100644 file_upload_example.py create mode 100644 test_file_upload.py diff --git a/FILE_UPLOAD_FIX.md b/FILE_UPLOAD_FIX.md new file mode 100644 index 00000000..0a3ddd01 --- /dev/null +++ b/FILE_UPLOAD_FIX.md @@ -0,0 +1,84 @@ +# File Upload Fix for Glean SDK + +## Issue Description + +The Glean SDK was failing to upload files with a 400 error, even though the same request worked correctly when sent via cURL. The issue was in the multipart form data serialization logic. + +## Root Cause + +The problem was in the `serialize_multipart_form` function in `src/glean/api_client/utils/forms.py`. When handling arrays of files, the code was incorrectly appending `"[]"` to the field name: + +```python +# Before fix (incorrect) +files.append((f_name + "[]", (file_name, content, content_type))) +``` + +This caused the SDK to send field names like `"files[]"` instead of the expected `"files"`. + +## The Fix + +**File:** `src/glean/api_client/utils/forms.py` + +**Change:** Removed the `"[]"` suffix when handling file arrays in multipart form serialization. + +```python +# After fix (correct) +files.append((f_name, (file_name, content, content_type))) +``` + +## Why This Fixes the Issue + +1. **Server Expectation**: The Glean API server expects file uploads with the field name `"files"` (as shown in the working cURL example: `--form files=@test_input.txt`) + +2. 
**SDK Behavior**: The SDK was sending `"files[]"` instead of `"files"`, causing the server to reject the request with a 400 error. + +3. **Array Handling**: The `"[]"` suffix was being added to indicate array fields, but for file uploads, each file in the array should use the base field name `"files"`. + +## Testing the Fix + +You can test the fix using the provided test script: + +```bash +# Set your API token +export GLEAN_API_TOKEN="your-api-token-here" + +# Run the test +python test_file_upload.py +``` + +## Correct Usage Pattern + +After the fix, the correct usage pattern remains the same: + +```python +from glean.api_client import Glean, errors, models + +with Glean( + api_token=GLEAN_TOKEN, + instance="scio-prod", +) as glean: + try: + upload_response = glean.client.chat.upload_files( + files=[ + models.File( + file_name="test_input.txt", + content=open(DOCUMENT_PATH, "rb"), + content_type="text/plain", + ) + ] + ) + except errors.GleanError as e: + print(f"Error uploading file: {e}") + print(e.raw_response) +``` + +## Files Modified + +1. `src/glean/api_client/utils/forms.py` - Fixed multipart form serialization +2. `test_file_upload.py` - Test script to verify the fix +3. `file_upload_example.py` - Example showing correct usage +4. `FILE_UPLOAD_FIX.md` - This documentation + +## Impact + +This fix resolves the 400 error when uploading files through the Glean SDK, making the SDK behavior consistent with direct API calls via cURL. \ No newline at end of file diff --git a/file_upload_example.py b/file_upload_example.py new file mode 100644 index 00000000..e850f423 --- /dev/null +++ b/file_upload_example.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +""" +Example showing the correct way to upload files using the Glean SDK. + +This example demonstrates the fix for the file upload issue where the SDK +was incorrectly appending "[]" to field names in multipart form data. 
+""" + +import os +from pprint import pprint +from glean.api_client import Glean, errors, models + +def upload_file_example(): + """ + Example of uploading a file to Glean chat. + + This shows the correct usage pattern after the multipart form + serialization fix. + """ + + # Configuration + GLEAN_TOKEN = os.getenv("GLEAN_API_TOKEN") + DOCUMENT_PATH = "test_input.txt" # Your file path here + + if not GLEAN_TOKEN: + print("Error: GLEAN_API_TOKEN environment variable not set") + return + + if not os.path.exists(DOCUMENT_PATH): + print(f"Error: File {DOCUMENT_PATH} does not exist") + return + + print(f"Uploading file: {DOCUMENT_PATH}") + + with Glean( + api_token=GLEAN_TOKEN, + instance="scio-prod", + ) as glean: + try: + # Upload the file using the corrected SDK + upload_response = glean.client.chat.upload_files( + files=[ + models.File( + file_name=os.path.basename(DOCUMENT_PATH), + content=open(DOCUMENT_PATH, "rb"), + content_type="text/plain", + ) + ] + ) + + print("✅ File upload successful!") + print("Upload response:") + pprint(upload_response) + + # You can now use the uploaded file in chat + print("\nFile uploaded successfully. 
You can now use it in chat conversations.") + + except errors.GleanError as e: + print("❌ File upload failed!") + print(f"Error: {e}") + print(f"Status code: {e.status_code}") + print(f"Raw response: {e.raw_response}") + if hasattr(e, 'body'): + print(f"Response body: {e.body}") + +if __name__ == "__main__": + upload_file_example() \ No newline at end of file diff --git a/src/glean/api_client/utils/forms.py b/src/glean/api_client/utils/forms.py index e873495f..ec8032df 100644 --- a/src/glean/api_client/utils/forms.py +++ b/src/glean/api_client/utils/forms.py @@ -149,9 +149,9 @@ def serialize_multipart_form( file_name, content, content_type = _extract_file_properties(file_obj) if content_type is not None: - files.append((f_name + "[]", (file_name, content, content_type))) + files.append((f_name, (file_name, content, content_type))) else: - files.append((f_name + "[]", (file_name, content))) + files.append((f_name, (file_name, content))) else: # Handle single file file_name, content, content_type = _extract_file_properties(val) diff --git a/test_file_upload.py b/test_file_upload.py new file mode 100644 index 00000000..73767340 --- /dev/null +++ b/test_file_upload.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +""" +Test script to verify the file upload fix works correctly. 
+""" + +import os +import tempfile +from pprint import pprint + +# Import the SDK +from glean.api_client import Glean, errors, models + +# Create a test file +def create_test_file(): + """Create a temporary test file for upload.""" + with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f: + f.write("This is a test file for Glean upload.\n") + f.write("It contains some sample content.\n") + return f.name + +def test_file_upload(): + """Test the file upload functionality.""" + # Create a test file + test_file_path = create_test_file() + + try: + # Get API token from environment + api_token = os.getenv("GLEAN_API_TOKEN") + if not api_token: + print("Error: GLEAN_API_TOKEN environment variable not set") + return + + # Get instance from environment or use default + instance = os.getenv("GLEAN_INSTANCE", "scio-prod") + + print(f"Testing file upload to instance: {instance}") + print(f"Test file: {test_file_path}") + + with Glean( + api_token=api_token, + instance=instance, + ) as glean: + try: + # Upload the file + upload_response = glean.client.chat.upload_files( + files=[ + models.File( + file_name=os.path.basename(test_file_path), + content=open(test_file_path, "rb"), + content_type="text/plain", + ) + ] + ) + + print("✅ File upload successful!") + print("Response:") + pprint(upload_response) + + except errors.GleanError as e: + print("❌ File upload failed!") + print(f"Error: {e}") + print(f"Status code: {e.status_code}") + print(f"Raw response: {e.raw_response}") + if hasattr(e, 'body'): + print(f"Response body: {e.body}") + + finally: + # Clean up the test file + if os.path.exists(test_file_path): + os.unlink(test_file_path) + print(f"Cleaned up test file: {test_file_path}") + +if __name__ == "__main__": + test_file_upload() \ No newline at end of file