Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions FILE_UPLOAD_FIX.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# File Upload Fix for Glean SDK

## Issue Description

The Glean SDK was failing to upload files with a 400 error, even though the same request worked correctly when sent via cURL. The issue was in the multipart form data serialization logic.

## Root Cause

The problem was in the `serialize_multipart_form` function in `src/glean/api_client/utils/forms.py`. When handling arrays of files, the code was incorrectly appending `"[]"` to the field name:

```python
# Before fix (incorrect)
files.append((f_name + "[]", (file_name, content, content_type)))
```

This caused the SDK to send field names like `"files[]"` instead of the expected `"files"`.

## The Fix

**File:** `src/glean/api_client/utils/forms.py`

**Change:** Removed the `"[]"` suffix when handling file arrays in multipart form serialization.

```python
# After fix (correct)
files.append((f_name, (file_name, content, content_type)))
```

## Why This Fixes the Issue

1. **Server Expectation**: The Glean API server expects file uploads with the field name `"files"` (as shown in the working cURL example: `--form files=@test_input.txt`)

2. **SDK Behavior**: The SDK was sending `"files[]"` instead of `"files"`, causing the server to reject the request with a 400 error.

3. **Array Handling**: The `"[]"` suffix was being added to indicate array fields, but for file uploads, each file in the array should use the base field name `"files"`.

## Testing the Fix

You can test the fix using the provided test script:

```bash
# Set your API token
export GLEAN_API_TOKEN="your-api-token-here"

# Run the test
python test_file_upload.py
```

## Correct Usage Pattern

After the fix, the correct usage pattern remains the same:

```python
from glean.api_client import Glean, errors, models

# GLEAN_TOKEN and DOCUMENT_PATH are placeholders: supply your API token and
# the path of the file you want to upload.
with Glean(
    api_token=GLEAN_TOKEN,
    instance="scio-prod",
) as glean:
    try:
        # Open the file in a context manager so the handle is closed once the
        # upload call returns, whether it succeeds or raises.
        with open(DOCUMENT_PATH, "rb") as document:
            upload_response = glean.client.chat.upload_files(
                files=[
                    models.File(
                        file_name="test_input.txt",
                        content=document,
                        content_type="text/plain",
                    )
                ]
            )
    except errors.GleanError as e:
        # GleanError is reached through the imported `errors` module.
        print(f"Error uploading file: {e}")
        print(e.raw_response)
```

## Files Modified

1. `src/glean/api_client/utils/forms.py` - Fixed multipart form serialization
2. `test_file_upload.py` - Test script to verify the fix
3. `file_upload_example.py` - Example showing correct usage
4. `FILE_UPLOAD_FIX.md` - This documentation

## Impact

This fix resolves the 400 error when uploading files through the Glean SDK, making the SDK behavior consistent with direct API calls via cURL.
67 changes: 67 additions & 0 deletions file_upload_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/usr/bin/env python3
"""
Example showing the correct way to upload files using the Glean SDK.

This example demonstrates the fix for the file upload issue where the SDK
was incorrectly appending "[]" to field names in multipart form data.
"""

import os
from pprint import pprint
from glean.api_client import Glean, errors, models

def upload_file_example():
    """Upload a local text file to Glean chat and print the outcome.

    The API token is read from the ``GLEAN_API_TOKEN`` environment variable
    and the document path is the hard-coded ``test_input.txt``. The function
    prints an error and returns early when the token is unset or the file
    does not exist. ``errors.GleanError`` raised by the upload is caught and
    reported on stdout instead of propagating.

    This shows the usage pattern after the multipart form serialization fix.
    """
    # Configuration
    GLEAN_TOKEN = os.getenv("GLEAN_API_TOKEN")
    DOCUMENT_PATH = "test_input.txt"  # Your file path here

    if not GLEAN_TOKEN:
        print("Error: GLEAN_API_TOKEN environment variable not set")
        return

    if not os.path.exists(DOCUMENT_PATH):
        print(f"Error: File {DOCUMENT_PATH} does not exist")
        return

    print(f"Uploading file: {DOCUMENT_PATH}")

    with Glean(
        api_token=GLEAN_TOKEN,
        instance="scio-prod",
    ) as glean:
        try:
            # Open the document in a context manager so the file handle is
            # closed as soon as the upload call returns or raises.
            with open(DOCUMENT_PATH, "rb") as document:
                # Upload the file using the corrected SDK
                upload_response = glean.client.chat.upload_files(
                    files=[
                        models.File(
                            file_name=os.path.basename(DOCUMENT_PATH),
                            content=document,
                            content_type="text/plain",
                        )
                    ]
                )

            print("✅ File upload successful!")
            print("Upload response:")
            pprint(upload_response)

            # You can now use the uploaded file in chat
            print("\nFile uploaded successfully. You can now use it in chat conversations.")

        except errors.GleanError as e:
            print("❌ File upload failed!")
            print(f"Error: {e}")
            print(f"Status code: {e.status_code}")
            print(f"Raw response: {e.raw_response}")
            if hasattr(e, 'body'):
                print(f"Response body: {e.body}")


if __name__ == "__main__":
    upload_file_example()
4 changes: 2 additions & 2 deletions src/glean/api_client/utils/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,9 @@ def serialize_multipart_form(
file_name, content, content_type = _extract_file_properties(file_obj)

if content_type is not None:
files.append((f_name + "[]", (file_name, content, content_type)))
files.append((f_name, (file_name, content, content_type)))
else:
files.append((f_name + "[]", (file_name, content)))
files.append((f_name, (file_name, content)))
else:
# Handle single file
file_name, content, content_type = _extract_file_properties(val)
Expand Down
74 changes: 74 additions & 0 deletions test_file_upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/usr/bin/env python3
"""
Test script to verify the file upload fix works correctly.
"""

import os
import tempfile
from pprint import pprint

# Import the SDK
from glean.api_client import Glean, errors, models

# Create a test file
def create_test_file():
    """Write a two-line sample ``.txt`` file to a temporary location.

    The file is created with ``delete=False``, so it stays on disk after it
    is closed; the caller is responsible for removing it.

    Returns:
        The path of the newly written temporary file.
    """
    sample_lines = (
        "This is a test file for Glean upload.\n",
        "It contains some sample content.\n",
    )
    handle = tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False)
    with handle:
        handle.writelines(sample_lines)
    return handle.name

def test_file_upload():
    """Upload a temporary text file to Glean chat and print the result.

    The API token is read from ``GLEAN_API_TOKEN``; the instance comes from
    ``GLEAN_INSTANCE`` and defaults to ``"scio-prod"``. When the token is
    unset, an error is printed and the function returns without uploading.
    ``errors.GleanError`` raised by the upload is caught and reported on
    stdout. The temporary file is removed in every case.
    """
    # Create a test file
    test_file_path = create_test_file()

    try:
        # Get API token from environment
        api_token = os.getenv("GLEAN_API_TOKEN")
        if not api_token:
            print("Error: GLEAN_API_TOKEN environment variable not set")
            return

        # Get instance from environment or use default
        instance = os.getenv("GLEAN_INSTANCE", "scio-prod")

        print(f"Testing file upload to instance: {instance}")
        print(f"Test file: {test_file_path}")

        with Glean(
            api_token=api_token,
            instance=instance,
        ) as glean:
            try:
                # Open the file in a context manager so the handle is closed
                # before the cleanup in ``finally`` unlinks the file.
                with open(test_file_path, "rb") as test_file:
                    # Upload the file
                    upload_response = glean.client.chat.upload_files(
                        files=[
                            models.File(
                                file_name=os.path.basename(test_file_path),
                                content=test_file,
                                content_type="text/plain",
                            )
                        ]
                    )

                print("✅ File upload successful!")
                print("Response:")
                pprint(upload_response)

            except errors.GleanError as e:
                print("❌ File upload failed!")
                print(f"Error: {e}")
                print(f"Status code: {e.status_code}")
                print(f"Raw response: {e.raw_response}")
                if hasattr(e, 'body'):
                    print(f"Response body: {e.body}")

    finally:
        # Clean up the test file
        if os.path.exists(test_file_path):
            os.unlink(test_file_path)
            print(f"Cleaned up test file: {test_file_path}")


if __name__ == "__main__":
    test_file_upload()