Skip to content

Commit 6de3adf

Browse files
Fix file upload serialization by removing unnecessary array notation
Co-authored-by: chris.freeman <chris.freeman@glean.com>
1 parent d448641 commit 6de3adf

File tree

4 files changed

+227
-2
lines changed

4 files changed

+227
-2
lines changed

FILE_UPLOAD_FIX.md

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# File Upload Fix for Glean SDK
2+
3+
## Issue Description
4+
5+
File uploads through the Glean SDK were failing with a 400 error, even though the same request succeeded when sent via cURL. The cause was the multipart form data serialization logic.
6+
7+
## Root Cause
8+
9+
The problem was in the `serialize_multipart_form` function in `src/glean/api_client/utils/forms.py`. When handling arrays of files, the code was incorrectly appending `"[]"` to the field name:
10+
11+
```python
12+
# Before fix (incorrect)
13+
files.append((f_name + "[]", (file_name, content, content_type)))
14+
```
15+
16+
This caused the SDK to send field names like `"files[]"` instead of the expected `"files"`.
17+
18+
## The Fix
19+
20+
**File:** `src/glean/api_client/utils/forms.py`
21+
22+
**Change:** Removed the `"[]"` suffix when handling file arrays in multipart form serialization.
23+
24+
```python
25+
# After fix (correct)
26+
files.append((f_name, (file_name, content, content_type)))
27+
```
28+
29+
## Why This Fixes the Issue
30+
31+
1. **Server Expectation**: The Glean API server expects file uploads with the field name `"files"` (as shown in the working cURL example: `--form files=@test_input.txt`)
32+
33+
2. **SDK Behavior**: The SDK was sending `"files[]"` instead of `"files"`, causing the server to reject the request with a 400 error.
34+
35+
3. **Array Handling**: The `"[]"` suffix was being added to indicate array fields, but for file uploads, each file in the array should use the base field name `"files"`.
36+
37+
## Testing the Fix
38+
39+
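You can test the fix using the provided test script. It targets the `scio-prod` instance by default; set the `GLEAN_INSTANCE` environment variable to use a different one: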
40+
41+
```bash
42+
# Set your API token
43+
export GLEAN_API_TOKEN="your-api-token-here"
44+
45+
# Run the test
46+
python test_file_upload.py
47+
```
48+
49+
## Correct Usage Pattern
50+
51+
After the fix, the correct usage pattern remains the same:
52+
53+
```python
54+
from glean.api_client import Glean, errors, models
55+
56+
with Glean(
57+
api_token=GLEAN_TOKEN,
58+
instance="scio-prod",
59+
) as glean:
60+
try:
61+
upload_response = glean.client.chat.upload_files(
62+
files=[
63+
models.File(
64+
file_name="test_input.txt",
65+
content=open(DOCUMENT_PATH, "rb"),
66+
content_type="text/plain",
67+
)
68+
]
69+
)
70+
except errors.GleanError as e:
71+
print(f"Error uploading file: {e}")
72+
print(e.raw_response)
73+
```
74+
75+
## Files Modified
76+
77+
1. `src/glean/api_client/utils/forms.py` - Fixed multipart form serialization
78+
2. `test_file_upload.py` - Test script to verify the fix
79+
3. `file_upload_example.py` - Example showing correct usage
80+
4. `FILE_UPLOAD_FIX.md` - This documentation
81+
82+
## Impact
83+
84+
This fix resolves the 400 error when uploading files through the Glean SDK, making the SDK behavior consistent with direct API calls via cURL.

file_upload_example.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Example showing the correct way to upload files using the Glean SDK.
4+
5+
This example demonstrates the fix for the file upload issue where the SDK
6+
was incorrectly appending "[]" to field names in multipart form data.
7+
"""
8+
9+
import os
10+
from pprint import pprint
11+
from glean.api_client import Glean, errors, models
12+
13+
def upload_file_example():
    """
    Upload a single local file to Glean chat and print the outcome.

    The API token is read from the ``GLEAN_API_TOKEN`` environment variable
    and the file ``test_input.txt`` is uploaded to the ``scio-prod`` instance.
    A missing token, a missing file, and an ``errors.GleanError`` raised by
    the SDK are all reported on stdout rather than raised to the caller.

    This shows the correct usage pattern after the multipart form
    serialization fix.

    Returns:
        None.
    """
    # Configuration
    glean_token = os.getenv("GLEAN_API_TOKEN")
    document_path = "test_input.txt"  # Your file path here

    if not glean_token:
        print("Error: GLEAN_API_TOKEN environment variable not set")
        return

    if not os.path.exists(document_path):
        print(f"Error: File {document_path} does not exist")
        return

    print(f"Uploading file: {document_path}")

    with Glean(
        api_token=glean_token,
        instance="scio-prod",
    ) as glean:
        try:
            # Open the document in a ``with`` block so the file handle is
            # closed as soon as the upload call returns or raises, instead
            # of being left for the garbage collector.
            with open(document_path, "rb") as document:
                # Upload the file using the corrected SDK
                upload_response = glean.client.chat.upload_files(
                    files=[
                        models.File(
                            file_name=os.path.basename(document_path),
                            content=document,
                            content_type="text/plain",
                        )
                    ]
                )

            print("✅ File upload successful!")
            print("Upload response:")
            pprint(upload_response)

            # You can now use the uploaded file in chat
            print("\nFile uploaded successfully. You can now use it in chat conversations.")

        except errors.GleanError as e:
            print("❌ File upload failed!")
            print(f"Error: {e}")
            print(f"Status code: {e.status_code}")
            print(f"Raw response: {e.raw_response}")
            # Only print the body when the error object exposes one.
            if hasattr(e, 'body'):
                print(f"Response body: {e.body}")


if __name__ == "__main__":
    upload_file_example()

src/glean/api_client/utils/forms.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,9 +149,9 @@ def serialize_multipart_form(
149149
file_name, content, content_type = _extract_file_properties(file_obj)
150150

151151
if content_type is not None:
152-
files.append((f_name + "[]", (file_name, content, content_type)))
152+
files.append((f_name, (file_name, content, content_type)))
153153
else:
154-
files.append((f_name + "[]", (file_name, content)))
154+
files.append((f_name, (file_name, content)))
155155
else:
156156
# Handle single file
157157
file_name, content, content_type = _extract_file_properties(val)

test_file_upload.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Test script to verify the file upload fix works correctly.
4+
"""
5+
6+
import os
7+
import tempfile
8+
from pprint import pprint
9+
10+
# Import the SDK
11+
from glean.api_client import Glean, errors, models
12+
13+
# Create a test file
14+
def create_test_file():
    """Create a temporary ``.txt`` file with sample content for upload.

    The file is created with ``delete=False`` so it still exists after this
    function returns; the caller is responsible for removing it.

    Returns:
        str: Path of the newly created file.
    """
    # An explicit encoding keeps the bytes written to disk independent of
    # the platform's locale default.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".txt", delete=False, encoding="utf-8"
    ) as f:
        f.write("This is a test file for Glean upload.\n")
        f.write("It contains some sample content.\n")
    # The name stays valid after the handle is closed because delete=False.
    return f.name
20+
21+
def test_file_upload():
    """Upload a freshly created temporary file to Glean and print the result.

    The API token is read from ``GLEAN_API_TOKEN``; the target instance is
    read from ``GLEAN_INSTANCE`` and defaults to ``scio-prod``. A missing
    token and an ``errors.GleanError`` from the SDK are reported on stdout
    rather than raised. The temporary file is removed before returning,
    whether or not the upload succeeded.

    Returns:
        None.
    """
    # Create a test file
    test_file_path = create_test_file()

    try:
        # Get API token from environment
        api_token = os.getenv("GLEAN_API_TOKEN")
        if not api_token:
            print("Error: GLEAN_API_TOKEN environment variable not set")
            return

        # Get instance from environment or use default
        instance = os.getenv("GLEAN_INSTANCE", "scio-prod")

        print(f"Testing file upload to instance: {instance}")
        print(f"Test file: {test_file_path}")

        with Glean(
            api_token=api_token,
            instance=instance,
        ) as glean:
            try:
                # Open the file in a ``with`` block so the handle is closed
                # before the cleanup below runs; a handle that is still open
                # prevents os.unlink from deleting the file on Windows.
                with open(test_file_path, "rb") as test_file:
                    # Upload the file
                    upload_response = glean.client.chat.upload_files(
                        files=[
                            models.File(
                                file_name=os.path.basename(test_file_path),
                                content=test_file,
                                content_type="text/plain",
                            )
                        ]
                    )

                print("✅ File upload successful!")
                print("Response:")
                pprint(upload_response)

            except errors.GleanError as e:
                print("❌ File upload failed!")
                print(f"Error: {e}")
                print(f"Status code: {e.status_code}")
                print(f"Raw response: {e.raw_response}")
                # Only print the body when the error object exposes one.
                if hasattr(e, 'body'):
                    print(f"Response body: {e.body}")

    finally:
        # Clean up the test file
        if os.path.exists(test_file_path):
            os.unlink(test_file_path)
            print(f"Cleaned up test file: {test_file_path}")


if __name__ == "__main__":
    test_file_upload()

0 commit comments

Comments
 (0)