
Commit 33265df

Merge pull request #2 from itzlambda/refactor/simplify-middleware
refactor(middleware): remove retry and circuit breaker logic
2 parents 9aae3ee + 486295a commit 33265df

File tree: 15 files changed, +70 -939 lines
CLAUDE.md

Lines changed: 2 additions & 3 deletions
@@ -29,12 +29,11 @@ The `openai_compatible` provider is a generic implementation that other provider
 ### Middleware Stack

-The library uses Tower middleware for enterprise features (see `crates/rullm-core/src/middleware.rs`):
-- Retry logic with exponential backoff
+The library uses Tower middleware (see `crates/rullm-core/src/middleware.rs`):
 - Rate limiting
-- Circuit breakers
 - Timeouts
 - Connection pooling
+- Logging and metrics

 Configuration is done via `MiddlewareConfig` and `LlmServiceBuilder`.
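
For orientation, here is a minimal sketch of the simplified stack this commit leaves behind. It mirrors the builder calls that remain in `examples/middleware_usage.rs` (timeout, rate limiting, logging, metrics; no retry or circuit-breaker steps). The `#[tokio::main]` wrapper is an assumption for a standalone binary; `OPENAI_API_KEY` must be set, as in the examples.

```rust
use rullm_core::{ChatRequestBuilder, ConfigBuilder, LlmServiceBuilder, OpenAIProvider};
use std::time::Duration;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Provider configured from OPENAI_API_KEY, as in the repository examples.
    let config = ConfigBuilder::openai_from_env()?;
    let provider = OpenAIProvider::new(config)?;

    // Simplified middleware stack: timeout, rate limit, logging, metrics.
    let mut middleware_stack = LlmServiceBuilder::new()
        .timeout(Duration::from_secs(30))
        .rate_limit(100, Duration::from_secs(60)) // 100 requests per minute
        .logging()
        .metrics()
        .build(provider, "gpt-4".to_string());

    let request = ChatRequestBuilder::new()
        .user("How can I optimize my database queries?")
        .build();

    let response = middleware_stack.call(request).await?;
    println!("{}", response.message.content);
    Ok(())
}
```
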

Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ rust-version = "1.85"
 [workspace.dependencies]
 # Core library dependencies
 tokio = { version = "1", features = ["full"] }
-tower = { version = "0.4", features = ["timeout", "retry", "limit", "util"] }
+tower = { version = "0.4", features = ["timeout", "limit", "util"] }
 rand = "0.8"
 reqwest = { version = "0.11", features = ["json", "stream"] }
 bytes = "1.0"

crates/rullm-core/examples/README.md

Lines changed: 5 additions & 5 deletions
@@ -40,7 +40,7 @@ All streaming examples use the `chat_completion_stream` method which returns a `

 **Environment:** Requires `OPENAI_API_KEY`

-Demonstrates comprehensive OpenAI streaming with:
+Demonstrates OpenAI streaming with:
 - **Simple streaming chat** with real-time token display
 - **Multi-turn conversations** with context preservation
 - **Creative writing** with high temperature settings
@@ -87,7 +87,7 @@ while let Some(event) = stream.next().await {

 **Environment:** Requires `ANTHROPIC_API_KEY`

-Showcases Claude's capabilities with:
+Shows Claude streaming with:
 - **Philosophical conversations** demonstrating reasoning abilities
 - **Creative storytelling** with vivid imagery
 - **Code explanation** with technical accuracy
@@ -124,7 +124,7 @@ let mut stream = provider

 **Environment:** Requires `GOOGLE_API_KEY`

-Highlights Gemini's versatility:
+Shows Gemini streaming with:
 - **Technical explanations** with precision
 - **Creative writing** using experimental models
 - **Code analysis** and review capabilities
@@ -265,7 +265,7 @@ Demonstrates:

 Key features:
 - **Environment-based configuration**
-- **Custom endpoints** for enterprise setups
+- **Custom endpoints** for custom API URLs
 - **Validation and error handling**
 - **Health checks** and model availability
 - **Request builder patterns** from minimal to full-featured
@@ -434,7 +434,7 @@ cargo run --example test_all_providers
 🎉 All providers are working correctly!
 ```

-This example is perfect for:
+Use this example for:
 - Verifying your API keys work
 - Testing network connectivity
 - Validating provider implementations
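
The streaming examples touched by this README all share the same consumption pattern: build a request with `.stream(true)`, call `chat_completion_stream`, then poll the returned stream with `stream.next().await`. A provider-agnostic sketch of that polling loop is below; it assumes the `futures` crate for `StreamExt`, and the concrete event type (defined in rullm-core) is left generic because it is not shown in this diff.

```rust
use futures::{Stream, StreamExt};

// Generic consumption loop for any chunk stream, such as the one returned by
// `chat_completion_stream`. Collects every event instead of printing it.
async fn drain_stream<S, T>(mut stream: S) -> Vec<T>
where
    S: Stream<Item = T> + Unpin,
{
    let mut events = Vec::new();
    while let Some(event) = stream.next().await {
        // The examples display each event as it arrives; here we just store it.
        events.push(event);
    }
    events
}
```
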

crates/rullm-core/examples/anthropic_stream.rs

Lines changed: 2 additions & 2 deletions
@@ -183,8 +183,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     println!("\n\n🎯 Tips for using Anthropic Claude streaming:");
     println!("• Set ANTHROPIC_API_KEY environment variable");
     println!("• Use .stream(true) in ChatRequestBuilder");
-    println!("• Claude models: haiku (fast), sonnet (balanced), opus (powerful)");
-    println!("• Claude excels at reasoning, analysis, and creative writing");
+    println!("• Claude models: haiku (fast), sonnet (balanced), opus (largest)");
+    println!("• Claude supports reasoning, analysis, and creative writing");
     println!("• Lower temperature (0.1-0.4) for factual content");
     println!("• Higher temperature (0.7-1.0) for creative content");

crates/rullm-core/examples/basic_usage.rs

Lines changed: 1 addition & 11 deletions
@@ -1,4 +1,4 @@
-use rullm_core::{ChatRequestBuilder, LlmError};
+use rullm_core::ChatRequestBuilder;

 // This example demonstrates the unified interface without actual provider implementations
 // It shows how the library would be used once provider modules are implemented
@@ -33,15 +33,5 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     println!("\nThis example shows the unified interface design.");
     println!("Actual provider implementations will be added in subsequent tasks.");

-    // Example of error handling
-    let error_example = LlmError::rate_limit(
-        "Too many requests",
-        Some(std::time::Duration::from_secs(60)),
-    );
-    println!("\nError handling example:");
-    println!("  Error: {error_example}");
-    println!("  Is retryable: {}", error_example.is_retryable());
-    println!("  Retry delay: {:?}", error_example.retry_delay());
-
     Ok(())
 }

crates/rullm-core/examples/gemini_stream.rs

Lines changed: 1 addition & 1 deletion
@@ -219,7 +219,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     println!(
         "• Models: gemini-1.5-flash (fast), gemini-1.5-pro (balanced), gemini-2.0-flash-exp (experimental)"
     );
-    println!("• Gemini excels at reasoning, code analysis, and creative tasks");
+    println!("• Gemini supports reasoning, code analysis, and creative tasks");
     println!("• Lower temperature (0.1-0.4) for factual/technical content");
     println!("• Higher temperature (0.7-1.0) for creative content");
     println!("• Use top_p for more controlled randomness");

crates/rullm-core/examples/middleware_usage.rs

Lines changed: 17 additions & 72 deletions
@@ -1,6 +1,6 @@
 use rullm_core::{
     ChatRequestBuilder, ConfigBuilder, LlmServiceBuilder, MiddlewareConfig, OpenAIProvider,
-    RateLimit, config::RetryPolicy,
+    RateLimit,
 };
 use std::time::Duration;

@@ -14,16 +14,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     // Example 1: Basic middleware stack with defaults
     basic_middleware_example().await?;

-    // Example 2: Custom retry policy with exponential backoff
-    custom_retry_example().await?;
-
-    // Example 3: Production-ready configuration
+    // Example 2: Configuration with timeouts and rate limiting
     production_config_example().await?;

-    // Example 4: Rate-limited and monitored configuration
+    // Example 3: Rate-limited and monitored configuration
     rate_limited_example().await?;

-    // Example 5: Custom middleware configuration
+    // Example 4: Custom middleware configuration
     custom_middleware_config_example().await?;

     Ok(())
@@ -57,91 +54,47 @@ async fn basic_middleware_example() -> Result<(), Box<dyn std::error::Error>> {
     Ok(())
 }

-/// Example 2: Custom retry policy with exponential backoff
-async fn custom_retry_example() -> Result<(), Box<dyn std::error::Error>> {
-    println!("🔄 Example 2: Custom Retry Policy");
-
-    let config = ConfigBuilder::openai_from_env()?;
-    let provider = OpenAIProvider::new(config)?;
-
-    // Create middleware with custom exponential backoff retry policy
-    let mut middleware_stack = LlmServiceBuilder::new()
-        .timeout(Duration::from_secs(60)) // 60 second timeout
-        .retry(RetryPolicy::ExponentialBackoff {
-            initial_delay_ms: 200, // Start with 200ms
-            max_delay_ms: 10000,   // Cap at 10 seconds
-            multiplier: 2.5,       // Aggressive backoff
-            jitter: true,          // Add randomness
-        })
-        .logging()
-        .build(provider, "gpt-3.5-turbo".to_string());
-
-    let request = ChatRequestBuilder::new()
-        .user("Explain quantum computing in simple terms")
-        .temperature(0.7)
-        .max_tokens(150)
-        .build();
-
-    let response = middleware_stack.call(request).await?;
-
-    println!("✅ Response: {}", response.message.content);
-    println!("🔄 Retry policy: Exponential backoff with jitter\n");
-
-    Ok(())
-}
-
-/// Example 3: Production-ready configuration
+/// Example 2: Configuration with timeouts and rate limiting
 async fn production_config_example() -> Result<(), Box<dyn std::error::Error>> {
-    println!("🏭 Example 3: Production Configuration");
+    println!("🏭 Example 2: Configuration with Timeouts and Rate Limiting");

     let config = ConfigBuilder::openai_from_env()?;
     let provider = OpenAIProvider::new(config)?;

-    // Production-ready middleware configuration
+    // Middleware configuration with timeouts and rate limiting
     let mut middleware_stack = LlmServiceBuilder::new()
         .timeout(Duration::from_secs(30)) // Conservative timeout
-        .retry(RetryPolicy::ApiGuided {
-            fallback: Box::new(RetryPolicy::ExponentialBackoff {
-                initial_delay_ms: 100,
-                max_delay_ms: 5000,
-                multiplier: 2.0,
-                jitter: true,
-            }),
-            max_api_delay_ms: 30000, // Don't wait more than 30 seconds
-            retry_headers: vec!["retry-after".to_string(), "x-ratelimit-reset".to_string()],
-        })
         .rate_limit(100, Duration::from_secs(60)) // 100 requests per minute
-        .logging() // Always log in production
-        .metrics() // Always collect metrics
+        .logging()
+        .metrics()
         .build(provider, "gpt-4".to_string());

     let request = ChatRequestBuilder::new()
         .system("You are a helpful assistant for a production application.")
         .user("How can I optimize my database queries?")
-        .temperature(0.3) // More deterministic for production
+        .temperature(0.3) // Lower temperature for more deterministic output
         .max_tokens(300)
         .build();

     let response = middleware_stack.call(request).await?;

-    println!("✅ Production response received");
+    println!("✅ Response received");
     println!("📊 Token usage: {}", response.usage.total_tokens);
-    println!("🛡️ Configuration: API-guided retry, rate limited, fully monitored\n");
+    println!("🛡️ Configuration: Rate limited, logged and monitored\n");

     Ok(())
 }

-/// Example 4: Rate-limited and monitored configuration
+/// Example 3: Rate-limited and monitored configuration
 async fn rate_limited_example() -> Result<(), Box<dyn std::error::Error>> {
-    println!("⏱️ Example 4: Rate Limited Configuration");
+    println!("⏱️ Example 3: Rate Limited Configuration");

     let config = ConfigBuilder::openai_from_env()?;
     let provider = OpenAIProvider::new(config)?;

     // Configuration optimized for rate limiting and monitoring
     let mut middleware_stack = LlmServiceBuilder::new()
         .timeout(Duration::from_secs(45))
-        .retry(RetryPolicy::Fixed { delay_ms: 1000 }) // Simple fixed delay
         .rate_limit(50, Duration::from_secs(60)) // Conservative rate limit
         .logging()
         .metrics()
@@ -184,22 +137,16 @@ async fn rate_limited_example() -> Result<(), Box<dyn std::error::Error>> {
     Ok(())
 }

-/// Example 5: Custom middleware configuration from struct
+/// Example 4: Custom middleware configuration from struct
 async fn custom_middleware_config_example() -> Result<(), Box<dyn std::error::Error>> {
-    println!("⚙️ Example 5: Custom Middleware Configuration");
+    println!("⚙️ Example 4: Custom Middleware Configuration");

     let config = ConfigBuilder::openai_from_env()?;
     let provider = OpenAIProvider::new(config)?;

     // Define custom middleware configuration
     let middleware_config = MiddlewareConfig {
         timeout: Some(Duration::from_secs(20)),
-        retry_policy: Some(RetryPolicy::ExponentialBackoff {
-            initial_delay_ms: 500,
-            max_delay_ms: 8000,
-            multiplier: 1.8,
-            jitter: false,
-        }),
         rate_limit: Some(RateLimit {
             requests_per_period: 25,
             period: Duration::from_secs(60),
@@ -225,9 +172,7 @@ async fn custom_middleware_config_example() -> Result<(), Box<dyn std::error::Er
         "📊 Response length: {} characters",
         response.message.content.len()
     );
-    println!(
-        "⚙️ Configuration: Custom timeouts, exponential backoff (no jitter), 25 req/min limit\n"
-    );
+    println!("⚙️ Configuration: Custom timeouts, 25 req/min limit\n");

     // Display the configuration details
     let config = middleware_stack.config();

crates/rullm-core/examples/retry_policy_example.rs

Lines changed: 0 additions & 50 deletions
This file was deleted.
