3232)
3333
3434
35+ def wait_for_cassandra_ready (host = "127.0.0.1" , timeout = 30 ):
36+ """Wait for Cassandra to be ready by executing a test query with cqlsh."""
37+ start_time = time .time ()
38+ while time .time () - start_time < timeout :
39+ try :
40+ # Use cqlsh to test if Cassandra is ready
41+ result = subprocess .run (
42+ ["cqlsh" , host , "-e" , "SELECT release_version FROM system.local;" ],
43+ capture_output = True ,
44+ text = True ,
45+ timeout = 5 ,
46+ )
47+ if result .returncode == 0 :
48+ return True
49+ except (subprocess .TimeoutExpired , Exception ):
50+ pass
51+ time .sleep (0.5 )
52+ return False
53+
54+
55+ def wait_for_cassandra_down (host = "127.0.0.1" , timeout = 10 ):
56+ """Wait for Cassandra to be down by checking if cqlsh fails."""
57+ start_time = time .time ()
58+ while time .time () - start_time < timeout :
59+ try :
60+ result = subprocess .run (
61+ ["cqlsh" , host , "-e" , "SELECT 1;" ], capture_output = True , text = True , timeout = 2
62+ )
63+ if result .returncode != 0 :
64+ return True
65+ except (subprocess .TimeoutExpired , Exception ):
66+ return True
67+ time .sleep (0.5 )
68+ return False
69+
70+
3571@pytest_asyncio .fixture (autouse = True )
3672async def ensure_cassandra_enabled_bdd (cassandra_container ):
3773 """Ensure Cassandra binary protocol is enabled before and after each test."""
@@ -74,7 +110,7 @@ async def unique_test_keyspace(cassandra_container):
74110 if not health ["native_transport" ] or not health ["cql_available" ]:
75111 pytest .fail (f"Cassandra not healthy: { health } " )
76112
77- cluster = AsyncCluster (contact_points = ["localhost " ], protocol_version = 5 )
113+ cluster = AsyncCluster (contact_points = ["127.0.0.1 " ], protocol_version = 5 )
78114 session = await cluster .connect ()
79115
80116 # Create unique keyspace
@@ -172,8 +208,9 @@ async def test_scenario():
172208 ), f"Failed to disable binary protocol: { disable_result .stderr } "
173209 print ("✓ Binary protocol disabled - simulating Cassandra outage" )
174210
175- # Give connections time to fail
176- await asyncio .sleep (3 )
211+ # Wait for Cassandra to be truly down using cqlsh
212+ assert wait_for_cassandra_down (), "Cassandra did not go down"
213+ print ("✓ Confirmed Cassandra is down via cqlsh" )
177214
178215 # Then: APIs should return 503 Service Unavailable errors
179216 print ("\n Then: APIs should return 503 Service Unavailable errors" )
@@ -203,27 +240,47 @@ async def test_scenario():
203240 assert (
204241 enable_result .returncode == 0
205242 ), f"Failed to enable binary protocol: { enable_result .stderr } "
206- print ("✓ Binary protocol re-enabled - Cassandra is now available" )
243+ print ("✓ Binary protocol re-enabled" )
244+
245+ # Wait for Cassandra to be truly ready using cqlsh
246+ assert wait_for_cassandra_ready (), "Cassandra did not come back up"
247+ print ("✓ Confirmed Cassandra is ready via cqlsh" )
207248
208249 # Then: The application should automatically reconnect
209250 print ("\n Then: The application should automatically reconnect" )
210- print ("Waiting for automatic reconnection (up to 30 seconds)..." )
211251
212- # Wait for recovery
213- start_time = time .time ()
252+ # Now check if the app has reconnected
253+ # The FastAPI app uses a 2-second constant reconnection delay, so we need to wait
254+ # at least that long plus some buffer for the reconnection to complete
214255 reconnected = False
215- while time .time () - start_time < 30 :
256+ # Wait up to 30 seconds - driver needs time to rediscover the host
257+ for attempt in range (30 ): # Up to 30 seconds (30 * 1s)
216258 try :
217- # Try a simple query
218- response = await app_client .get ("/users?limit=1" )
219- if response .status_code == 200 :
220- reconnected = True
221- break
222- except (httpx .TimeoutException , httpx .RequestError ):
223- pass
224- await asyncio .sleep (2 )
225-
226- assert reconnected , "Failed to reconnect within 30 seconds"
259+ # Check health first to see connection status
260+ health_resp = await app_client .get ("/health" )
261+ if health_resp .status_code == 200 :
262+ health_data = health_resp .json ()
263+ if health_data .get ("cassandra_connected" ):
264+ # Now try actual query
265+ response = await app_client .get ("/users?limit=1" )
266+ if response .status_code == 200 :
267+ reconnected = True
268+ print (f"✓ App reconnected after { attempt + 1 } seconds" )
269+ break
270+ else :
271+ print (
272+ f" Health says connected but query returned { response .status_code } "
273+ )
274+ else :
275+ if attempt % 5 == 0 : # Print every 5 seconds
276+ print (
277+ f" After { attempt } seconds: Health check says not connected yet"
278+ )
279+ except (httpx .TimeoutException , httpx .RequestError ) as e :
280+ print (f" Attempt { attempt + 1 } : Connection error: { type (e ).__name__ } " )
281+ await asyncio .sleep (1.0 ) # Check every second
282+
283+ assert reconnected , "Application failed to reconnect after Cassandra came back"
227284 print ("✓ Application successfully reconnected to Cassandra" )
228285
229286 # Verify health check shows connected again
@@ -273,7 +330,7 @@ async def test_scenario():
273330 assert health_response .status_code == 200
274331 assert health_response .json ()["cassandra_connected" ] is True
275332
276- cycles = 3
333+ cycles = 1 # Just test one cycle to speed up
277334 for cycle in range (1 , cycles + 1 ):
278335 print (f"\n When: Cassandra outage cycle { cycle } /{ cycles } begins" )
279336
@@ -284,7 +341,11 @@ async def test_scenario():
284341 assert disable_result .returncode == 0
285342 print (f"✓ Cycle { cycle } : Binary protocol disabled" )
286343
287- await asyncio .sleep (2 )
344+ # Wait for Cassandra to be down
345+ assert wait_for_cassandra_down (
346+ timeout = 5
347+ ), f"Cycle { cycle } : Cassandra did not go down"
348+ print (f"✓ Cycle { cycle } : Confirmed Cassandra is down via cqlsh" )
288349
289350 # Verify unhealthy state
290351 health_response = await app_client .get ("/health" )
@@ -296,18 +357,24 @@ async def test_scenario():
296357 assert enable_result .returncode == 0
297358 print (f"✓ Cycle { cycle } : Binary protocol re-enabled" )
298359
299- # Wait for recovery
300- start_time = time .time ()
360+ # Wait for Cassandra to be ready
361+ assert wait_for_cassandra_ready (
362+ timeout = 10
363+ ), f"Cycle { cycle } : Cassandra did not come back"
364+ print (f"✓ Cycle { cycle } : Confirmed Cassandra is ready via cqlsh" )
365+
366+ # Check app reconnection
367+ # The FastAPI app uses a 2-second constant reconnection delay
301368 reconnected = False
302- while time . time () - start_time < 20 :
369+ for _ in range ( 8 ): # Up to 4 seconds to account for 2s reconnection delay
303370 try :
304371 response = await app_client .get ("/users?limit=1" )
305372 if response .status_code == 200 :
306373 reconnected = True
307374 break
308375 except Exception :
309376 pass
310- await asyncio .sleep (2 )
377+ await asyncio .sleep (0.5 )
311378
312379 assert reconnected , f"Cycle { cycle } : Failed to reconnect"
313380 print (f"✓ Cycle { cycle } : Successfully reconnected" )
0 commit comments