55import asyncio
66from typing import TYPE_CHECKING
77
8+ import logistro
9+
810if TYPE_CHECKING :
911 from choreographer import Browser , Tab
12+ from choreographer .protocol .devtools_async import Session
1013
1114 from . import BrowserResponse
1215
16+ _logger = logistro .getLogger (__name__ )
17+
# A bit about the mechanics of Chrome:
# Whether or not a Page.loadEventFired event fires is a bit
# racy. Optimistically, it's buffered and fired after subscription
# even if the event happened in the past.
# It doesn't seem to always work out that way, so we also use
# JavaScript to create a "loaded" event. But in the case where
# we need to time out (loading a page that never resolves),
# the browser might actually load about:blank instead and then
# fire the event, misleading the user, so we also check the URL.
27+
28+
async def _check_document_ready(session: Session, url: str) -> BrowserResponse:
    """
    Ask the page, via JavaScript, to report once its document has loaded.

    Sends a `Runtime.evaluate` command whose expression is a Promise. The
    promise resolves immediately with "Was complete" when
    `document.readyState` is already 'complete' and `window.location`
    equals `url`; otherwise it resolves with "Event loaded" when the
    window's `load` event fires.

    Args:
        session: The session on which the command is sent.
        url: The URL the page is expected to be showing.

    Returns:
        Whatever `session.send_command` returns for the evaluation.

    """
    import json  # local import keeps this block self-contained

    # Serialise the URL as a JSON string, which is also a valid JavaScript
    # string literal. Splicing the raw URL into a template literal would
    # break the expression (or inject script) for any URL containing a
    # backtick or `${`.
    url_literal = json.dumps(str(url))
    expression = (
        """
        new Promise((resolve) => {
            if (
                (document.readyState === 'complete') &&
                (window.location == """
        + url_literal
        + """)
            ) {
                resolve("Was complete");
            } else {
                window.addEventListener(
                    'load', () => resolve("Event loaded")
                );
            }
        })
        """
    )
    return await session.send_command(
        "Runtime.evaluate",
        params={
            "expression": expression,
            # Have the browser wait for the promise and hand back its value.
            "awaitPromise": True,
            "returnByValue": True,
        },
    )
53+
1354
1455async def create_and_wait (
1556 browser : Browser ,
@@ -29,24 +70,61 @@ async def create_and_wait(
2970 The created Tab
3071
3172 """
73+ _logger .debug ("Creating tab" )
3274 tab = await browser .create_tab (url )
75+ _logger .debug ("Creating session" )
3376 temp_session = await tab .create_session ()
3477
3578 try :
79+ _logger .debug ("Subscribing to loadEven and enabling events." )
3680 load_future = temp_session .subscribe_once ("Page.loadEventFired" )
3781 await temp_session .send_command ("Page.enable" )
3882 await temp_session .send_command ("Runtime.enable" )
3983
4084 if url :
4185 try :
42- await asyncio .wait_for (load_future , timeout = timeout )
43- except (asyncio .TimeoutError , asyncio .CancelledError , TimeoutError ):
86+ # JavaScript evaluation to check if document is loaded
87+ js_ready_future = asyncio .create_task (
88+ _check_document_ready (temp_session , url ),
89+ )
90+ _logger .debug (f"Starting wait: timeout={ timeout } " )
91+ # Race between the two methods: first one to complete wins
92+ done , pending = await asyncio .wait (
93+ [
94+ load_future ,
95+ js_ready_future ,
96+ ],
97+ return_when = asyncio .FIRST_COMPLETED ,
98+ timeout = timeout ,
99+ )
100+ _logger .debug (f"Finish wait, is done? { bool (done )} " )
101+
102+ for task in pending :
103+ _logger .debug (f"Cancelling: { task } " )
104+ task .cancel ()
105+
106+ if not done :
107+ _logger .debug ("Timeout waiting for js or event" )
108+ raise asyncio .TimeoutError ( # noqa: TRY301
109+ "Page load timeout" ,
110+ )
111+ else :
112+ _logger .debug (f"Task which finished: { done } " )
113+
114+ except (
115+ asyncio .TimeoutError ,
116+ asyncio .CancelledError ,
117+ TimeoutError ,
118+ ) as e :
44119 # Stop the page load when timeout occurs
120+ _logger .debug ("Need to stop page loading, error." , exc_info = e )
45121 await temp_session .send_command ("Page.stopLoading" )
46122 raise
47123 finally :
124+ _logger .debug ("Closing session" )
48125 await tab .close_session (temp_session .session_id )
49126
127+ _logger .debug ("Returning tab." )
50128 return tab
51129
52130
@@ -71,9 +149,10 @@ async def navigate_and_wait(
71149 temp_session = await tab .create_session ()
72150
73151 try :
152+ # Subscribe BEFORE enabling domains to avoid race condition
153+ load_future = temp_session .subscribe_once ("Page.loadEventFired" )
74154 await temp_session .send_command ("Page.enable" )
75155 await temp_session .send_command ("Runtime.enable" )
76- load_future = temp_session .subscribe_once ("Page.loadEventFired" )
77156 try :
78157
79158 async def _freezers () -> None :
0 commit comments