diff --git a/examples/cdp_mode/ReadMe.md b/examples/cdp_mode/ReadMe.md index a60cae1896f..bde2c106be2 100644 --- a/examples/cdp_mode/ReadMe.md +++ b/examples/cdp_mode/ReadMe.md @@ -391,7 +391,7 @@ sb.cdp.select_all(selector, timeout=None) sb.cdp.find_elements(selector, timeout=None) sb.cdp.find_visible_elements(selector, timeout=None) sb.cdp.click(selector, timeout=None) -sb.cdp.click_if_visible(selector) +sb.cdp.click_if_visible(selector, timeout=0) sb.cdp.click_visible_elements(selector, limit=0) sb.cdp.click_nth_element(selector, number) sb.cdp.click_nth_visible_element(selector, number) diff --git a/examples/cdp_mode/raw_cdp_reddit.py b/examples/cdp_mode/raw_cdp_reddit.py new file mode 100644 index 00000000000..3cca3bb2502 --- /dev/null +++ b/examples/cdp_mode/raw_cdp_reddit.py @@ -0,0 +1,16 @@ +"""Reddit Search / Bypasses reCAPTCHA.""" +from seleniumbase import sb_cdp + +search = "reddit+scraper" +url = f"https://www.reddit.com/r/webscraping/search/?q={search}" +sb = sb_cdp.Chrome(url, use_chromium=True) +sb.solve_captcha() # Might not be needed +post_title = '[data-testid="post-title"]' +sb.wait_for_element(post_title) +for i in range(8): + sb.scroll_down(25) + sb.sleep(0.2) +posts = sb.select_all(post_title) +print('*** Reddit Posts for "%s":' % search) +for post in posts: + print("* " + post.text) diff --git a/examples/cdp_mode/raw_cf.py b/examples/cdp_mode/raw_cf.py index 79dc5339a23..05a921608d5 100644 --- a/examples/cdp_mode/raw_cf.py +++ b/examples/cdp_mode/raw_cf.py @@ -1,14 +1,14 @@ """Using CDP Mode with PyAutoGUI to bypass CAPTCHAs.""" from seleniumbase import SB -with SB(uc=True, test=True, locale="en", guest=True) as sb: +with SB(uc=True, test=True, guest=True) as sb: url = "https://www.cloudflare.com/login" sb.activate_cdp_mode(url) sb.sleep(3) sb.uc_gui_handle_captcha() # PyAutoGUI press Tab and Spacebar sb.sleep(3) -with SB(uc=True, test=True, locale="en", guest=True) as sb: +with SB(uc=True, test=True, guest=True) as sb: url = "https://www.cloudflare.com/login" sb.activate_cdp_mode(url) sb.sleep(4) diff --git a/examples/cdp_mode/raw_cf_captcha.py b/examples/cdp_mode/raw_cf_captcha.py index 7c91735dd03..ac429b9e16f 100644 --- a/examples/cdp_mode/raw_cf_captcha.py +++ b/examples/cdp_mode/raw_cf_captcha.py @@ -3,6 +3,7 @@ with SB(uc=True, test=True, guest=True) as sb: url = "https://www.cloudflare.com/login" sb.activate_cdp_mode(url) - sb.sleep(3) + sb.wait_for_element('div[data-testid*="challenge-widget"]') + sb.sleep(1.5) sb.solve_captcha() sb.sleep(3) diff --git a/examples/cdp_mode/raw_gas_records.py b/examples/cdp_mode/raw_gas_records.py new file mode 100644 index 00000000000..73e2ee66d2a --- /dev/null +++ b/examples/cdp_mode/raw_gas_records.py @@ -0,0 +1,28 @@ +"""(Bypasses the Imperva/Incapsula hCaptcha)""" +from seleniumbase import SB + +with SB(uc=True, test=True) as sb: + url = ( + "https://www.gassaferegister.co.uk/gas-safety" + "/gas-safety-certificates-records/building-regulations-certificate" + "/order-replacement-building-regulations-certificate/" + ) + sb.activate_cdp_mode(url) + sb.sleep(0.6) + sb.solve_captcha() + sb.sleep(1) + sb.wait_for_element("#SearchTerm", timeout=5) + sb.sleep(2) + allow_cookies = 'button:contains("Allow all cookies")' + sb.click_if_visible(allow_cookies, timeout=2) + sb.sleep(1.2) + sb.press_keys("#SearchTerm", "Hydrogen") + sb.sleep(0.5) + sb.click("button.search-button") + sb.sleep(3) + results = sb.find_elements("div.search-result") + for result in results: + print(result.text.replace(" " * 12, " ").strip()) + print() + sb.scroll_to_bottom() + sb.sleep(1) diff --git a/examples/raw_cf.py b/examples/raw_cf.py index cc893e7522f..c4ecc7993d7 100644 --- a/examples/raw_cf.py +++ b/examples/raw_cf.py @@ -1,9 +1,10 @@ """SB Manager using CDP Mode for bypassing CAPTCHAs.""" from seleniumbase import SB -with SB(uc=True, test=True, locale="en", guest=True) as sb: +with SB(uc=True, test=True, guest=True) as sb: url = "https://www.cloudflare.com/login" sb.activate_cdp_mode(url) - sb.sleep(4) + sb.wait_for_element('div[data-testid*="challenge-widget"]') + sb.sleep(1.5) sb.solve_captcha() - sb.sleep(2.5) + sb.sleep(3) diff --git a/mkdocs_build/requirements.txt b/mkdocs_build/requirements.txt index 2d2bd72091e..3c3537221e4 100644 --- a/mkdocs_build/requirements.txt +++ b/mkdocs_build/requirements.txt @@ -10,7 +10,7 @@ click==8.3.1 ghp-import==2.1.0 watchdog==6.0.0 cairocffi==1.7.1 -pathspec==1.0.1 +pathspec==1.0.3 Babel==2.17.0 paginate==0.5.7 mkdocs==1.6.1 diff --git a/requirements.txt b/requirements.txt index 2370414f210..bdeca01df67 100755 --- a/requirements.txt +++ b/requirements.txt @@ -6,9 +6,10 @@ wheel>=0.45.1 attrs>=25.4.0 certifi>=2026.1.4 exceptiongroup>=1.3.1 -websockets>=15.0.1 +websockets~=15.0.1;python_version<"3.10" +websockets>=16.0;python_version>="3.10" filelock~=3.19.1;python_version<"3.10" -filelock>=3.20.2;python_version>="3.10" +filelock>=3.20.3;python_version>="3.10" fasteners>=0.20 mycdp>=1.3.2 pynose>=1.5.5 @@ -26,7 +27,7 @@ pyyaml>=6.0.3 pygments>=2.19.2 pyreadline3>=3.5.4;platform_system=="Windows" tabcompleter>=1.4.0 -pdbp>=1.8.1 +pdbp>=1.8.2 idna>=3.11 chardet==5.2.0 charset-normalizer>=3.4.4,<4 diff --git a/seleniumbase/__version__.py b/seleniumbase/__version__.py index 7d9b8d301dd..ac062a21652 100755 --- a/seleniumbase/__version__.py +++ b/seleniumbase/__version__.py @@ -1,2 +1,2 @@ # seleniumbase package -__version__ = "4.45.10" +__version__ = "4.45.11" diff --git a/seleniumbase/console_scripts/sb_install.py b/seleniumbase/console_scripts/sb_install.py index ea00ce4ece8..6c2977ee369 100644 --- a/seleniumbase/console_scripts/sb_install.py +++ b/seleniumbase/console_scripts/sb_install.py @@ -554,7 +554,15 @@ def main(override=None, intel_for_uc=None, force_uc=None): else: url_request = get_cft_latest_versions_per_milestone() if not force_cft and url_request.ok: - fver = get_cft_latest_version_from_milestone(use_version) + try: + fver = get_cft_latest_version_from_milestone( + use_version + ) + except KeyError: + use_version = str(int(use_version) - 1) + fver = get_cft_latest_version_from_milestone( + use_version + ) found_chromedriver = True use_version = str(fver) if use_version == latest_version: diff --git a/seleniumbase/core/browser_launcher.py b/seleniumbase/core/browser_launcher.py index 8af9f901aff..7ffa8f0169e 100644 --- a/seleniumbase/core/browser_launcher.py +++ b/seleniumbase/core/browser_launcher.py @@ -1338,12 +1338,12 @@ def _uc_gui_click_captcha( _on_a_captcha_page = None if ctype == "cf_t": if not _on_a_cf_turnstile_page(driver): - return + return False else: _on_a_captcha_page = _on_a_cf_turnstile_page elif ctype == "g_rc": if not _on_a_g_recaptcha_page(driver): - return + return False else: _on_a_captcha_page = _on_a_g_recaptcha_page else: @@ -1354,7 +1354,7 @@ def _uc_gui_click_captcha( ctype = "cf_t" _on_a_captcha_page = _on_a_cf_turnstile_page else: - return + return False install_pyautogui_if_missing(driver) import pyautogui pyautogui = get_configured_pyautogui(pyautogui) @@ -1505,7 +1505,7 @@ def _uc_gui_click_captcha( ): frame = "div:not([class]) > div:not([class])" else: - return + return False if ( driver.is_element_present("form") and ( @@ -1605,9 +1605,9 @@ def _uc_gui_click_captcha( if driver.is_connected(): driver.switch_to_frame("iframe") else: - return + return False if not i_x or not i_y: - return + return False try: if ctype == "g_rc" and not driver.is_connected(): x = (i_x + 29) * width_ratio @@ -1638,7 +1638,7 @@ def _uc_gui_click_captcha( try: driver.switch_to.default_content() except Exception: - return + return False if x and y: sb_config._saved_cf_x_y = (x, y) if not __is_cdp_swap_needed(driver): @@ -1652,7 +1652,7 @@ def _uc_gui_click_captcha( _uc_gui_click_x_y(driver, x, y, timeframe=0.32) if __is_cdp_swap_needed(driver): time.sleep(float(constants.UC.RECONNECT_TIME) / 2.0) - return + return True reconnect_time = (float(constants.UC.RECONNECT_TIME) / 2.0) + 0.6 if IS_LINUX: reconnect_time = constants.UC.RECONNECT_TIME + 0.2 @@ -1684,17 +1684,17 @@ def _uc_gui_click_captcha( try: driver.switch_to_frame("iframe") except Exception: - return + return False checkbox_success = None if ctype == "cf_t": checkbox_success = "#success-icon" elif ctype == "g_rc": checkbox_success = "span.recaptcha-checkbox-checked" else: - return # If this line is reached, ctype wasn't set + return False # If line is reached, ctype wasn't set if driver.is_element_visible("#success-icon"): driver.switch_to.parent_frame(checkbox_success) - return + return True if blind: driver.uc_open_with_disconnect(driver.get_current_url(), 3.8) if __is_cdp_swap_needed(driver) and _on_a_captcha_page(driver): @@ -1708,6 +1708,7 @@ def _uc_gui_click_captcha( _uc_gui_click_x_y(driver, x, y, timeframe=0.32) if not cdp_mode_on_at_start: driver.reconnect(reconnect_time) + return True def uc_gui_click_captcha(driver, frame="iframe", retry=False, blind=False): @@ -5244,6 +5245,15 @@ def get_local_driver( or driver_version == "keep" ): browser_driver_close_match = True + one_off_chromium = False + if ( + hasattr(sb_config, "binary_location") + and sb_config.binary_location == "_chromium_" + ): + with suppress(Exception): + one_off_chromium_ver = int(use_version.split(".")[0]) - 1 + if one_off_chromium_ver == int(ch_driver_version): + one_off_chromium = True # If not ARM MAC and need to use uc_driver (and it's missing), # and already have chromedriver with the correct version, # then copy chromedriver to uc_driver (and it'll get patched). @@ -5275,12 +5285,16 @@ def get_local_driver( and use_version != "latest" # Browser version detected and (ch_driver_version or not local_ch_exists) and ( - use_version.split(".")[0] != ch_driver_version + ( + use_version.split(".")[0] != ch_driver_version + and not one_off_chromium + ) or ( not local_ch_exists and use_version.isnumeric() and int(use_version) >= 115 and not browser_driver_close_match + and not one_off_chromium ) ) ) @@ -5288,12 +5302,14 @@ def get_local_driver( use_uc and use_version != "latest" # Browser version detected and uc_driver_version != use_version + and not one_off_chromium ) or ( full_ch_driver_version # Also used for the uc_driver and driver_version and len(str(driver_version).split(".")) == 4 and full_ch_driver_version != driver_version + and not one_off_chromium ) ): # chromedriver download needed in the seleniumbase/drivers dir diff --git a/seleniumbase/core/sb_cdp.py b/seleniumbase/core/sb_cdp.py index 74b86679a85..0c93bf4aa89 100644 --- a/seleniumbase/core/sb_cdp.py +++ b/seleniumbase/core/sb_cdp.py @@ -791,11 +791,16 @@ def click_active_element(self): self.__slow_mode_pause_if_set() self.loop.run_until_complete(self.page.wait()) - def click_if_visible(self, selector): + def click_if_visible(self, selector, timeout=0): if self.is_element_visible(selector): with suppress(Exception): - element = self.find_element(selector, timeout=0) + self.click(selector, timeout=1) + else: + with suppress(Exception): + element = self.find_element(selector, timeout=timeout) + self.sleep(0.1) element.scroll_into_view() + self.sleep(0.1) element.click() self.__slow_mode_pause_if_set() self.loop.run_until_complete(self.page.wait()) @@ -1969,6 +1974,12 @@ def _on_a_cf_turnstile_page(self, source=None): return True return False + def _on_an_incapsula_hcaptcha_page(self, *args, **kwargs): + self.loop.run_until_complete(self.page.wait()) + if self.is_element_visible('iframe[src*="_Incapsula_Resource?"]'): + return True + return False + def _on_a_g_recaptcha_page(self, *args, **kwargs): time.sleep(0.4) # reCAPTCHA may need a moment to appear self.loop.run_until_complete(self.page.wait()) @@ -1999,20 +2010,34 @@ def __gui_click_recaptcha(self, use_cdp=False): if self.is_element_visible('iframe[title="reCAPTCHA"]'): selector = 'iframe[title="reCAPTCHA"]' else: - return + return False time.sleep(0.25) self.loop.run_until_complete(self.page.wait()) time.sleep(0.25) with suppress(Exception): - element_rect = self.get_gui_element_rect(selector, timeout=1) + element_rect = self.get_element_rect(selector, timeout=0.1) e_x = element_rect["x"] e_y = element_rect["y"] + window_rect = self.get_window_rect() + win_width = window_rect["innerWidth"] + win_height = window_rect["innerHeight"] + if ( + e_x > 1040 + and e_y > 640 + and abs(win_width - e_x) < 110 + and abs(win_height - e_y) < 110 + ): + # Probably the invisible reCAPTCHA in the bottom right corner + return False + gui_element_rect = self.get_gui_element_rect(selector, timeout=1) + gui_e_x = gui_element_rect["x"] + gui_e_y = gui_element_rect["y"] x_offset = 26 y_offset = 35 if shared_utils.is_windows(): x_offset = 29 - x = e_x + x_offset - y = e_y + y_offset + x = gui_e_x + x_offset + y = gui_e_y + y_offset sb_config._saved_cf_x_y = (x, y) time.sleep(0.08) if use_cdp: @@ -2025,6 +2050,33 @@ def __gui_click_recaptcha(self, use_cdp=False): time.sleep(0.056) else: self.gui_click_x_y(x, y) + return True + return False + + def __cdp_click_incapsula_hcaptcha(self): + selector = None + if self.is_element_visible('iframe[src*="_Incapsula_Resource?"]'): + outer_selector = 'iframe[src*="_Incapsula_Resource?"]' + selector = "iframe[data-hcaptcha-widget-id]" + element = self.get_nested_element(outer_selector, selector) + if not element: + return False + else: + return False + time.sleep(0.05) + self.loop.run_until_complete(self.page.wait()) + time.sleep(0.05) + x_offset = 30 + y_offset = 36 + gui_lock = FileLock(constants.MultiBrowser.PYAUTOGUILOCK) + with gui_lock: # Prevent issues with multiple processes + self.bring_active_window_to_front() + time.sleep(0.05) + with suppress(Exception): + element.click_with_offset(x_offset, y_offset) + time.sleep(0.2) + return True + return False def solve_captcha(self): self.__click_captcha(use_cdp=True) @@ -2046,10 +2098,13 @@ def __click_captcha(self, use_cdp=False): if self._on_a_cf_turnstile_page(source): pass elif self._on_a_g_recaptcha_page(source): - self.__gui_click_recaptcha(use_cdp) - return + result = self.__gui_click_recaptcha(use_cdp) + return result + elif self._on_an_incapsula_hcaptcha_page(): + result = self.__cdp_click_incapsula_hcaptcha() + return result else: - return + return False selector = None if self.is_element_present('[class="cf-turnstile"]'): selector = '[class="cf-turnstile"]' @@ -2085,6 +2140,10 @@ def __click_captcha(self, use_cdp=False): '[class*="turnstile"] div:not([class])' ): selector = '[class*="turnstile"] div:not([class])' + elif self.is_element_present( + "iframe[data-hcaptcha-widget-id]" + ): + selector = "iframe[data-hcaptcha-widget-id]" elif self.is_element_present( '[data-callback="onCaptchaSuccess"]' ): @@ -2094,9 +2153,9 @@ def __click_captcha(self, use_cdp=False): ): selector = "div:not([class]) > div:not([class])" else: - return + return False if not selector: - return + return False if ( self.is_element_present("form") and ( @@ -2204,6 +2263,8 @@ def __click_captcha(self, use_cdp=False): time.sleep(0.05) else: self.gui_click_x_y(x, y) + return True + return False def __gui_drag_drop(self, x1, y1, x2, y2, timeframe=0.25, uc_lock=False): self.__install_pyautogui_if_missing() diff --git a/seleniumbase/fixtures/base_case.py b/seleniumbase/fixtures/base_case.py index fd361079234..5a17810165f 100644 --- a/seleniumbase/fixtures/base_case.py +++ b/seleniumbase/fixtures/base_case.py @@ -2415,7 +2415,7 @@ def click_if_visible(self, selector, by="css selector", timeout=0): If a "timeout" is provided, waits that long for the element to appear before giving up and returning without a click().""" if self.__is_cdp_swap_needed(): - self.cdp.click_if_visible(selector) + self.cdp.click_if_visible(selector, timeout=timeout) return self.wait_for_ready_state_complete() if self.is_element_visible(selector, by=by): @@ -2425,6 +2425,7 @@ def click_if_visible(self, selector, by="css selector", timeout=0): self.wait_for_element_visible( selector, by=by, timeout=timeout ) + self.sleep(0.2) if self.is_element_visible(selector, by=by): self.click(selector, by=by) diff --git a/seleniumbase/fixtures/js_utils.py b/seleniumbase/fixtures/js_utils.py index 67e9c872da2..cbeef3035a4 100644 --- a/seleniumbase/fixtures/js_utils.py +++ b/seleniumbase/fixtures/js_utils.py @@ -1374,7 +1374,10 @@ def slow_scroll_to_element(driver, element, *args, **kwargs): element_location_x_fix = 0 if element_location_x + element_width <= screen_width: element_location_x_fix = 0 - distance = element_location_y - scroll_position + if shared_utils.is_cdp_swap_needed(driver): + distance = element_location_y + else: + distance = element_location_y - scroll_position if distance != 0: total_steps = int(abs(distance) / 50.0) + 2.0 step_value = float(distance) / total_steps @@ -1388,7 +1391,8 @@ def slow_scroll_to_element(driver, element, *args, **kwargs): scroll_script = "window.scrollTo(%s, %s);" % ( element_location_x_fix, element_location_y ) - execute_script(driver, scroll_script) + if not shared_utils.is_cdp_swap_needed(driver): + execute_script(driver, scroll_script) time.sleep(0.01) if distance > 430 or distance < -300: # Add small recovery time for long-distance slow-scrolling diff --git a/seleniumbase/undetected/cdp_driver/cdp_util.py b/seleniumbase/undetected/cdp_driver/cdp_util.py index 674cd5fa119..1f1d5e5a4c8 100644 --- a/seleniumbase/undetected/cdp_driver/cdp_util.py +++ b/seleniumbase/undetected/cdp_driver/cdp_util.py @@ -39,16 +39,15 @@ def __activate_standard_virtual_display(): width = settings.HEADLESS_START_WIDTH height = settings.HEADLESS_START_HEIGHT with suppress(Exception): - _xvfb_display = Display( - visible=0, size=(width, height) - ) + _xvfb_display = Display(visible=0, size=(width, height)) _xvfb_display.start() + time.sleep(0.03) sb_config._virtual_display = _xvfb_display sb_config.headless_active = True def __activate_virtual_display_as_needed( - headless, headed, xvfb, xvfb_metrics + headless, headed, xvfb, xvfb_metrics, override_display=False ): """This is only needed on Linux.""" reset_virtual_display = False @@ -72,6 +71,7 @@ def __activate_virtual_display_as_needed( not hasattr(sb_config, "_virtual_display") or not sb_config._virtual_display or reset_virtual_display + or override_display ) ): from sbvirtualdisplay import Display @@ -109,6 +109,7 @@ def __activate_virtual_display_as_needed( backend="xvfb", use_xauth=True, ) + time.sleep(0.05) if "--debug-display" in sys.argv: print( "Starting VDisplay from cdp_util: (%s, %s)" @@ -672,7 +673,12 @@ async def start( try: driver = await Browser.create(config) except Exception: - time.sleep(0.15) + time.sleep(0.12) + if not host or not port: + __activate_virtual_display_as_needed( + headless, headed, xvfb, xvfb_metrics, override_display=True + ) + time.sleep(0.05) driver = await Browser.create(config) if proxy: sb_config._cdp_proxy = proxy diff --git a/setup.py b/setup.py index 155612ae036..bb2cd90dfa1 100755 --- a/setup.py +++ b/setup.py @@ -154,9 +154,10 @@ 'attrs>=25.4.0', 'certifi>=2026.1.4', 'exceptiongroup>=1.3.1', - 'websockets>=15.0.1', + 'websockets~=15.0.1;python_version<"3.10"', + 'websockets>=16.0;python_version>="3.10"', 'filelock~=3.19.1;python_version<"3.10"', - 'filelock>=3.20.2;python_version>="3.10"', + 'filelock>=3.20.3;python_version>="3.10"', 'fasteners>=0.20', 'mycdp>=1.3.2', 'pynose>=1.5.5', @@ -174,7 +175,7 @@ 'pygments>=2.19.2', 'pyreadline3>=3.5.4;platform_system=="Windows"', 'tabcompleter>=1.4.0', - 'pdbp>=1.8.1', + 'pdbp>=1.8.2', 'idna>=3.11', 'chardet==5.2.0', 'charset-normalizer>=3.4.4,<4',