diff --git a/examples/cdp_mode/ReadMe.md b/examples/cdp_mode/ReadMe.md index 33e9c2052fa..71d94fcd608 100644 --- a/examples/cdp_mode/ReadMe.md +++ b/examples/cdp_mode/ReadMe.md @@ -692,6 +692,7 @@ await tab.get_current_url() await tab.send_keys(selector, text, timeout=5) await tab.type(selector, text, timeout=5) await tab.click(selector, timeout=5) +await tab.click_if_visible(selector, timeout=0) await tab.click_with_offset(selector, x, y, center=False, timeout=5) await tab.solve_captcha() await tab.click_captcha() # Same as solve_captcha() diff --git a/examples/cdp_mode/playwright/raw_gas_info_async.py b/examples/cdp_mode/playwright/raw_gas_info_async.py new file mode 100644 index 00000000000..404b2e4e6c3 --- /dev/null +++ b/examples/cdp_mode/playwright/raw_gas_info_async.py @@ -0,0 +1,40 @@ +import asyncio +from playwright.async_api import async_playwright +from seleniumbase import cdp_driver + + +async def main(): + driver = await cdp_driver.start_async() + endpoint_url = driver.get_endpoint_url() + tab = await driver.get("about:blank") + + async with async_playwright() as p: + browser = await p.chromium.connect_over_cdp(endpoint_url) + context = browser.contexts[0] + page = context.pages[0] + url = ( + "https://www.gassaferegister.co.uk/gas-safety" + "/gas-safety-certificates-records/building-regulations-certificate" + "/order-replacement-building-regulations-certificate/" + ) + await page.goto(url) + await tab.sleep(0.6) + await tab.solve_captcha() + await page.wait_for_selector("#SearchTerm") + await tab.sleep(1.4) + allow_cookies = 'button:contains("Allow all cookies")' + await tab.click_if_visible(allow_cookies, timeout=2) + await tab.sleep(1) + await page.fill("#SearchTerm", "Hydrogen") + await page.click("button.search-button") + await tab.sleep(3) + results = await tab.query_selector_all("div.search-result") + for result in results: + print(result.text.replace(" " * 12, " ").strip() + "\n") + await tab.scroll_down(50) + await tab.sleep(1) + + +if 
__name__ == "__main__": + loop = asyncio.new_event_loop() + loop.run_until_complete(main()) diff --git a/examples/cdp_mode/playwright/raw_gas_info_sync.py b/examples/cdp_mode/playwright/raw_gas_info_sync.py index 9b1d74c566b..3880ff62ea8 100644 --- a/examples/cdp_mode/playwright/raw_gas_info_sync.py +++ b/examples/cdp_mode/playwright/raw_gas_info_sync.py @@ -14,9 +14,10 @@ "/order-replacement-building-regulations-certificate/" ) page.goto(url) - sb.sleep(0.5) + sb.sleep(0.6) sb.solve_captcha() page.wait_for_selector("#SearchTerm") + sb.sleep(1.4) allow_cookies = 'button:contains("Allow all cookies")' sb.click_if_visible(allow_cookies, timeout=2) sb.sleep(1) diff --git a/examples/cdp_mode/raw_gas_records.py b/examples/cdp_mode/raw_gas_records.py index c484baa5c2c..2fdaf48d0cc 100644 --- a/examples/cdp_mode/raw_gas_records.py +++ b/examples/cdp_mode/raw_gas_records.py @@ -8,10 +8,10 @@ "/order-replacement-building-regulations-certificate/" ) sb.activate_cdp_mode(url) - sb.sleep(0.5) + sb.sleep(0.6) sb.solve_captcha() sb.wait_for_element("#SearchTerm", timeout=5) - sb.sleep(2) + sb.sleep(1.4) allow_cookies = 'button:contains("Allow all cookies")' sb.click_if_visible(allow_cookies, timeout=2) sb.sleep(1) diff --git a/examples/cdp_mode/raw_kohls.py b/examples/cdp_mode/raw_kohls.py index c809412b13e..4354dc691f4 100644 --- a/examples/cdp_mode/raw_kohls.py +++ b/examples/cdp_mode/raw_kohls.py @@ -2,7 +2,7 @@ with SB(uc=True, test=True, locale="en", incognito=True) as sb: url = "https://www.kohls.com/" - sb.activate_cdp_mode(url) + sb.activate_cdp_mode(url, ad_block=True) sb.sleep(2.6) search = "Mickey Mouse Blanket" req_1 = "Mickey" diff --git a/examples/cdp_mode/raw_pokemon.py b/examples/cdp_mode/raw_pokemon.py index 7b5f2f1badc..38227c06153 100644 --- a/examples/cdp_mode/raw_pokemon.py +++ b/examples/cdp_mode/raw_pokemon.py @@ -1,6 +1,6 @@ from seleniumbase import SB -with SB(uc=True, test=True, locale="en", guest=True) as sb: +with SB(uc=True, test=True, locale="en", 
ad_block=True) as sb: url = "https://www.pokemon.com/us" sb.activate_cdp_mode(url) sb.sleep(1.5) diff --git a/help_docs/cdp_mode_methods.md b/help_docs/cdp_mode_methods.md index 1092a1b8a2c..b3ba9211d8b 100644 --- a/help_docs/cdp_mode_methods.md +++ b/help_docs/cdp_mode_methods.md @@ -326,6 +326,7 @@ await tab.get_current_url() await tab.send_keys(selector, text, timeout=5) await tab.type(selector, text, timeout=5) await tab.click(selector, timeout=5) +await tab.click_if_visible(selector, timeout=0) await tab.click_with_offset(selector, x, y, center=False, timeout=5) await tab.solve_captcha() await tab.click_captcha() # Same as solve_captcha() diff --git a/requirements.txt b/requirements.txt index bdeca01df67..1929a5f2984 100755 --- a/requirements.txt +++ b/requirements.txt @@ -44,7 +44,7 @@ wsproto==1.2.0;python_version<"3.10" wsproto~=1.3.2;python_version>="3.10" websocket-client~=1.9.0 selenium==4.32.0;python_version<"3.10" -selenium==4.39.0;python_version>="3.10" +selenium==4.40.0;python_version>="3.10" cssselect==1.3.0 nest-asyncio==1.6.0 sortedcontainers==2.4.0 @@ -63,7 +63,7 @@ pytest-rerunfailures==16.1;python_version>="3.10" pytest-xdist==3.8.0 parameterized==0.9.0 behave==1.2.6 -soupsieve~=2.8.1 +soupsieve~=2.8.2 beautifulsoup4~=4.14.3 pyotp==2.9.0 python-xlib==0.33;platform_system=="Linux" diff --git a/seleniumbase/__version__.py b/seleniumbase/__version__.py index 78c6255030b..a790fd98f7a 100755 --- a/seleniumbase/__version__.py +++ b/seleniumbase/__version__.py @@ -1,2 +1,2 @@ # seleniumbase package -__version__ = "4.45.13" +__version__ = "4.46.0" diff --git a/seleniumbase/core/sb_cdp.py b/seleniumbase/core/sb_cdp.py index e9e2fe3ece7..d0579a802ef 100644 --- a/seleniumbase/core/sb_cdp.py +++ b/seleniumbase/core/sb_cdp.py @@ -1995,7 +1995,7 @@ def _on_a_g_recaptcha_page(self, *args, **kwargs): time.sleep(0.1) return True elif "com/recaptcha/api.js" in source: - time.sleep(1.6) # Still loading + time.sleep(1.2) # Maybe still loading try: 
self.loop.run_until_complete(self.page.wait(0.1)) except Exception: @@ -2084,10 +2084,10 @@ def __cdp_click_incapsula_hcaptcha(self): with suppress(Exception): element.click_with_offset(x_offset, y_offset) was_clicked = True - time.sleep(0.056) + time.sleep(0.075) if was_clicked: # Wait a moment for the click to succeed - time.sleep(0.25) + time.sleep(0.75) self.__slow_mode_pause_if_set() self.loop.run_until_complete(self.page.wait()) if "--debug" in sys.argv: diff --git a/seleniumbase/undetected/cdp_driver/browser.py b/seleniumbase/undetected/cdp_driver/browser.py index 98d77107f1d..1cd63edd92c 100644 --- a/seleniumbase/undetected/cdp_driver/browser.py +++ b/seleniumbase/undetected/cdp_driver/browser.py @@ -261,6 +261,12 @@ def get_rd_port(self): return self.config.port def get_rd_url(self): + """Returns the remote-debugging URL, which is used for + allowing the Playwright integration to launch stealthy. + Also sets an environment variable to hide this warning: + Deprecation: "url.parse() behavior is not standardized". + (github.com/microsoft/playwright-python/issues/3016)""" + os.environ["NODE_NO_WARNINGS"] = "1" host = self.config.host port = self.config.port return f"http://{host}:{port}" diff --git a/seleniumbase/undetected/cdp_driver/tab.py b/seleniumbase/undetected/cdp_driver/tab.py index 1fe5dc0a834..9aab777c760 100644 --- a/seleniumbase/undetected/cdp_driver/tab.py +++ b/seleniumbase/undetected/cdp_driver/tab.py @@ -5,6 +5,7 @@ import logging import pathlib import re +import sys import urllib.parse import warnings from contextlib import suppress @@ -12,6 +13,7 @@ from seleniumbase import config as sb_config from seleniumbase.fixtures import constants from seleniumbase.fixtures import js_utils +from seleniumbase.fixtures import page_utils from seleniumbase.fixtures import shared_utils from typing import Dict, List, Union, Optional, Tuple from . 
import browser as cdp_browser @@ -1339,16 +1341,29 @@ async def is_element_present(self, selector): return False async def is_element_visible(self, selector): - try: - element = await self.select(selector, timeout=0.01) - except Exception: - return False - if not element: - return False - try: - position = await element.get_position_async() - return (position.width != 0 or position.height != 0) - except Exception: + if ":contains(" not in selector: + try: + element = await self.select(selector, timeout=0.01) + except Exception: + return False + if not element: + return False + try: + position = await element.get_position_async() + return (position.width != 0 or position.height != 0) + except Exception: + return False + else: + with suppress(Exception): + tag_name = selector.split(":contains(")[0].split(" ")[-1] + text = selector.split(":contains(")[1].split(")")[0][1:-1] + element = await self.select(tag_name, timeout=0.01) + if not element: + raise Exception() + element = await self.find_element_by_text(text) + if not element: + raise Exception() + return True return False async def __on_a_cf_turnstile_page(self, source=None): @@ -1369,6 +1384,11 @@ async def __on_a_cf_turnstile_page(self, source=None): return True return False + async def __on_an_incapsula_hcaptcha_page(self, *args, **kwargs): + if await self.is_element_visible('iframe[src*="Incapsula_Resource?"]'): + return True + return False + async def __on_a_g_recaptcha_page(self, *args, **kwargs): await self.sleep(0.4) # reCAPTCHA may need a moment to appear source = await self.get_html() @@ -1382,7 +1402,7 @@ async def __on_a_g_recaptcha_page(self, *args, **kwargs): await self.sleep(0.1) return True elif "com/recaptcha/api.js" in source: - await self.sleep(1.6) # Still loading + await self.sleep(1.2) # Maybe still loading return True return False @@ -1391,18 +1411,34 @@ async def __gui_click_recaptcha(self): if await self.is_element_present('iframe[title="reCAPTCHA"]'): selector = 
'iframe[title="reCAPTCHA"]' else: - return + return False await self.sleep(0.5) with suppress(Exception): - element_rect = await self.get_gui_element_rect(selector, timeout=1) + element_rect = await self.get_element_rect(selector, timeout=0.1) e_x = element_rect["x"] e_y = element_rect["y"] + window_rect = await self.get_window_rect() + win_width = window_rect["innerWidth"] + win_height = window_rect["innerHeight"] + if ( + e_x > 1040 + and e_y > 640 + and abs(win_width - e_x) < 110 + and abs(win_height - e_y) < 110 + ): + # Probably the invisible reCAPTCHA in the bottom right corner + return False + gui_element_rect = await self.get_gui_element_rect( + selector, timeout=1 + ) + gui_e_x = gui_element_rect["x"] + gui_e_y = gui_element_rect["y"] x_offset = 26 y_offset = 35 if await asyncio.to_thread(shared_utils.is_windows): x_offset = 29 - x = e_x + x_offset - y = e_y + y_offset + x = gui_e_x + x_offset + y = gui_e_y + y_offset sb_config._saved_cf_x_y = (x, y) # For debugging later await self.sleep(0.11) gui_lock = FileLock(constants.MultiBrowser.PYAUTOGUILOCK) @@ -1413,6 +1449,53 @@ async def __gui_click_recaptcha(self): selector, x_offset, y_offset, timeout=1 ) await self.sleep(0.22) + return True + return False + + async def __cdp_click_incapsula_hcaptcha(self): + selector = None + if await self.is_element_visible('iframe[src*="Incapsula_Resource?"]'): + outer_selector = 'iframe[src*="Incapsula_Resource?"]' + selector = "iframe[data-hcaptcha-widget-id]" + outer_element = await self.find_element_by_text(outer_selector) + element = await outer_element.query_selector_async(selector) + if not element: + return False + else: + return False + await self.sleep(0.55) + x_offset = 30 + y_offset = 36 + was_clicked = False + gui_lock = FileLock(constants.MultiBrowser.PYAUTOGUILOCK) + with gui_lock: # Prevent issues with multiple processes + await self.bring_to_front() + await self.sleep(0.056) + if "--debug" in sys.argv: + displayed_selector = "`%s`" % selector + if '"' not 
in selector: + displayed_selector = '"%s"' % selector + elif "'" not in selector: + displayed_selector = "'%s'" % selector + print( + " click_with_offset(%s, %s, %s)" + % (displayed_selector, x_offset, y_offset) + ) + with suppress(Exception): + await element.mouse_click_with_offset_async( + x=x_offset, y=y_offset, center=False + ) + was_clicked = True + await self.sleep(0.075) + if was_clicked: + # Wait a moment for the click to succeed + await self.sleep(0.75) + if "--debug" in sys.argv: + print(" hCaptcha was clicked!") + return True + if "--debug" in sys.argv: + print(" hCaptcha was NOT clicked!") + return False async def get_element_rect(self, selector, timeout=5): element = await self.select(selector, timeout=timeout) @@ -1505,6 +1588,25 @@ async def click(self, selector, timeout=5): element = await self.find(selector, timeout=timeout) await element.click_async() + async def click_if_visible(self, selector, timeout=0): + original_selector = selector + if (":contains(") in selector: + selector, _ = page_utils.recalculate_selector( + selector, by="css selector", xp_ok=True + ) + if await self.is_element_visible(original_selector): + with suppress(Exception): + element = await self.find(selector, timeout=0.01) + await element.click_async() + elif timeout == 0: + return + else: + with suppress(Exception): + await self.find(selector, timeout=timeout) + if await self.is_element_visible(original_selector): + element = await self.find(selector, timeout=0.01) + await element.click_async() + async def click_with_offset(self, selector, x, y, center=False, timeout=5): element = await self.find(selector, timeout=timeout) await element.scroll_into_view_async() @@ -1516,10 +1618,13 @@ async def solve_captcha(self): if await self.__on_a_cf_turnstile_page(source): pass elif await self.__on_a_g_recaptcha_page(source): - await self.__gui_click_recaptcha() - return + result = await self.__gui_click_recaptcha() + return result + elif await self.__on_an_incapsula_hcaptcha_page(): + result 
= await self.__cdp_click_incapsula_hcaptcha() + return result else: - return + return False selector = None if await self.is_element_present('[class="cf-turnstile"]'): selector = '[class="cf-turnstile"]' @@ -1568,9 +1673,9 @@ async def solve_captcha(self): ): selector = "div:not([class]) > div:not([class])" else: - return + return False if not selector: - return + return False if ( await self.is_element_present("form") and ( @@ -1670,6 +1775,8 @@ async def solve_captcha(self): selector, x_offset, y_offset, timeout=1 ) await self.sleep(0.22) + return True + return False async def click_captcha(self): await self.solve_captcha() diff --git a/setup.py b/setup.py index bb2cd90dfa1..2e4cc7ea4af 100755 --- a/setup.py +++ b/setup.py @@ -192,7 +192,7 @@ 'wsproto~=1.3.2;python_version>="3.10"', 'websocket-client~=1.9.0', 'selenium==4.32.0;python_version<"3.10"', - 'selenium==4.39.0;python_version>="3.10"', + 'selenium==4.40.0;python_version>="3.10"', 'cssselect==1.3.0', 'nest-asyncio==1.6.0', 'sortedcontainers==2.4.0', @@ -211,7 +211,7 @@ 'pytest-xdist==3.8.0', 'parameterized==0.9.0', 'behave==1.2.6', # Newer ones had issues - 'soupsieve~=2.8.1', + 'soupsieve~=2.8.2', 'beautifulsoup4~=4.14.3', 'pyotp==2.9.0', 'python-xlib==0.33;platform_system=="Linux"',