Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/cdp_mode/ReadMe.md
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,7 @@ sb.cdp.get_current_url()
sb.cdp.get_origin()
sb.cdp.get_html(include_shadow_dom=True)
sb.cdp.get_page_source(include_shadow_dom=True)
sb.cdp.get_beautiful_soup(source=None)
sb.cdp.get_user_agent()
sb.cdp.get_cookie_string()
sb.cdp.get_locale_code()
Expand Down
18 changes: 12 additions & 6 deletions examples/cdp_mode/playwright/ReadMe.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@ The `SB()` format requires WebDriver, therefore `chromedriver` will be downloade

In the sync formats, `get_endpoint_url()` also applies `nest-asyncio` so that nested event loops are allowed. (Python doesn't allow nested event loops by default.) Without this, you'd get the error: `"Cannot run the event loop while another loop is running"` when calling CDP Mode methods (such as `solve_captcha()`) from within the Playwright context manager. This `nest-asyncio` call is done behind the scenes so that users don't need to handle this on their own.

Default timeout values are different between Playwright and SeleniumBase. For instance, a 30-second default timeout in a Playwright method might be 10 seconds in the equivalent SeleniumBase method. When specifying custom timeout values, Playwright uses milliseconds, whereas SeleniumBase uses seconds. E.g., `page.wait_for_timeout(500)` is the equivalent of `sb.sleep(0.5)`.

Playwright's `:has-text()` selector is the equivalent of SeleniumBase's `:contains()` selector, except for one small difference: `:has-text()` isn't case-sensitive, but `:contains()` is.

Unlike normal Playwright, you don't need to run `playwright install` before running Stealthy Playwright Mode scripts because the system Chrome will be used. There's also the option of setting `use_chromium=True` to use the unbranded Chromium browser instead, which still supports extensions.

### 🎭 <b translate="no">Stealthy Playwright Mode</b> examples:

Here's an example that queries Microsoft Copilot:
Expand All @@ -105,16 +111,16 @@ with sync_playwright() as p:
page = context.pages[0]
page.goto("https://copilot.microsoft.com")
page.wait_for_selector("textarea#userInput")
sb.sleep(1)
page.wait_for_timeout(1000)
query = "Playwright Python connect_over_cdp() sync example"
page.fill("textarea#userInput", query)
page.click('button[data-testid="submit-button"]')
sb.sleep(3)
page.wait_for_timeout(4000)
sb.solve_captcha()
page.wait_for_selector('button[data-testid*="-thumbs-up"]')
sb.sleep(4)
page.wait_for_timeout(4000)
page.click('button[data-testid*="scroll-to-bottom"]')
sb.sleep(3)
page.wait_for_timeout(3000)
chat_results = '[data-testid="highlighted-chats"]'
result = page.locator(chat_results).inner_text()
print(result.replace("\n\n", " \n"))
Expand All @@ -134,9 +140,9 @@ with sync_playwright() as p:
context = browser.contexts[0]
page = context.pages[0]
page.goto("https://www.bing.com/turing/captcha/challenge")
sb.sleep(3)
page.wait_for_timeout(2000)
sb.solve_captcha()
sb.sleep(3)
page.wait_for_timeout(2000)
```

--------
Expand Down
2 changes: 1 addition & 1 deletion examples/cdp_mode/playwright/raw_basic_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ async def main():
await page.fill("#password", "secret_pass")
await page.click("#log-in")
await page.wait_for_selector("h1")
await driver.sleep(1)
await page.wait_for_timeout(1000)


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion examples/cdp_mode/playwright/raw_basic_nested.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@
page.fill("#password", "secret_pass")
page.click("#log-in")
page.wait_for_selector("h1")
sb.sleep(1)
page.wait_for_timeout(1000)
2 changes: 1 addition & 1 deletion examples/cdp_mode/playwright/raw_basic_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
page.fill("#password", "secret_pass")
page.click("#log-in")
page.wait_for_selector("h1")
sb.sleep(1)
page.wait_for_timeout(1000)
4 changes: 2 additions & 2 deletions examples/cdp_mode/playwright/raw_bing_cap_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ async def main():
context = browser.contexts[0]
page = context.pages[0]
await page.goto("https://www.bing.com/turing/captcha/challenge")
await driver.sleep(3)
await page.wait_for_timeout(2000)
await driver.solve_captcha()
await driver.sleep(3)
await page.wait_for_timeout(2000)


if __name__ == "__main__":
Expand Down
4 changes: 2 additions & 2 deletions examples/cdp_mode/playwright/raw_bing_cap_nested.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@
context = browser.contexts[0]
page = context.pages[0]
page.goto("https://www.bing.com/turing/captcha/challenge")
sb.sleep(3)
page.wait_for_timeout(2000)
sb.solve_captcha()
sb.sleep(3)
page.wait_for_timeout(2000)
4 changes: 2 additions & 2 deletions examples/cdp_mode/playwright/raw_bing_cap_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@
context = browser.contexts[0]
page = context.pages[0]
page.goto("https://www.bing.com/turing/captcha/challenge")
sb.sleep(3)
page.wait_for_timeout(2000)
sb.solve_captcha()
sb.sleep(3)
page.wait_for_timeout(2000)
4 changes: 2 additions & 2 deletions examples/cdp_mode/playwright/raw_cf_cap_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@
context = browser.contexts[0]
page = context.pages[0]
page.goto("https://www.cloudflare.com/login")
sb.sleep(3)
page.wait_for_timeout(4500)
sb.solve_captcha()
sb.sleep(3)
page.wait_for_timeout(3000)
8 changes: 4 additions & 4 deletions examples/cdp_mode/playwright/raw_copilot_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@ async def main():
page = context.pages[0]
await page.goto("https://copilot.microsoft.com")
await page.wait_for_selector("textarea#userInput")
await driver.sleep(1)
await page.wait_for_timeout(1000)
query = "Playwright Python connect_over_cdp() sync example"
await page.fill("textarea#userInput", query)
await page.click('button[data-testid="submit-button"]')
await driver.sleep(4)
await page.wait_for_timeout(4000)
await driver.solve_captcha()
await page.wait_for_selector('button[data-testid*="-thumbs-up"]')
await driver.sleep(4)
await page.wait_for_timeout(4000)
await page.click('button[data-testid*="scroll-to-bottom"]')
await driver.sleep(3)
await page.wait_for_timeout(3000)
chat_results = '[data-testid="highlighted-chats"]'
result = await page.locator(chat_results).inner_text()
print(result.replace("\n\n", " \n"))
Expand Down
8 changes: 4 additions & 4 deletions examples/cdp_mode/playwright/raw_copilot_nested.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@
page = context.pages[0]
page.goto("https://copilot.microsoft.com")
page.wait_for_selector("textarea#userInput")
sb.sleep(1)
page.wait_for_timeout(1000)
query = "Playwright Python connect_over_cdp() sync example"
page.fill("textarea#userInput", query)
page.click('button[data-testid="submit-button"]')
sb.sleep(4)
page.wait_for_timeout(4000)
sb.solve_captcha()
page.wait_for_selector('button[data-testid*="-thumbs-up"]')
sb.sleep(4)
page.wait_for_timeout(4000)
page.click('button[data-testid*="scroll-to-bottom"]')
sb.sleep(3)
page.wait_for_timeout(3000)
chat_results = '[data-testid="highlighted-chats"]'
result = page.locator(chat_results).inner_text()
print(result.replace("\n\n", " \n"))
8 changes: 4 additions & 4 deletions examples/cdp_mode/playwright/raw_copilot_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@
page = context.pages[0]
page.goto("https://copilot.microsoft.com")
page.wait_for_selector("textarea#userInput")
sb.sleep(1)
page.wait_for_timeout(1000)
query = "Playwright Python connect_over_cdp() sync example"
page.fill("textarea#userInput", query)
page.click('button[data-testid="submit-button"]')
sb.sleep(4)
page.wait_for_timeout(4000)
sb.solve_captcha()
page.wait_for_selector('button[data-testid*="-thumbs-up"]')
sb.sleep(4)
page.wait_for_timeout(4000)
page.click('button[data-testid*="scroll-to-bottom"]')
sb.sleep(3)
page.wait_for_timeout(3000)
chat_results = '[data-testid="highlighted-chats"]'
result = page.locator(chat_results).inner_text()
print(result.replace("\n\n", " \n"))
11 changes: 6 additions & 5 deletions examples/cdp_mode/playwright/raw_gas_info_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,22 @@ async def main():
"/order-replacement-building-regulations-certificate/"
)
await page.goto(url)
await tab.sleep(0.6)
await page.wait_for_timeout(600)
await tab.solve_captcha()
await page.wait_for_selector("#SearchTerm")
await tab.sleep(1.4)
await page.wait_for_timeout(2000)
allow_cookies = 'button:contains("Allow all cookies")'
await tab.click_if_visible(allow_cookies, timeout=2)
await tab.sleep(1)
await page.wait_for_timeout(1000)
await page.fill("#SearchTerm", "Hydrogen")
await tab.click_if_visible(allow_cookies, timeout=1)
await page.click("button.search-button")
await tab.sleep(3)
await page.wait_for_timeout(3000)
results = await tab.query_selector_all("div.search-result")
for result in results:
print(result.text.replace(" " * 12, " ").strip() + "\n")
await tab.scroll_down(50)
await tab.sleep(1)
await page.wait_for_timeout(1000)


if __name__ == "__main__":
Expand Down
11 changes: 6 additions & 5 deletions examples/cdp_mode/playwright/raw_gas_info_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,20 @@
"/order-replacement-building-regulations-certificate/"
)
page.goto(url)
sb.sleep(0.6)
page.wait_for_timeout(600)
sb.solve_captcha()
page.wait_for_selector("#SearchTerm")
sb.sleep(1.4)
page.wait_for_timeout(2000)
allow_cookies = 'button:contains("Allow all cookies")'
sb.click_if_visible(allow_cookies, timeout=2)
sb.sleep(1)
page.wait_for_timeout(1000)
page.fill("#SearchTerm", "Hydrogen")
sb.click_if_visible(allow_cookies, timeout=1)
page.click("button.search-button")
sb.sleep(3)
page.wait_for_timeout(3000)
items = page.locator("div.search-result")
for i in range(items.count()):
item_text = items.nth(i).inner_text()
print(item_text.replace("\n\n", "\n") + "\n")
sb.scroll_to_bottom()
sb.sleep(1)
page.wait_for_timeout(3000)
6 changes: 3 additions & 3 deletions examples/cdp_mode/playwright/raw_gitlab_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@ async def main():
context = browser.contexts[0]
page = context.pages[0]
await page.goto("https://gitlab.com/users/sign_in")
await driver.sleep(3)
await page.wait_for_timeout(3000)
await driver.solve_captcha()
await driver.sleep(1)
await page.wait_for_timeout(1000)
await page.locator('label[for="user_login"]').click()
await page.wait_for_selector('[data-testid="sign-in-button"]')
await page.locator("#user_login").fill("Username")
await driver.sleep(2)
await page.wait_for_timeout(2000)


if __name__ == "__main__":
Expand Down
6 changes: 3 additions & 3 deletions examples/cdp_mode/playwright/raw_gitlab_nested.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
context = browser.contexts[0]
page = context.pages[0]
page.goto("https://gitlab.com/users/sign_in")
sb.sleep(3)
page.wait_for_timeout(3000)
sb.solve_captcha()
sb.sleep(1)
page.wait_for_timeout(1000)
page.locator('label[for="user_login"]').click()
page.wait_for_selector('[data-testid="sign-in-button"]')
page.locator("#user_login").fill("Username")
sb.sleep(2)
page.wait_for_timeout(2000)
6 changes: 3 additions & 3 deletions examples/cdp_mode/playwright/raw_gitlab_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
context = browser.contexts[0]
page = context.pages[0]
page.goto("https://gitlab.com/users/sign_in")
sb.sleep(3)
page.wait_for_timeout(3000)
sb.solve_captcha()
sb.sleep(1)
page.wait_for_timeout(1000)
page.locator('label[for="user_login"]').click()
page.wait_for_selector('[data-testid="sign-in-button"]')
page.locator("#user_login").fill("Username")
sb.sleep(2)
page.wait_for_timeout(2000)
33 changes: 33 additions & 0 deletions examples/cdp_mode/playwright/raw_indeed_sync.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from playwright.sync_api import sync_playwright
from seleniumbase import sb_cdp

sb = sb_cdp.Chrome()
sb.open("https://www.indeed.com/companies/search")
endpoint_url = sb.get_endpoint_url()

with sync_playwright() as p:
browser = p.chromium.connect_over_cdp(endpoint_url)
context = browser.contexts[0]
page = context.pages[0]
search_box = "input#company-search"
if page.locator(search_box).count() == 0:
page.wait_for_timeout(2500)
sb.solve_captcha()
page.wait_for_timeout(1000)
company = "NASA Jet Propulsion Laboratory"
page.click(search_box)
page.fill(search_box, company)
page.click('button[type="submit"]')
page.click('a:has-text("%s")' % company)
name_header = 'div[itemprop="name"]'
page.wait_for_timeout(1000)
if page.locator(name_header).count() == 0:
page.wait_for_timeout(2500)
sb.solve_captcha()
page.wait_for_timeout(1000)
for i in range(10):
sb.scroll_down(12)
sb.sleep(0.14)
info = page.locator('[data-testid="AboutSection-section"]')
soup = sb.get_beautiful_soup(info.inner_html()).get_text("\n")
print("*** %s: ***\n%s" % (company, soup))
2 changes: 1 addition & 1 deletion examples/cdp_mode/playwright/raw_nike_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
page.click('[data-testid="user-tools-container"] search')
search = "Pegasus"
page.fill('input[type="search"]', search)
sb.sleep(4)
page.wait_for_timeout(4000)
details = 'ul[data-testid*="products"] figure .details'
items = page.locator(details)
if items:
Expand Down
9 changes: 5 additions & 4 deletions examples/cdp_mode/playwright/raw_nordstrom_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@
context = browser.contexts[0]
page = context.pages[0]
page.goto("https://www.nordstrom.com/")
sb.sleep(2)
page.wait_for_timeout(2000)
page.click("input#keyword-search-input")
sb.sleep(0.8)
page.wait_for_timeout(800)
search = "cocktail dresses for women teal"
sb.press_keys("input#keyword-search-input", search + "\n")
sb.sleep(2.2)
search_box = page.locator("input#keyword-search-input")
search_box.press_sequentially(search + "\n", delay=80)
page.wait_for_timeout(2200)
for i in range(17):
sb.scroll_down(16)
sb.sleep(0.14)
Expand Down
7 changes: 4 additions & 3 deletions examples/cdp_mode/playwright/raw_planetmc_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
context = browser.contexts[0]
page = context.pages[0]
page.goto("https://www.planetminecraft.com/account/sign_in/")
sb.sleep(2)
page.wait_for_timeout(2000)
sb.solve_captcha()
sb.wait_for_element_absent("input[disabled]")
sb.sleep(2)
input_disabled = page.locator("input[disabled]")
input_disabled.wait_for(state="hidden", timeout=5000)
page.wait_for_timeout(2000)
2 changes: 1 addition & 1 deletion examples/cdp_mode/playwright/raw_reddit_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
url = f"https://www.reddit.com/r/webscraping/search/?q={search}"
page.goto(url)
sb.solve_captcha() # Might not be needed
sb.sleep(1)
page.wait_for_timeout(1000)
post_title = '[data-testid="post-title"]'
page.wait_for_selector(post_title)
for i in range(8):
Expand Down
9 changes: 5 additions & 4 deletions examples/cdp_mode/playwright/raw_seatgeek_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@
page.goto("https://seatgeek.com/")
input_field = 'input[name="search"]'
page.wait_for_selector(input_field)
sb.sleep(1.6)
page.wait_for_timeout(1600)
query = "Jerry Seinfeld"
sb.press_keys(input_field, query)
sb.sleep(1.6)
search_box = page.locator(input_field)
search_box.press_sequentially(query, delay=80)
page.wait_for_timeout(1600)
page.click("li#active-result-item")
sb.sleep(4.2)
page.wait_for_timeout(4200)
print('*** SeatGeek Search for "%s":' % query)
items = page.locator('[data-testid="listing-item"]')
for i in range(items.count()):
Expand Down
Loading