diff --git a/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py b/docs/guides/code_examples/playwright_crawler_stagehand/__init__.py
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py b/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py
deleted file mode 100644
index 67b76f1f47..0000000000
--- a/docs/guides/code_examples/playwright_crawler_stagehand/browser_classes.py
+++ /dev/null
@@ -1,101 +0,0 @@
-from __future__ import annotations
-
-from datetime import datetime, timezone
-from typing import TYPE_CHECKING, Any, cast
-
-from stagehand.context import StagehandContext
-from typing_extensions import override
-
-from crawlee.browsers import (
- PlaywrightBrowserController,
- PlaywrightBrowserPlugin,
- PlaywrightPersistentBrowser,
-)
-
-from .support_classes import CrawleeStagehandPage
-
-if TYPE_CHECKING:
- from collections.abc import Mapping
-
- from playwright.async_api import Page
- from stagehand import Stagehand
-
- from crawlee.proxy_configuration import ProxyInfo
-
-
-class StagehandBrowserController(PlaywrightBrowserController):
- @override
- def __init__(
- self, browser: PlaywrightPersistentBrowser, stagehand: Stagehand, **kwargs: Any
- ) -> None:
- # Initialize with browser context instead of browser instance
- super().__init__(browser, **kwargs)
-
- self._stagehand = stagehand
- self._stagehand_context: StagehandContext | None = None
-
- @override
- async def new_page(
- self,
- browser_new_context_options: Mapping[str, Any] | None = None,
- proxy_info: ProxyInfo | None = None,
- ) -> Page:
- # Initialize browser context if not already done
- if not self._browser_context:
- self._browser_context = await self._create_browser_context(
- browser_new_context_options=browser_new_context_options,
- proxy_info=proxy_info,
- )
-
- # Initialize Stagehand context if not already done
- if not self._stagehand_context:
- self._stagehand_context = await StagehandContext.init(
- self._browser_context, self._stagehand
- )
-
- # Create a new page using Stagehand context
- page = await self._stagehand_context.new_page()
-
- pw_page = page._page # noqa: SLF001
-
- # Handle page close event
- pw_page.on(event='close', f=self._on_page_close)
-
- # Update internal state
- self._pages.append(pw_page)
- self._last_page_opened_at = datetime.now(timezone.utc)
-
- self._total_opened_pages += 1
-
- # Wrap StagehandPage to provide Playwright Page interface
- return cast('Page', CrawleeStagehandPage(page))
-
-
-class StagehandPlugin(PlaywrightBrowserPlugin):
- """Browser plugin that integrates Stagehand with Crawlee's browser management."""
-
- @override
- def __init__(self, stagehand: Stagehand, **kwargs: Any) -> None:
- super().__init__(**kwargs)
-
- self._stagehand = stagehand
-
- @override
- async def new_browser(self) -> StagehandBrowserController:
- if not self._playwright:
- raise RuntimeError('Playwright browser plugin is not initialized.')
-
- browser = PlaywrightPersistentBrowser(
- # Stagehand can run only on a Chromium-based browser.
- self._playwright.chromium,
- self._user_data_dir,
- self._browser_launch_options,
- )
-
- # Return custom controller with Stagehand
- return StagehandBrowserController(
- browser=browser,
- stagehand=self._stagehand,
- header_generator=None,
- fingerprint_generator=None,
- )
diff --git a/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py b/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py
deleted file mode 100644
index 6cf8cc2689..0000000000
--- a/docs/guides/code_examples/playwright_crawler_stagehand/stagehand_run.py
+++ /dev/null
@@ -1,66 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-import os
-from typing import cast
-
-from stagehand import StagehandConfig, StagehandPage
-
-from crawlee import ConcurrencySettings
-from crawlee.browsers import BrowserPool
-from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
-
-from .browser_classes import StagehandPlugin
-from .support_classes import CrawleeStagehand
-
-
-async def main() -> None:
- # Configure local Stagehand with Gemini model
- config = StagehandConfig(
- env='LOCAL',
- model_name='google/gemini-2.5-flash-preview-05-20',
- model_api_key=os.getenv('GEMINI_API_KEY'),
- )
-
- # Create Stagehand instance
- stagehand = CrawleeStagehand(config)
-
- # Create crawler with custom browser pool using Stagehand
- crawler = PlaywrightCrawler(
- # Limit the crawl to max requests. Remove or increase it for crawling all links.
- max_requests_per_crawl=10,
- # Custom browser pool. Gives users full control over browsers used by the crawler.
- concurrency_settings=ConcurrencySettings(max_tasks_per_minute=10),
- browser_pool=BrowserPool(
- plugins=[
- StagehandPlugin(stagehand, browser_launch_options={'headless': True})
- ],
- ),
- )
-
- # Define the default request handler, which will be called for every request.
- @crawler.router.default_handler
- async def request_handler(context: PlaywrightCrawlingContext) -> None:
- context.log.info(f'Processing {context.request.url} ...')
-
- # Cast to StagehandPage for proper type hints in IDE
- page = cast('StagehandPage', context.page)
-
- # Use regular Playwright method
- playwright_title = await page.title()
- context.log.info(f'Playwright page title: {playwright_title}')
-
- # highlight-start
- # Use AI-powered extraction with natural language
- gemini_title = await page.extract('Extract page title')
- context.log.info(f'Gemini page title: {gemini_title}')
- # highlight-end
-
- await context.enqueue_links()
-
- # Run the crawler with the initial list of URLs.
- await crawler.run(['https://crawlee.dev/'])
-
-
-if __name__ == '__main__':
- asyncio.run(main())
diff --git a/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py b/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py
deleted file mode 100644
index cccb62e989..0000000000
--- a/docs/guides/code_examples/playwright_crawler_stagehand/support_classes.py
+++ /dev/null
@@ -1,57 +0,0 @@
-from __future__ import annotations
-
-from typing import TYPE_CHECKING, Any
-
-from stagehand import Stagehand, StagehandPage
-
-if TYPE_CHECKING:
- from types import TracebackType
-
-
-class CrawleeStagehandPage:
- """StagehandPage wrapper for Crawlee."""
-
- def __init__(self, page: StagehandPage) -> None:
- self._page = page
-
- async def goto(
- self,
- url: str,
- *,
- referer: str | None = None,
- timeout: int | None = None,
- wait_until: str | None = None,
- ) -> Any:
- """Navigate to the specified URL."""
- # Override goto to return navigation result that `PlaywrightCrawler` expects
- return await self._page._page.goto( # noqa: SLF001
- url,
- referer=referer,
- timeout=timeout,
- wait_until=wait_until,
- )
-
- def __getattr__(self, name: str) -> Any:
- """Delegate all other methods to the underlying StagehandPage."""
- return getattr(self._page, name)
-
- async def __aenter__(self) -> CrawleeStagehandPage:
- """Enter the context manager."""
- return self
-
- async def __aexit__(
- self,
- exc_type: type[BaseException] | None,
- exc_value: BaseException | None,
- exc_traceback: TracebackType | None,
- ) -> None:
- await self._page.close()
-
-
-class CrawleeStagehand(Stagehand):
- """Stagehand wrapper for Crawlee to disable the launch of Playwright."""
-
- async def init(self) -> None:
- # Skip Stagehand's own Playwright initialization
- # Let Crawlee's PlaywrightBrowserPlugin manage the browser lifecycle
- self._initialized = True
diff --git a/docs/guides/playwright_crawler_stagehand.mdx b/docs/guides/playwright_crawler_stagehand.mdx
deleted file mode 100644
index 59a34b4cd2..0000000000
--- a/docs/guides/playwright_crawler_stagehand.mdx
+++ /dev/null
@@ -1,66 +0,0 @@
----
-id: playwright-crawler-stagehand
-title: Playwright with Stagehand
-description: How to integrate Stagehand AI-powered automation with PlaywrightCrawler.
----
-
-import ApiLink from '@site/src/components/ApiLink';
-import CodeBlock from '@theme/CodeBlock';
-
-import SupportClasses from '!!raw-loader!./code_examples/playwright_crawler_stagehand/support_classes.py';
-import BrowserClasses from '!!raw-loader!./code_examples/playwright_crawler_stagehand/browser_classes.py';
-import StagehandRun from '!!raw-loader!./code_examples/playwright_crawler_stagehand/stagehand_run.py';
-
-[Stagehand](https://docs.stagehand.dev/) is a framework that combines [Playwright](https://playwright.dev/python/) with AI-driven natural language understanding and decision-making capabilities. With Stagehand, you can use natural language instructions to interact with web pages instead of writing complex selectors and automation logic.
-
-Stagehand supports multiple AI models through [`LiteLLM`](https://docs.litellm.ai/docs/). This guide demonstrates how to integrate Stagehand with `PlaywrightCrawler` using [Gemini](https://ai.google.dev/gemini-api/docs) as the AI model provider.
-
-:::info
-
-This guide is based on stagehand-python v0.4.0 with local configuration settings and may not be compatible with newer versions.
-
-:::
-
-## Get Gemini API key
-
-You need to register with [Google AI Studio](https://aistudio.google.com/) and navigate to [Get API key](https://aistudio.google.com/app/apikey) to obtain your API key.
-
-## Create support classes for Stagehand
-
-To integrate Stagehand with Crawlee, you need to create wrapper classes that allow `PlaywrightBrowserPlugin` to manage the Playwright lifecycle.
-
-Create `CrawleeStagehand` - a custom Stagehand subclass that overrides the `init` method to prevent Stagehand from launching its own Playwright instance.
-
-Create `CrawleeStagehandPage` - a wrapper class for `StagehandPage` that implements the [Playwright Page](https://playwright.dev/python/docs/next/api/class-page) behavior expected by `PlaywrightCrawler`.
-
-
- {SupportClasses}
-
-
-## Create browser integration classes
-
-You need to create a custom browser plugin and controller that properly initialize Stagehand and obtain browser pages from `StagehandContext`.
-
-Create `StagehandPlugin` - a subclass of `PlaywrightBrowserPlugin` that holds the Stagehand instance and creates `PlaywrightPersistentBrowser` instances.
-
-Create `StagehandBrowserController` - a subclass of `PlaywrightBrowserController` that lazily initializes `StagehandContext` and creates new pages with AI capabilities on demand.
-
-
- {BrowserClasses}
-
-
-## Create a crawler
-
-Now you can create a `PlaywrightCrawler` that uses Stagehand's AI capabilities to interact with web pages using natural language commands:
-
-
- {StagehandRun}
-
-
-The integration works through several key components:
-- `CrawleeStagehand` prevents Stagehand from launching its own Playwright instance, allowing Crawlee to manage the browser lifecycle
-- `StagehandPlugin` extends the Playwright browser plugin to create Stagehand-enabled browser instances
-- `StagehandBrowserController` uses `StagehandContext` to create pages with AI capabilities
-- `CrawleeStagehandPage` provides interface compatibility between Stagehand pages and Crawlee's expectations
-
-In the request handler, you can use natural language commands like `page.extract('Extract title page')` to perform intelligent data extraction without writing complex selectors.
diff --git a/pyproject.toml b/pyproject.toml
index 4a5444ea44..bd91dd98cd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -77,6 +77,9 @@ sql_postgres = [
"sqlalchemy[asyncio]>=2.0.0,<3.0.0",
"asyncpg>=0.24.0"
]
+stagehand = [
+ "stagehand>=3.19.0",
+]
sql_sqlite = [
"sqlalchemy[asyncio]>=2.0.0,<3.0.0",
"aiosqlite>=0.21.0",
diff --git a/src/crawlee/browsers/_stagehand_browser_controller.py b/src/crawlee/browsers/_stagehand_browser_controller.py
new file mode 100644
index 0000000000..c391471f2c
--- /dev/null
+++ b/src/crawlee/browsers/_stagehand_browser_controller.py
@@ -0,0 +1,193 @@
+from __future__ import annotations
+
+from asyncio import Lock
+from datetime import datetime, timedelta, timezone
+from logging import getLogger
+from typing import TYPE_CHECKING, Any, cast
+
+from playwright.async_api import Browser, BrowserContext, Page, ProxySettings
+from typing_extensions import override
+
+from crawlee._utils.docs import docs_group
+from crawlee.browsers._browser_controller import BrowserController
+from crawlee.browsers._types import StagehandPage
+
+if TYPE_CHECKING:
+ from collections.abc import Mapping
+
+ from stagehand import AsyncSession
+
+ from crawlee.browsers._types import BrowserType
+ from crawlee.proxy_configuration import ProxyInfo
+
+logger = getLogger(__name__)
+
+
+@docs_group('Browser management')
+class StagehandBrowserController(BrowserController):
+ """Controller for managing a Stagehand-controlled browser instance.
+
+ Bridges Crawlee's browser management with Stagehand: provides page creation via
+ Playwright (connected to Stagehand's browser via CDP) and exposes the Stagehand
+ session so the crawling context can access AI methods (act/extract/observe).
+ """
+
+ AUTOMATION_LIBRARY = 'stagehand'
+
+ def __init__(
+ self,
+ browser: Browser,
+ session: AsyncSession,
+ *,
+ max_open_pages_per_browser: int = 20,
+ ) -> None:
+ """Initialize a new instance.
+
+ Args:
+ browser: Playwright browser connected to Stagehand via CDP.
+ session: Active Stagehand session used for AI operations.
+ max_open_pages_per_browser: Maximum number of pages open at the same time.
+ """
+ self._browser = browser
+ self._session = session
+ self._max_open_pages_per_browser = max_open_pages_per_browser
+
+ self._browser_context: BrowserContext | None = None
+ self._pages = list[Page]()
+ self._total_opened_pages = 0
+ self._opening_pages_count = 0
+ self._last_page_opened_at = datetime.now(timezone.utc)
+ self._context_creation_lock: Lock | None = None
+
+ @property
+ @override
+ def pages(self) -> list[Page]:
+ return self._pages # type: ignore[return-value]
+
+ @property
+ @override
+ def total_opened_pages(self) -> int:
+ return self._total_opened_pages
+
+ @property
+ @override
+ def pages_count(self) -> int:
+ return len(self._pages)
+
+ @property
+ @override
+ def last_page_opened_at(self) -> datetime:
+ return self._last_page_opened_at
+
+ @property
+ @override
+ def idle_time(self) -> timedelta:
+ return datetime.now(timezone.utc) - self._last_page_opened_at
+
+ @property
+ @override
+ def has_free_capacity(self) -> bool:
+ return (self.pages_count + self._opening_pages_count) < self._max_open_pages_per_browser
+
+ @property
+ @override
+ def is_browser_connected(self) -> bool:
+ return self._browser.is_connected()
+
+ @property
+ @override
+ def browser_type(self) -> BrowserType:
+ return 'chromium'
+
+ async def _get_context_creation_lock(self) -> Lock:
+ if self._context_creation_lock is None:
+ self._context_creation_lock = Lock()
+ return self._context_creation_lock
+
+ @override
+ async def new_page(
+ self,
+ browser_new_context_options: Mapping[str, Any] | None = None,
+ proxy_info: ProxyInfo | None = None,
+ ) -> StagehandPage:
+ """Create a new page in the Stagehand-managed browser.
+
+ Args:
+ browser_new_context_options: Ignored. Context is managed by Stagehand via CDP.
+ proxy_info: Proxy configuration applied when creating the shared browser context.
+ All pages share one context, so proxy is fixed on the first call.
+
+ Returns:
+ The newly created page.
+
+ Raises:
+ ValueError: If the browser has reached the maximum number of open pages.
+ """
+ if not self.has_free_capacity:
+ raise ValueError('Cannot open more pages in this browser.')
+
+ if browser_new_context_options:
+ logger.warning(
+ 'browser_new_context_options are ignored by StagehandBrowserController. '
+ 'The existing CDP context is reused.'
+ )
+
+ self._opening_pages_count += 1
+
+ try:
+ async with await self._get_context_creation_lock():
+ if self._browser_context is None:
+ if proxy_info:
+ self._browser_context = await self._browser.new_context(
+ proxy=ProxySettings(
+ server=f'{proxy_info.scheme}://{proxy_info.hostname}:{proxy_info.port}',
+ username=proxy_info.username,
+ password=proxy_info.password,
+ )
+ )
+ elif self._browser.contexts:
+ # Reuse the existing CDP context when no proxy is needed.
+ self._browser_context = self._browser.contexts[0]
+ else:
+ self._browser_context = await self._browser.new_context()
+ elif proxy_info:
+ logger.warning(
+ 'proxy_info is ignored for subsequent pages — all pages share the same browser context.'
+ )
+
+ raw_page = await self._browser_context.new_page()
+ page = StagehandPage(raw_page, self._session)
+ raw_page.on('close', lambda _: self._on_page_close(cast('Page', page)))
+
+ self._pages.append(page)
+ self._last_page_opened_at = datetime.now(timezone.utc)
+ self._total_opened_pages += 1
+ finally:
+ self._opening_pages_count -= 1
+
+ return page
+
+ @override
+ async def close(self, *, force: bool = False) -> None:
+ """End the Stagehand session and close the browser connection.
+
+ Args:
+ force: Whether to force close all open pages before closing.
+
+ Raises:
+ ValueError: If there are still open pages when closing without force.
+ """
+ if self.pages_count > 0 and not force:
+ raise ValueError('Cannot close the browser while there are open pages.')
+
+ try:
+ await self._session.end()
+ except Exception:
+ logger.warning('Failed to end Stagehand session gracefully.', exc_info=True)
+
+ if self._browser.is_connected():
+ await self._browser.close()
+
+ def _on_page_close(self, page: Page) -> None:
+ """Handle actions after a page is closed."""
+ self._pages.remove(page)
diff --git a/src/crawlee/browsers/_stagehand_browser_plugin.py b/src/crawlee/browsers/_stagehand_browser_plugin.py
new file mode 100644
index 0000000000..65a65dbf76
--- /dev/null
+++ b/src/crawlee/browsers/_stagehand_browser_plugin.py
@@ -0,0 +1,221 @@
+from __future__ import annotations
+
+from logging import getLogger
+from typing import TYPE_CHECKING, Any
+
+from playwright.async_api import Playwright, async_playwright
+from stagehand import AsyncStagehand
+from typing_extensions import override
+
+from crawlee import service_locator
+from crawlee._utils.context import ensure_context
+from crawlee._utils.docs import docs_group
+from crawlee.browsers._browser_plugin import BrowserPlugin
+from crawlee.browsers._stagehand_browser_controller import StagehandBrowserController
+from crawlee.browsers._types import StagehandOptions
+
+if TYPE_CHECKING:
+ from collections.abc import Mapping
+ from types import TracebackType
+
+ from crawlee.browsers._browser_controller import BrowserController
+ from crawlee.browsers._types import BrowserType
+
+logger = getLogger(__name__)
+
+
+@docs_group('Browser management')
+class StagehandBrowserPlugin(BrowserPlugin):
+ """A plugin for managing Stagehand AI-powered browser automation.
+
+ Stagehand creates and manages the browser instance (local binary or Browserbase cloud).
+ Playwright then connects to it via CDP, enabling both standard Playwright automation
+ and AI-powered operations in the same crawling context.
+
+ Only Chromium is supported because Stagehand relies on the Chrome DevTools Protocol.
+ """
+
+ AUTOMATION_LIBRARY = 'stagehand'
+
+ def __init__(
+ self,
+ *,
+ stagehand_options: StagehandOptions | None = None,
+ browser_new_context_options: dict[str, Any] | None = None,
+ max_open_pages_per_browser: int = 20,
+ local_ready_timeout_s: float = 30.0,
+ ) -> None:
+ """Initialize a new instance.
+
+ Args:
+ stagehand_options: Stagehand-specific configuration. Defaults to
+ ``StagehandOptions()`` if not provided.
+            browser_new_context_options: Intended for Playwright's ``browser.new_context``
+                after connecting via CDP. NOTE(review): currently stored but never
+                forwarded to ``StagehandBrowserController``, which ignores per-page
+                context options — wire it through or drop the parameter.
+ max_open_pages_per_browser: Maximum number of pages that can be open per browser.
+ local_ready_timeout_s: Seconds to wait for the local Stagehand binary to
+ become ready. Only relevant when ``env='LOCAL'``.
+ """
+ opts = stagehand_options or StagehandOptions()
+ config = service_locator.get_configuration()
+
+ self._opts = opts
+ self._browser_new_context_options = browser_new_context_options or {}
+ self._max_open_pages_per_browser = max_open_pages_per_browser
+
+ # headless comes from Configuration, same as PlaywrightBrowserPlugin.
+ # chrome_path is resolved lazily in __aenter__ once Playwright is available.
+ self._headless = config.headless
+ self._chrome_path: str | None = config.default_browser_path
+
+ is_local = opts.env == 'LOCAL'
+ self._stagehand_init_kwargs: dict[str, Any] = {
+ 'server': 'local' if is_local else 'remote',
+ 'local_headless': self._headless,
+ 'local_ready_timeout_s': local_ready_timeout_s,
+ }
+ if is_local:
+ self._stagehand_init_kwargs['model_api_key'] = opts.api_key
+ else:
+ self._stagehand_init_kwargs['browserbase_api_key'] = opts.api_key
+ self._stagehand_init_kwargs['browserbase_project_id'] = opts.project_id
+
+ # AsyncStagehand is created lazily in __aenter__ so that chrome_path
+ # can be resolved from playwright.chromium.executable_path if not set.
+ self._stagehand_context_manager: AsyncStagehand | None = None
+ self._stagehand_client: AsyncStagehand | None = None
+
+ self._playwright_context_manager = async_playwright()
+ self._playwright: Playwright | None = None
+
+ # Flag to indicate the context state.
+ self._active = False
+
+ @property
+ @override
+ def active(self) -> bool:
+ return self._active
+
+ @property
+ @override
+ def browser_type(self) -> BrowserType:
+ return 'chromium'
+
+ @property
+ @override
+ def browser_launch_options(self) -> Mapping[str, Any]:
+ """Return an empty mapping.
+
+ Browser launch is managed by Stagehand, not Playwright directly.
+ """
+ return {}
+
+ @property
+ @override
+ def browser_new_context_options(self) -> Mapping[str, Any]:
+ """Return the options for the ``browser.new_context`` method.
+
+ These options are passed to Playwright's ``browser.new_context`` after
+ connecting to the Stagehand-managed browser via CDP. Refer to the Playwright
+ documentation for available options:
+ https://playwright.dev/python/docs/api/class-browser#browser-new-context.
+ """
+ return self._browser_new_context_options
+
+ @property
+ @override
+ def max_open_pages_per_browser(self) -> int:
+ return self._max_open_pages_per_browser
+
+ @override
+ async def __aenter__(self) -> StagehandBrowserPlugin:
+ if self._active:
+ raise RuntimeError(f'The {self.__class__.__name__} is already active.')
+
+ self._active = True
+ self._playwright = await self._playwright_context_manager.__aenter__()
+
+ # Resolve Chromium path from Playwright's own installation when not set
+ # explicitly via Configuration. The stagehand binary needs an explicit path.
+ if self._chrome_path is None and self._opts.env == 'LOCAL':
+ self._chrome_path = self._playwright.chromium.executable_path
+ self._stagehand_init_kwargs['local_chrome_path'] = self._chrome_path
+ logger.debug(f'Resolved Chromium path from Playwright: {self._chrome_path}')
+
+ self._stagehand_context_manager = AsyncStagehand(**self._stagehand_init_kwargs)
+ self._stagehand_client = await self._stagehand_context_manager.__aenter__()
+
+ return self
+
+ @override
+ async def __aexit__(
+ self,
+ exc_type: type[BaseException] | None,
+ exc_value: BaseException | None,
+ exc_traceback: TracebackType | None,
+ ) -> None:
+ if not self._active:
+ raise RuntimeError(f'The {self.__class__.__name__} is not active.')
+
+ if self._stagehand_context_manager is not None:
+ await self._stagehand_context_manager.__aexit__(exc_type, exc_value, exc_traceback)
+
+ await self._playwright_context_manager.__aexit__(exc_type, exc_value, exc_traceback)
+
+ self._stagehand_context_manager = None
+ self._playwright_context_manager = async_playwright()
+ self._stagehand_client = None
+ self._playwright = None
+ self._active = False
+
+ @override
+ @ensure_context
+ async def new_browser(self) -> BrowserController:
+ if not self._playwright or not self._stagehand_client:
+ raise RuntimeError(f'{self.__class__.__name__} is not initialized.')
+
+ session = await self._stagehand_client.sessions.start(**self._build_session_kwargs())
+
+ cdp_url = session.data.cdp_url
+ if not cdp_url:
+ raise RuntimeError(
+ f'No cdp_url returned from Stagehand (env={self._opts.env!r}). '
+ 'Cannot connect Playwright to the browser.'
+ )
+
+ browser = await self._playwright.chromium.connect_over_cdp(cdp_url)
+
+ return StagehandBrowserController(
+ browser,
+ session,
+ max_open_pages_per_browser=self._max_open_pages_per_browser,
+ )
+
+ def _build_session_kwargs(self) -> dict[str, Any]:
+ """Build keyword arguments for ``sessions.start``."""
+ opts = self._opts
+
+ if opts.env == 'BROWSERBASE':
+ browser_param: dict[str, Any] = {'type': 'browserbase'}
+ else:
+ launch_options: dict[str, Any] = {'headless': self._headless}
+ browser_param = {
+ 'type': 'local',
+ 'launchOptions': launch_options,
+            }
+
+ kwargs: dict[str, Any] = {
+ 'model_name': opts.model,
+ 'browser': browser_param,
+ 'verbose': opts.verbose,
+ 'self_heal': opts.self_heal,
+ }
+
+ if opts.dom_settle_timeout_ms is not None:
+ kwargs['dom_settle_timeout_ms'] = opts.dom_settle_timeout_ms
+ if opts.system_prompt is not None:
+ kwargs['system_prompt'] = opts.system_prompt
+
+ return kwargs
diff --git a/src/crawlee/browsers/_types.py b/src/crawlee/browsers/_types.py
index c5976b086a..083c542268 100644
--- a/src/crawlee/browsers/_types.py
+++ b/src/crawlee/browsers/_types.py
@@ -1,10 +1,22 @@
from __future__ import annotations
from dataclasses import dataclass
-from typing import TYPE_CHECKING, Literal
+from typing import TYPE_CHECKING, Any, Literal
+
+from playwright.async_api import Page
if TYPE_CHECKING:
- from playwright.async_api import Page
+ from stagehand import AsyncSession
+ from stagehand.types.session_act_params import SessionActParams
+ from stagehand.types.session_act_response import SessionActResponse
+ from stagehand.types.session_execute_params import SessionExecuteParams
+ from stagehand.types.session_execute_response import SessionExecuteResponse
+ from stagehand.types.session_extract_params import SessionExtractParams
+ from stagehand.types.session_extract_response import SessionExtractResponse
+ from stagehand.types.session_observe_params import SessionObserveParams
+ from stagehand.types.session_observe_response import SessionObserveResponse
+ from typing_extensions import Unpack
+
BrowserType = Literal['chromium', 'firefox', 'webkit', 'chrome']
@@ -16,3 +28,80 @@ class CrawleePage:
id: str
browser_type: BrowserType
page: Page
+
+
+@dataclass
+class StagehandOptions:
+ env: Literal['LOCAL', 'BROWSERBASE'] = 'LOCAL'
+ api_key: str | None = None
+ project_id: str | None = None
+ model: str = 'openai/gpt-4.1-mini'
+ verbose: Literal[0, 1, 2] = 0
+ self_heal: bool = True
+ dom_settle_timeout_ms: float | None = None
+ system_prompt: str | None = None
+
+
+class StagehandPage(Page):
+ """A Playwright `Page` enhanced with Stagehand AI methods.
+
+ Wraps a Playwright `Page` and an `AsyncSession`, proxying all standard Playwright
+ methods transparently while adding `act()`, `extract()`, and `observe()` AI operations
+ bound to the current page.
+ """
+
+ def __init__(self, page: Page, session: AsyncSession) -> None:
+ self._page = page
+ self._session = session
+
+ def __getattr__(self, name: str) -> Any:
+ return getattr(self._page, name)
+
+ async def act(self, **kwargs: Unpack[SessionActParams]) -> SessionActResponse:
+ """Perform an action on the page using natural language.
+
+ Args:
+ **kwargs: Parameters passed to ``AsyncSession.act()``.
+ The most common is ``instruction`` — a natural language description
+ of the action to perform, e.g. ``instruction='click the login button'``.
+
+ Returns:
+ The action result from Stagehand.
+ """
+ return await self._session.act(page=self._page, **kwargs)
+
+ async def observe(self, **kwargs: Unpack[SessionObserveParams]) -> SessionObserveResponse:
+ """Observe the page and get AI-suggested actions.
+
+ Args:
+ **kwargs: Parameters passed to ``AsyncSession.observe()``.
+ Optionally pass ``instruction`` to narrow the observation scope.
+
+ Returns:
+ Observation result with suggested actions.
+ """
+ return await self._session.observe(page=self._page, **kwargs)
+
+ async def extract(self, **kwargs: Unpack[SessionExtractParams]) -> SessionExtractResponse:
+ """Extract structured data from the page using natural language.
+
+ Args:
+ **kwargs: Parameters passed to ``AsyncSession.extract()``.
+ Common parameters: ``instruction`` and ``schema`` (JSON Schema dict).
+
+ Returns:
+ Extracted data matching the requested schema.
+ """
+ return await self._session.extract(page=self._page, **kwargs)
+
+ async def execute(self, **kwargs: Unpack[SessionExecuteParams]) -> SessionExecuteResponse:
+ """Execute arbitrary code on the page via natural language instructions.
+
+ Args:
+ **kwargs: Parameters passed to ``AsyncSession.execute()``.
+ Common parameters: ``instruction`` describing the code to execute.
+
+ Returns:
+ The result of the executed code.
+ """
+ return await self._session.execute(page=self._page, **kwargs)
diff --git a/src/crawlee/crawlers/_stagehand/__init__.py b/src/crawlee/crawlers/_stagehand/__init__.py
new file mode 100644
index 0000000000..1199f0cf2b
--- /dev/null
+++ b/src/crawlee/crawlers/_stagehand/__init__.py
@@ -0,0 +1,3 @@
+from crawlee.crawlers._stagehand._stagehand_crawler import StagehandCrawler
+
+__all__ = ['StagehandCrawler']
diff --git a/src/crawlee/crawlers/_stagehand/_stagehand_crawler.py b/src/crawlee/crawlers/_stagehand/_stagehand_crawler.py
new file mode 100644
index 0000000000..b367cc727a
--- /dev/null
+++ b/src/crawlee/crawlers/_stagehand/_stagehand_crawler.py
@@ -0,0 +1,99 @@
+from __future__ import annotations
+
+import warnings
+from typing import TYPE_CHECKING, Any
+
+from crawlee._utils.docs import docs_group
+from crawlee.browsers import BrowserPool
+from crawlee.browsers._stagehand_browser_plugin import StagehandBrowserPlugin
+from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
+
+if TYPE_CHECKING:
+ from typing_extensions import Unpack
+
+ from crawlee.browsers._types import StagehandOptions
+ from crawlee.crawlers._basic import BasicCrawlerOptions
+ from crawlee.statistics import StatisticsState
+
+
+@docs_group('Crawlers')
+class StagehandCrawler(PlaywrightCrawler):
+ """A web crawler that integrates Stagehand AI-powered browser automation with Crawlee.
+
+ Extends `PlaywrightCrawler` with a `StagehandBrowserPlugin` that manages a Stagehand
+ session per browser instance. Each page in the crawling context is a `StagehandPage`,
+ which exposes AI methods alongside all standard Playwright `Page` methods:
+
+ - `page.act(**kwargs)` — perform actions using natural language
+ - `page.extract(**kwargs)` — extract structured data with AI
+ - `page.observe(**kwargs)` — get AI-suggested actions on the page
+    - `page.execute(**kwargs)` — execute arbitrary code on the page via natural language
+
+ ### Usage
+
+ ```python
+ from crawlee.crawlers import StagehandCrawler
+    from crawlee.crawlers import PlaywrightCrawlingContext
+
+    crawler = StagehandCrawler()
+
+    @crawler.router.default_handler
+    async def handler(context: PlaywrightCrawlingContext) -> None:
+        await context.page.act(instruction='Click the login button')
+ data = await context.page.extract(instruction='Get the page title')
+ await context.push_data(data)
+
+ await crawler.run(['https://example.com'])
+ ```
+ """
+
+ def __init__(
+ self,
+ *,
+ stagehand_options: StagehandOptions | None = None,
+ browser_pool: BrowserPool | None = None,
+ browser_new_context_options: dict[str, Any] | None = None,
+ max_open_pages_per_browser: int = 20,
+ **kwargs: Unpack[BasicCrawlerOptions[PlaywrightCrawlingContext, StatisticsState]],
+ ) -> None:
+ """Initialize a new instance.
+
+ Args:
+ stagehand_options: Stagehand-specific configuration (model, API key, env, etc.).
+ Ignored if `browser_pool` is provided.
+ browser_pool: A pre-configured `BrowserPool`. All plugins must be instances of
+ `StagehandBrowserPlugin` (or its subclasses). If omitted, a pool is created
+ automatically from `stagehand_options`.
+ browser_new_context_options: Options passed to Playwright's `browser.new_context`
+ after connecting via CDP. Ignored if `browser_pool` is provided.
+ max_open_pages_per_browser: Maximum pages open per browser instance.
+ Ignored if `browser_pool` is provided.
+ kwargs: Additional keyword arguments forwarded to `BasicCrawler`.
+ """
+ if browser_pool is not None:
+ self._validate_browser_pool(browser_pool)
+ if stagehand_options is not None:
+ warnings.warn(
+ '`stagehand_options` is ignored when `browser_pool` is provided.',
+ stacklevel=2,
+ )
+ else:
+ browser_pool = BrowserPool(
+ plugins=[
+ StagehandBrowserPlugin(
+ stagehand_options=stagehand_options,
+ browser_new_context_options=browser_new_context_options,
+ max_open_pages_per_browser=max_open_pages_per_browser,
+ )
+ ]
+ )
+
+ super().__init__(browser_pool=browser_pool, **kwargs)
+
+ @staticmethod
+ def _validate_browser_pool(pool: BrowserPool) -> None:
+ invalid = [p for p in pool.plugins if not isinstance(p, StagehandBrowserPlugin)]
+ if invalid:
+ raise ValueError(
+ f'All BrowserPool plugins must be StagehandBrowserPlugin instances. Invalid plugins: {invalid}'
+ )
diff --git a/uv.lock b/uv.lock
index f7df250dc9..6f53d2c868 100644
--- a/uv.lock
+++ b/uv.lock
@@ -9,7 +9,7 @@ resolution-markers = [
]
[options]
-exclude-newer = "2026-04-15T07:01:49.228326682Z"
+exclude-newer = "2026-04-18T21:01:24.3365857Z"
exclude-newer-span = "PT24H"
[[package]]
@@ -873,6 +873,9 @@ sql-sqlite = [
{ name = "aiosqlite" },
{ name = "sqlalchemy", extra = ["asyncio"] },
]
+stagehand = [
+ { name = "stagehand" },
+]
[package.dev-dependencies]
dev = [
@@ -948,13 +951,14 @@ requires-dist = [
{ name = "sqlalchemy", extras = ["asyncio"], marker = "extra == 'sql-mysql'", specifier = ">=2.0.0,<3.0.0" },
{ name = "sqlalchemy", extras = ["asyncio"], marker = "extra == 'sql-postgres'", specifier = ">=2.0.0,<3.0.0" },
{ name = "sqlalchemy", extras = ["asyncio"], marker = "extra == 'sql-sqlite'", specifier = ">=2.0.0,<3.0.0" },
+ { name = "stagehand", marker = "extra == 'stagehand'", specifier = ">=3.19.0" },
{ name = "tldextract", specifier = ">=5.1.0" },
{ name = "typer", marker = "extra == 'cli'", specifier = ">=0.12.0" },
{ name = "typing-extensions", specifier = ">=4.1.0" },
{ name = "wrapt", marker = "extra == 'otel'", specifier = ">=1.17.0" },
{ name = "yarl", specifier = ">=1.18.0" },
]
-provides-extras = ["all", "adaptive-crawler", "beautifulsoup", "cli", "curl-impersonate", "httpx", "parsel", "playwright", "otel", "sql-postgres", "sql-sqlite", "sql-mysql", "redis"]
+provides-extras = ["all", "adaptive-crawler", "beautifulsoup", "cli", "curl-impersonate", "httpx", "parsel", "playwright", "otel", "sql-postgres", "stagehand", "sql-sqlite", "sql-mysql", "redis"]
[package.metadata.requires-dev]
dev = [
@@ -1148,6 +1152,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" },
]
+[[package]]
+name = "distro"
+version = "1.9.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" },
+]
+
[[package]]
name = "docspec"
version = "2.2.1"
@@ -3704,6 +3717,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
]
+[[package]]
+name = "sniffio"
+version = "1.3.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
+]
+
[[package]]
name = "sortedcontainers"
version = "2.4.0"
@@ -3787,6 +3809,26 @@ asyncio = [
{ name = "greenlet" },
]
+[[package]]
+name = "stagehand"
+version = "3.19.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "anyio" },
+ { name = "distro" },
+ { name = "httpx" },
+ { name = "pydantic" },
+ { name = "sniffio" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d9/f8/ccd2bb2758a4eaf0af3846e097ff206e0aa76c8d3b5aa2bded77fb47825e/stagehand-3.19.5.tar.gz", hash = "sha256:3cb8279ac82051e584b34d26e87dc764f0ccad766a01625198ca578eb35f0b6c", size = 281033, upload-time = "2026-04-03T20:21:09.792Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/d1/6f/a47bad258bfafc193ebb8e0e8c440e8028c9ab28b54a333b46aa3c0cff53/stagehand-3.19.5-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:14f39a4f8d30d77c089166185c705f66aade25432b903a663a937b3747439c26", size = 34495874, upload-time = "2026-04-03T20:21:07.366Z" },
+ { url = "https://files.pythonhosted.org/packages/72/f7/e39868903121f1a80ae6eda088383362cd2d3a578c04493a2f83c1aac1da/stagehand-3.19.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:80ed0d732cb9c3e952ad851e071dad5775a9ea88d2787c006289d61097fd2609", size = 33193535, upload-time = "2026-04-03T20:21:18.536Z" },
+ { url = "https://files.pythonhosted.org/packages/c8/0b/35cb92bb53e9539c0147892dbd0a227b43bf0d8adcd0a8e867dc5f2bf7fd/stagehand-3.19.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:aa947a5f6241f5953ac238cd9b0ab72e0cb87f559f97e5ee875f83dbc0c351d1", size = 37273148, upload-time = "2026-04-03T20:21:11.939Z" },
+ { url = "https://files.pythonhosted.org/packages/7c/c7/dccf63cba1941b5710dc9968218e2883a937cf6534d644bb0c5222d3f40a/stagehand-3.19.5-py3-none-win_amd64.whl", hash = "sha256:e37bf630b99b4a9b7d95f151c56b296940db88b3049b68f0abb56f9e31cc6095", size = 30758357, upload-time = "2026-04-03T20:21:15.121Z" },
+]
+
[[package]]
name = "text-unidecode"
version = "1.3"