From 5e5519b1c618245ed9ac572e6efe51535e42e8f5 Mon Sep 17 00:00:00 2001 From: hafezparast Date: Fri, 1 May 2026 16:18:03 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20log=20failure=20reason=20before=20COMPLE?= =?UTF-8?q?TE=20and=20fix=20misleading=20SCRAPE=20=E2=9C=93=20(#1949)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues caused silent COMPLETE ✗ with no diagnostic output: 1. When crawl_result.success=False (anti-bot detection, empty HTML, etc.), the error_message was set on the CrawlResult but never logged — users saw only [COMPLETE] ✗ with zero explanation. Fix: emit an [ERROR] log containing error_message before the COMPLETE line whenever success=False. 2. The SCRAPE log in aprocess_html always emitted success=True regardless of whether scraping produced any content. Fix: use bool(cleaned_html) so SCRAPE reflects the actual outcome. Co-Authored-By: Claude Sonnet 4.6 --- crawl4ai/async_webcrawler.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/crawl4ai/async_webcrawler.py b/crawl4ai/async_webcrawler.py index b0950ff8f..f81096c07 100644 --- a/crawl4ai/async_webcrawler.py +++ b/crawl4ai/async_webcrawler.py @@ -638,6 +638,14 @@ async def arun( head_html = crawl_result.html[:head_end + 7] crawl_result.head_fingerprint = compute_head_fingerprint(head_html) + # Log failure reason before COMPLETE so users can see why it failed. + if crawl_result and not crawl_result.success and crawl_result.error_message: + self.logger.error_status( + url=cache_context.display_url, + error=crawl_result.error_message, + tag="ERROR", + ) + self.logger.url_status( url=cache_context.display_url, success=crawl_result.success if crawl_result else False, @@ -852,10 +860,10 @@ async def aprocess_html( ) ) - # Log processing completion + # Log processing completion — reflect actual content outcome self.logger.url_status( url=_url, - success=True, + success=bool(cleaned_html), timing=int((time.perf_counter() - t1) * 1000) / 1000, tag="SCRAPE" )