From 5e5519b1c618245ed9ac572e6efe51535e42e8f5 Mon Sep 17 00:00:00 2001
From: hafezparast <maysam@kidocode.com>
Date: Fri, 1 May 2026 16:18:03 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20log=20failure=20reason=20before=20COMPLE?=
 =?UTF-8?q?TE=20and=20fix=20misleading=20SCRAPE=20=E2=9C=93=20(#1949)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two issues caused silent COMPLETE ✗ with no diagnostic output:

1. When crawl_result.success=False (anti-bot detection, empty HTML, etc.),
   the error_message was set on the CrawlResult but never logged — users
   saw only [COMPLETE] ✗ with zero explanation. Fix: emit an [ERROR] log
   containing error_message before the COMPLETE line whenever
   success=False and error_message is non-empty.

2. The SCRAPE log in aprocess_html always emitted success=True regardless
   of whether scraping produced any content. Fix: pass
   success=bool(cleaned_html) so SCRAPE reports failure when cleaned_html
   is empty instead of always reporting success.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 crawl4ai/async_webcrawler.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/crawl4ai/async_webcrawler.py b/crawl4ai/async_webcrawler.py
index b0950ff8f..f81096c07 100644
--- a/crawl4ai/async_webcrawler.py
+++ b/crawl4ai/async_webcrawler.py
@@ -638,6 +638,14 @@ async def arun(
                             head_html = crawl_result.html[:head_end + 7]
                             crawl_result.head_fingerprint = compute_head_fingerprint(head_html)
 
+                    # Log failure reason before COMPLETE so users can see why it failed.
+                    if crawl_result and not crawl_result.success and crawl_result.error_message:
+                        self.logger.error_status(
+                            url=cache_context.display_url,
+                            error=crawl_result.error_message,
+                            tag="ERROR",
+                        )
+
                     self.logger.url_status(
                         url=cache_context.display_url,
                         success=crawl_result.success if crawl_result else False,
@@ -852,10 +860,10 @@ async def aprocess_html(
             )
         )
 
-        # Log processing completion
+        # Log processing completion — reflect actual content outcome
         self.logger.url_status(
             url=_url,
-            success=True,
+            success=bool(cleaned_html),
             timing=int((time.perf_counter() - t1) * 1000) / 1000,
             tag="SCRAPE"
         )