diff --git a/crawl4ai/content_scraping_strategy.py b/crawl4ai/content_scraping_strategy.py index 9853f788f..9cdc53cfd 100644 --- a/crawl4ai/content_scraping_strategy.py +++ b/crawl4ai/content_scraping_strategy.py @@ -562,6 +562,14 @@ def remove_empty_elements_fast(self, root, word_count_threshold=5): ): parent = el.getparent() if parent is not None: + # Preserve .tail text before removing the element + tail = el.tail + if tail: + prev = el.getprevious() + if prev is not None: + prev.tail = (prev.tail or "") + tail + else: + parent.text = (parent.text or "") + tail parent.remove(el) return root