diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4cad8db24..692563019 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -62,7 +62,7 @@ jobs: uses: actions/cache@v5 with: path: ~/.cargo - key: cargo-cache-${{ steps.rust-toolchain.outputs.cachekey }}-${{ hashFiles('Cargo.lock') }} + key: cargo-cache-${{ matrix.toolchain }}-${{ hashFiles('Cargo.lock') }} - name: Install dependencies uses: astral-sh/setup-uv@v7 @@ -106,7 +106,7 @@ jobs: RUST_BACKTRACE: 1 run: | git submodule update --init - uv run --no-project pytest -v . --import-mode=importlib + uv run --no-project pytest -v --import-mode=importlib - name: FFI unit tests run: | diff --git a/pyproject.toml b/pyproject.toml index 08d64eca0..b238e049e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,6 +70,9 @@ features = ["substrait"] [tool.pytest.ini_options] asyncio_mode = "auto" asyncio_default_fixture_loop_scope = "function" +addopts = "--doctest-modules" +doctest_optionflags = ["NORMALIZE_WHITESPACE", "ELLIPSIS"] +testpaths = ["python/tests", "python/datafusion"] # Enable docstring linting using the google style guide [tool.ruff.lint] diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index d302c12a5..5bd0eec2d 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -327,8 +327,9 @@ def into_view(self, temporary: bool = False) -> Table: >>> df = ctx.sql("SELECT 1 AS value") >>> view = df.into_view() >>> ctx.register_table("values_view", view) - >>> df.collect() # The DataFrame is still usable - >>> ctx.sql("SELECT value FROM values_view").collect() + >>> result = ctx.sql("SELECT value FROM values_view").collect() + >>> result[0].column("value").to_pylist() + [1] """ from datafusion.catalog import Table as _Table @@ -1389,9 +1390,12 @@ def fill_null(self, value: Any, subset: list[str] | None = None) -> DataFrame: DataFrame with null values replaced where type casting is possible Examples: - >>> df = df.fill_null(0) # Fill all nulls with 0 where possible - >>> # Fill nulls in specific string columns - >>> df = df.fill_null("missing", subset=["name", "category"]) + >>> from datafusion import SessionContext, col + >>> ctx = SessionContext() + >>> df = ctx.from_pydict({"a": [1, None, 3], "b": [None, 5, 6]}) + >>> filled = df.fill_null(0) + >>> filled.sort(col("a")).collect()[0].column("a").to_pylist() + [0, 1, 3] Notes: - Only fills nulls in columns where the value can be cast to the column type