Patch notes (free-form text ahead of the first "diff --git" header):
  * AddMissingIndicator.transform: the "<variable>_na" indicator columns are
    now built as one separate frame (isna -> astype -> add_suffix) and attached
    with a single pd.concat(axis=1) instead of a multi-column assignment.
  * NOTE(review): the indicator dtype changes from int to "int8", and columns
    are appended rather than assigned by name -- confirm callers do not rely
    on the previous dtype or on overwriting pre-existing "_na" columns.
  * New test: transform on a 101-column frame must not emit
    pandas.errors.PerformanceWarning.

diff --git a/feature_engine/imputation/missing_indicator.py b/feature_engine/imputation/missing_indicator.py
index 7976aa749..01660a654 100644
--- a/feature_engine/imputation/missing_indicator.py
+++ b/feature_engine/imputation/missing_indicator.py
@@ -160,9 +160,13 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         """
 
         X = self._transform(X)
-
-        indicator_names = [f"{feature}_na" for feature in self.variables_]
-        X[indicator_names] = X[self.variables_].isna().astype(int)
+        X_indicators = (
+            X[self.variables_]
+            .isna()
+            .astype("int8")
+            .add_suffix("_na")
+        )
+        X = pd.concat([X, X_indicators], axis=1)
 
         return X
 
diff --git a/tests/test_imputation/test_missing_indicator.py b/tests/test_imputation/test_missing_indicator.py
index d5340c1ac..a7f6e9f7c 100644
--- a/tests/test_imputation/test_missing_indicator.py
+++ b/tests/test_imputation/test_missing_indicator.py
@@ -1,4 +1,8 @@
+import warnings
+import numpy as np
+import pandas as pd
 import pytest
+
 from sklearn.pipeline import Pipeline
 
 from feature_engine.imputation import AddMissingIndicator
@@ -96,3 +100,26 @@ def test_get_feature_names_out_from_pipeline(df_na):
 
     assert tr.get_feature_names_out(input_features=None) == feat_out
     assert tr.get_feature_names_out(input_features=original_features) == feat_out
+
+
+def test_no_performance_warning_with_many_variables():
+    n_cols = 101
+    df = pd.DataFrame(
+        np.random.randn(10, n_cols),
+        columns=[f"col_{i}" for i in range(n_cols)],
+    )
+
+    # Introduce missing values
+    df.iloc[0, :] = np.nan
+
+    ami = AddMissingIndicator(missing_only=False)
+    ami.fit(df)
+
+    with warnings.catch_warnings(record=True) as captured:
+        warnings.simplefilter("always")
+        ami.transform(df)
+
+    assert not any(
+        issubclass(w.category, pd.errors.PerformanceWarning)
+        for w in captured
+    ), "PerformanceWarning was raised during transform"