fix: df.loc with the 2nd input as bigframes boolean Series (#789)

GarrettWu · web-flow · commit a4ac82e06221 · 2024-06-14T10:31:23.000-07:00
* fix: df.loc with the 2nd input as bigframes boolean Series

* merge main

* modify test case

* fix comment
diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py
@@ -159,7 +159,12 @@ def __getitem__(self, key):
                 )
 
                 columns = key[1]
-                if isinstance(columns, pd.Series) and columns.dtype == bool:
+                if isinstance(columns, bigframes.series.Series):
+                    columns = columns.to_pandas()
+                if isinstance(columns, pd.Series) and columns.dtype in (
+                    bool,
+                    pd.BooleanDtype(),
+                ):
                     columns = df.columns[typing.cast(pd.Series, columns)]
 
                 return df[columns]
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
@@ -3020,6 +3020,29 @@ def test_loc_select_with_column_condition(scalars_df_index, scalars_pandas_df_in
     )
 
 
+def test_loc_select_with_column_condition_bf_series(
+    scalars_df_index, scalars_pandas_df_index
+):
+    # (b/347072677) GEOGRAPHY type doesn't support DISTINCT op
+    columns = [
+        item for item in scalars_pandas_df_index.columns if item != "geography_col"
+    ]
+    scalars_df_index = scalars_df_index[columns]
+    scalars_pandas_df_index = scalars_pandas_df_index[columns]
+
+    size_half = len(scalars_pandas_df_index) / 2
+    bf_result = scalars_df_index.loc[
+        :, scalars_df_index.nunique() > size_half
+    ].to_pandas()
+    pd_result = scalars_pandas_df_index.loc[
+        :, scalars_pandas_df_index.nunique() > size_half
+    ]
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
 def test_loc_single_index_with_duplicate(scalars_df_index, scalars_pandas_df_index):
     scalars_df_index = scalars_df_index.set_index("string_col", drop=False)
     scalars_pandas_df_index = scalars_pandas_df_index.set_index(