Skip to content

Commit a4ac82e

Browse files
authored
fix: df.loc with the 2nd input as bigframes boolean Series (#789)
* fix: df.loc with the 2nd input as bigframes boolean Series * merge main * modify test case * fix comment
1 parent f6c89b5 commit a4ac82e

File tree

2 files changed

+29
-1
lines changed

2 files changed

+29
-1
lines changed

bigframes/core/indexers.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,12 @@ def __getitem__(self, key):
159159
)
160160

161161
columns = key[1]
162-
if isinstance(columns, pd.Series) and columns.dtype == bool:
162+
if isinstance(columns, bigframes.series.Series):
163+
columns = columns.to_pandas()
164+
if isinstance(columns, pd.Series) and columns.dtype in (
165+
bool,
166+
pd.BooleanDtype(),
167+
):
163168
columns = df.columns[typing.cast(pd.Series, columns)]
164169

165170
return df[columns]

tests/system/small/test_dataframe.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3020,6 +3020,29 @@ def test_loc_select_with_column_condition(scalars_df_index, scalars_pandas_df_in
30203020
)
30213021

30223022

3023+
def test_loc_select_with_column_condition_bf_series(
3024+
scalars_df_index, scalars_pandas_df_index
3025+
):
3026+
# (b/347072677) GEOGRAPHY type doesn't support DISTINCT op
3027+
columns = [
3028+
item for item in scalars_pandas_df_index.columns if item != "geography_col"
3029+
]
3030+
scalars_df_index = scalars_df_index[columns]
3031+
scalars_pandas_df_index = scalars_pandas_df_index[columns]
3032+
3033+
size_half = len(scalars_pandas_df_index) / 2
3034+
bf_result = scalars_df_index.loc[
3035+
:, scalars_df_index.nunique() > size_half
3036+
].to_pandas()
3037+
pd_result = scalars_pandas_df_index.loc[
3038+
:, scalars_pandas_df_index.nunique() > size_half
3039+
]
3040+
pd.testing.assert_frame_equal(
3041+
bf_result,
3042+
pd_result,
3043+
)
3044+
3045+
30233046
def test_loc_single_index_with_duplicate(scalars_df_index, scalars_pandas_df_index):
30243047
scalars_df_index = scalars_df_index.set_index("string_col", drop=False)
30253048
scalars_pandas_df_index = scalars_pandas_df_index.set_index(

0 commit comments

Comments
 (0)