Skip to content

Commit 32fab96

Browse files
authored
feat: show possible correct key(s) in .__getitem__ KeyError message (#1097)
* feat: show possible correct key(s) in .__getitem__ KeyError message * Keep one if statement
1 parent 6b3ceaa commit 32fab96

File tree

2 files changed

+36
-17
lines changed

2 files changed

+36
-17
lines changed

bigframes/core/groupby/__init__.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import bigframes_vendored.constants as constants
2121
import bigframes_vendored.pandas.core.groupby as vendored_pandas_groupby
22+
import jellyfish
2223
import pandas as pd
2324

2425
from bigframes.core import log_adapter
@@ -91,8 +92,21 @@ def __getitem__(
9192

9293
bad_keys = [key for key in keys if key not in self._block.column_labels]
9394

95+
# Raise a KeyError message with the possible correct key(s)
9496
if len(bad_keys) > 0:
95-
raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}")
97+
possible_key = []
98+
for bad_key in bad_keys:
99+
possible_key.append(
100+
min(
101+
self._block.column_labels,
102+
key=lambda item: jellyfish.damerau_levenshtein_distance(
103+
bad_key, item
104+
),
105+
)
106+
)
107+
raise KeyError(
108+
f"Columns not found: {str(bad_keys)[1:-1]}. Did you mean {str(possible_key)[1:-1]}?"
109+
)
96110

97111
columns = [
98112
col_id for col_id, label in self._col_id_labels.items() if label in keys

tests/system/small/test_groupby.py

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -426,24 +426,12 @@ def test_dataframe_groupby_getitem_error(
426426
scalars_pandas_df_index,
427427
):
428428
col_names = ["float64_col", "int64_col", "bool_col", "string_col"]
429-
with pytest.raises(KeyError, match="\"Columns not found: 'not_in_group'\""):
430-
(
431-
scalars_df_index[col_names]
432-
.groupby("string_col")["not_in_group"]
433-
.min()
434-
.to_pandas()
435-
)
436-
437-
438-
def test_dataframe_groupby_getitem_multiple_columns_error(
439-
scalars_df_index,
440-
scalars_pandas_df_index,
441-
):
442-
col_names = ["float64_col", "int64_col", "bool_col", "string_col"]
443-
with pytest.raises(KeyError, match="\"Columns not found: 'col1', 'col2'\""):
429+
with pytest.raises(
430+
KeyError, match=r"Columns not found: 'not_in_group'. Did you mean 'string_col'?"
431+
):
444432
(
445433
scalars_df_index[col_names]
446-
.groupby("string_col")["col1", "col2"]
434+
.groupby("bool_col")["not_in_group"]
447435
.min()
448436
.to_pandas()
449437
)
@@ -464,6 +452,23 @@ def test_dataframe_groupby_getitem_list(
464452
pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False)
465453

466454

455+
def test_dataframe_groupby_getitem_list_error(
456+
scalars_df_index,
457+
scalars_pandas_df_index,
458+
):
459+
col_names = ["float64_col", "int64_col", "bool_col", "string_col"]
460+
with pytest.raises(
461+
KeyError,
462+
match=r"Columns not found: 'col1', 'float'. Did you mean 'bool_col', 'float64_col'?",
463+
):
464+
(
465+
scalars_df_index[col_names]
466+
.groupby("string_col")["col1", "float"]
467+
.min()
468+
.to_pandas()
469+
)
470+
471+
467472
def test_dataframe_groupby_nonnumeric_with_mean():
468473
df = pd.DataFrame(
469474
{

0 commit comments

Comments
 (0)