Skip to content

Commit 35c1c33

Browse files
fix: Fix too many cluster columns requested by caching (#2155)
1 parent 5cc3c5b commit 35c1c33

File tree

2 files changed

+23
-2
lines changed

2 files changed

+23
-2
lines changed

bigframes/session/bq_caching_executor.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -637,14 +637,18 @@ def _execute_plan_gbq(
637637

638638
create_table = True
639639
if not cache_spec.cluster_cols:
640-
assert len(cache_spec.cluster_cols) <= _MAX_CLUSTER_COLUMNS
641640
offsets_id = bigframes.core.identifiers.ColumnId(
642641
bigframes.core.guid.generate_guid()
643642
)
644643
plan = nodes.PromoteOffsetsNode(plan, offsets_id)
645644
cluster_cols = [offsets_id.sql]
646645
else:
647-
cluster_cols = cache_spec.cluster_cols
646+
cluster_cols = [
647+
col
648+
for col in cache_spec.cluster_cols
649+
if bigframes.dtypes.is_clusterable(plan.schema.get_type(col))
650+
]
651+
cluster_cols = cluster_cols[:_MAX_CLUSTER_COLUMNS]
648652

649653
compiled = compile.compile_sql(
650654
compile.CompileRequest(

tests/system/small/test_dataframe.py

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5537,6 +5537,23 @@ def test_df_cached(scalars_df_index):
55375537
pandas.testing.assert_frame_equal(df.to_pandas(), df_cached_copy.to_pandas())
55385538

55395539

5540+
def test_df_cached_many_index_cols(scalars_df_index):
5541+
index_cols = [
5542+
"int64_too",
5543+
"time_col",
5544+
"int64_col",
5545+
"bool_col",
5546+
"date_col",
5547+
"timestamp_col",
5548+
"string_col",
5549+
]
5550+
df = scalars_df_index.set_index(index_cols)
5551+
df = df[df["rowindex_2"] % 2 == 0]
5552+
5553+
df_cached_copy = df.cache()
5554+
pandas.testing.assert_frame_equal(df.to_pandas(), df_cached_copy.to_pandas())
5555+
5556+
55405557
def test_assign_after_binop_row_joins():
55415558
pd_df = pd.DataFrame(
55425559
{

0 commit comments

Comments
 (0)