Skip to content

Commit 89ea44f

Browse files
tswast, chelsea-lin, and gcf-owl-bot[bot]
authored
deps: update to ibis-framework 9.x and newer sqlglot (#827)
* deps: update to ibis-framework 9.x and newer sqlglot * update sqlglot and ibis * bump minimum pandas * bump pyarrow * fix bfill and ffill * nearly implement describe * remove remaining reference to vendored_ibis_ops.ApproximateMultiQuantile * support ToJsonString * partial support for quantile * fix inmemorytable * fixed Series.explode * nearly fix to_datetime * remove tests I added * patch for python 3.9 support * fix unit tests * fix explode with time type * fix array_agg * fix array_agg for asc order * actually fix array_agg * fix remote function * fix in-memory nullable integer compilation * fix test_df_construct_pandas_default on Python 3.9 * fix ShiftOp windows * fix inf to SQL by treating values as literal in in memory table * fix unit tests for ibis-framework 9.2.0 * fix Python 3.10 unit tests by syncing deps * fixing remote function after merge * fix visit_NonNullLiteral for int types * visit_WindowFunction to fix s.median() method * fix lint * fix s.diff with window * fix mypy * patch visit_And to fix is_monotonic methods * fix mypy and fillna warning * undo window changes for test_series_autocorr * undo fill_null because it was missed at 9.0 version * vendor more of ibis for python 3.9 compatibility * add default arg for nulls_first for python 3.9 support * restore integer conversion * fix window tests: diff, duplicated, shift * fixing ibis parenthesize_inputs bugs and related tests * fixing lint * disable test_query_complexity_error * fix doctest np.int64(0) upgrades * fix doctest np.int64(0) upgrades more * fix groupby diff * addressing system-3.12/doctest issues related to numpy 2.1.1 * fix test_df_apply_axis_1_complex * address compiler errors after merge * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix unit-test compile errors * remove unused ibis codes * fix fillna deprecated warning * add _remove_null_ordering_from_unsupported_window 
back to fix test_precision_score etc ml tests * fix is_monotonic_decreasing test * fix explode after merge * fix numpy on remote function test * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * ml numpy sql generations --------- Co-authored-by: Chelsea Lin <[email protected]> Co-authored-by: Chelsea Lin <[email protected]> Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 06c3120 commit 89ea44f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+813
-901
lines changed

bigframes/core/block_transforms.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -387,10 +387,9 @@ def value_counts(
387387

388388
def pct_change(block: blocks.Block, periods: int = 1) -> blocks.Block:
389389
column_labels = block.column_labels
390-
window_spec = windows.rows(
391-
preceding=periods if periods > 0 else None,
392-
following=-periods if periods < 0 else None,
393-
)
390+
391+
# Window framing clause is not allowed for analytic function lag.
392+
window_spec = windows.unbound()
394393

395394
original_columns = block.value_columns
396395
block, shift_columns = block.multi_apply_window_op(

bigframes/core/blocks.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
import bigframes.core.schema as bf_schema
5151
import bigframes.core.sql as sql
5252
import bigframes.core.utils as utils
53-
import bigframes.core.window_spec as window_specs
53+
import bigframes.core.window_spec as windows
5454
import bigframes.dtypes
5555
import bigframes.exceptions
5656
import bigframes.features
@@ -900,7 +900,7 @@ def multi_apply_window_op(
900900
self,
901901
columns: typing.Sequence[str],
902902
op: agg_ops.WindowOp,
903-
window_spec: window_specs.WindowSpec,
903+
window_spec: windows.WindowSpec,
904904
*,
905905
skip_null_groups: bool = False,
906906
never_skip_nulls: bool = False,
@@ -959,7 +959,7 @@ def apply_window_op(
959959
self,
960960
column: str,
961961
op: agg_ops.WindowOp,
962-
window_spec: window_specs.WindowSpec,
962+
window_spec: windows.WindowSpec,
963963
*,
964964
result_label: Label = None,
965965
skip_null_groups: bool = False,
@@ -1475,7 +1475,7 @@ def grouped_head(
14751475
value_columns: typing.Sequence[str],
14761476
n: int,
14771477
):
1478-
window_spec = window_specs.cumulative_rows(grouping_keys=tuple(by_column_ids))
1478+
window_spec = windows.cumulative_rows(grouping_keys=tuple(by_column_ids))
14791479

14801480
block, result_id = self.apply_window_op(
14811481
value_columns[0],
@@ -2383,10 +2383,7 @@ def _is_monotonic(
23832383
return self._stats_cache[column_name][op_name]
23842384

23852385
period = 1
2386-
window = window_specs.rows(
2387-
preceding=period,
2388-
following=None,
2389-
)
2386+
window_spec = windows.rows()
23902387

23912388
# any NaN value means not monotonic
23922389
block, last_notna_id = self.apply_unary_op(column_ids[0], ops.notnull_op)
@@ -2402,7 +2399,7 @@ def _is_monotonic(
24022399
last_result_id = None
24032400
for column_id in column_ids[::-1]:
24042401
block, lag_result_id = block.apply_window_op(
2405-
column_id, agg_ops.ShiftOp(period), window
2402+
column_id, agg_ops.ShiftOp(period), window_spec
24062403
)
24072404
block, strict_monotonic_id = block.apply_binary_op(
24082405
column_id, lag_result_id, ops.gt_op if increasing else ops.lt_op

bigframes/core/compile/aggregate_compiler.py

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414
import functools
1515
import typing
16-
from typing import cast, Optional
16+
from typing import cast, List, Optional
1717

1818
import bigframes_vendored.constants as constants
1919
import bigframes_vendored.ibis.expr.operations as vendored_ibis_ops
@@ -31,6 +31,17 @@
3131
scalar_compiler = scalar_compilers.scalar_op_compiler
3232

3333

34+
# TODO(swast): We can remove this if ibis adds general approx_quantile
35+
# See: https://github.com/ibis-project/ibis/issues/9541
36+
@ibis.udf.agg.builtin
37+
def approx_quantiles(expression: float, number) -> List[float]:
38+
"""APPROX_QUANTILES
39+
40+
https://cloud.google.com/bigquery/docs/reference/standard-sql/approximate_aggregate_functions#approx_quantiles
41+
"""
42+
return [] # pragma: NO COVER
43+
44+
3445
def compile_aggregate(
3546
aggregate: ex.Aggregation,
3647
bindings: typing.Dict[str, ibis_types.Value],
@@ -176,15 +187,12 @@ def _(
176187
column: ibis_types.NumericColumn,
177188
window=None,
178189
) -> ibis_types.NumericValue:
179-
# PERCENTILE_CONT has very few allowed windows. For example, "window
180-
# framing clause is not allowed for analytic function percentile_cont".
190+
# APPROX_QUANTILES has very few allowed windows.
181191
if window is not None:
182192
raise NotImplementedError(
183193
f"Approx Quartiles with windowing is not supported. {constants.FEEDBACK_LINK}"
184194
)
185-
value = vendored_ibis_ops.ApproximateMultiQuantile(
186-
column, num_bins=4 # type: ignore
187-
).to_expr()[op.quartile]
195+
value = approx_quantiles(column, 4)[op.quartile] # type: ignore
188196
return cast(ibis_types.NumericValue, value)
189197

190198

@@ -513,11 +521,15 @@ def _(
513521
column: ibis_types.Column,
514522
window=None,
515523
) -> ibis_types.BooleanValue:
516-
# BQ will return null for empty column, result would be true in pandas.
517-
result = _is_true(column).all()
524+
# BQ will return null for empty column, result would be true in pandas.
525+
result = _apply_window_if_present(_is_true(column).all(), window)
526+
literal = ibis_types.literal(True)
527+
518528
return cast(
519529
ibis_types.BooleanScalar,
520-
_apply_window_if_present(result, window).fillna(ibis_types.literal(True)),
530+
result.fill_null(literal)
531+
if hasattr(result, "fill_null")
532+
else result.fillna(literal),
521533
)
522534

523535

@@ -528,10 +540,14 @@ def _(
528540
window=None,
529541
) -> ibis_types.BooleanValue:
530542
# BQ will return null for empty column, result would be false in pandas.
531-
result = _is_true(column).any()
543+
result = _apply_window_if_present(_is_true(column).any(), window)
544+
literal = ibis_types.literal(False)
545+
532546
return cast(
533547
ibis_types.BooleanScalar,
534-
_apply_window_if_present(result, window).fillna(ibis_types.literal(False)),
548+
result.fill_null(literal)
549+
if hasattr(result, "fill_null")
550+
else result.fillna(literal),
535551
)
536552

537553

bigframes/core/compile/compiled.py

Lines changed: 21 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,9 @@
1919
import typing
2020
from typing import Collection, Literal, Optional, Sequence
2121

22-
import bigframes_vendored.ibis.expr.operations as vendored_ibis_ops
22+
import bigframes_vendored.ibis.backends.bigquery.backend as ibis_bigquery
2323
import google.cloud.bigquery
2424
import ibis
25-
import ibis.backends.bigquery as ibis_bigquery
2625
import ibis.backends.bigquery.datatypes
2726
import ibis.common.deferred # type: ignore
2827
import ibis.expr.datatypes as ibis_dtypes
@@ -407,18 +406,13 @@ def explode(self, offsets: typing.Sequence[int]) -> UnorderedIR:
407406

408407
# The offset array ensures null represents empty arrays after unnesting.
409408
offset_array_id = bigframes.core.guid.generate_guid("offset_array_")
410-
offset_array = (
411-
vendored_ibis_ops.GenerateArray(
412-
ibis.greatest(
413-
0,
414-
ibis.least(
415-
*[table[column_id].length() - 1 for column_id in column_ids]
416-
),
417-
)
418-
)
419-
.to_expr()
420-
.name(offset_array_id),
421-
)
409+
offset_array = ibis.range(
410+
0,
411+
ibis.greatest(
412+
1, # We always want at least 1 element to fill in NULLs for empty arrays.
413+
ibis.least(*[table[column_id].length() for column_id in column_ids]),
414+
),
415+
).name(offset_array_id)
422416
table_w_offset_array = table.select(
423417
offset_array,
424418
*self._column_names,
@@ -718,21 +712,13 @@ def explode(self, offsets: typing.Sequence[int]) -> OrderedIR:
718712
column_ids = tuple(table.columns[offset] for offset in offsets)
719713

720714
offset_array_id = bigframes.core.guid.generate_guid("offset_array_")
721-
offset_array = (
722-
vendored_ibis_ops.GenerateArray(
723-
ibis.greatest(
724-
0,
725-
ibis.least(
726-
*[
727-
table[table.columns[offset]].length() - 1
728-
for offset in offsets
729-
]
730-
),
731-
)
732-
)
733-
.to_expr()
734-
.name(offset_array_id),
735-
)
715+
offset_array = ibis.range(
716+
0,
717+
ibis.greatest(
718+
1, # We always want at least 1 element to fill in NULLs for empty arrays.
719+
ibis.least(*[table[column_id].length() for column_id in column_ids]),
720+
),
721+
).name(offset_array_id)
736722
table_w_offset_array = table.select(
737723
offset_array,
738724
*self._column_names,
@@ -870,7 +856,7 @@ def project_window_op(
870856

871857
clauses = []
872858
if op.skips_nulls and not never_skip_nulls:
873-
clauses.append((column.isnull(), ibis.NA))
859+
clauses.append((column.isnull(), ibis.null()))
874860
if window_spec.min_periods:
875861
if op.skips_nulls:
876862
# Most operations do not count NULL values towards min_periods
@@ -891,7 +877,7 @@ def project_window_op(
891877
clauses.append(
892878
(
893879
observation_count < ibis_types.literal(window_spec.min_periods),
894-
ibis.NA,
880+
ibis.null(),
895881
)
896882
)
897883
if clauses:
@@ -1322,9 +1308,10 @@ def _ibis_window_from_spec(
13221308
bounds.preceding, bounds.following, how="range"
13231309
)
13241310
if isinstance(bounds, RowsWindowBounds):
1325-
window = window.preceding_following(
1326-
bounds.preceding, bounds.following, how="rows"
1327-
)
1311+
if bounds.preceding is not None or bounds.following is not None:
1312+
window = window.preceding_following(
1313+
bounds.preceding, bounds.following, how="rows"
1314+
)
13281315
else:
13291316
raise ValueError(f"unrecognized window bounds {bounds}")
13301317
return window

bigframes/core/compile/default_ordering.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,12 @@ def _convert_to_nonnull_string(column: ibis_types.Column) -> ibis_types.StringVa
4949
# Needed for JSON, STRUCT and ARRAY datatypes
5050
result = vendored_ibis_ops.ToJsonString(column).to_expr() # type: ignore
5151
# Escape backslashes and use backslash as delineator
52-
escaped = cast(ibis_types.StringColumn, result.fillna("")).replace("\\", "\\\\") # type: ignore
52+
escaped = cast(
53+
ibis_types.StringColumn,
54+
result.fill_null("") if hasattr(result, "fill_null") else result.fillna(""),
55+
).replace(
56+
"\\", "\\\\"
57+
) # type: ignore
5358
return cast(ibis_types.StringColumn, ibis.literal("\\")).concat(escaped)
5459

5560

bigframes/core/compile/scalar_op_compiler.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -842,7 +842,7 @@ def isin_op_impl(x: ibis_types.Value, op: ops.IsInOp):
842842
@scalar_op_compiler.register_unary_op(ops.ToDatetimeOp, pass_op=True)
843843
def to_datetime_op_impl(x: ibis_types.Value, op: ops.ToDatetimeOp):
844844
if x.type() == ibis_dtypes.str:
845-
return vendored_ibis_ops.SafeCastToDatetime(x).to_expr()
845+
return x.try_cast(ibis_dtypes.Timestamp(None))
846846
else:
847847
# Numerical inputs.
848848
if op.format:
@@ -995,8 +995,14 @@ def eq_nulls_match_op(
995995
y: ibis_types.Value,
996996
):
997997
"""Variant of eq_op where nulls match each other. Only use where dtypes are known to be same."""
998-
left = x.cast(ibis_dtypes.str).fillna(ibis_types.literal("$NULL_SENTINEL$"))
999-
right = y.cast(ibis_dtypes.str).fillna(ibis_types.literal("$NULL_SENTINEL$"))
998+
literal = ibis_types.literal("$NULL_SENTINEL$")
999+
if hasattr(x, "fill_null"):
1000+
left = x.cast(ibis_dtypes.str).fill_null(literal)
1001+
right = y.cast(ibis_dtypes.str).fill_null(literal)
1002+
else:
1003+
left = x.cast(ibis_dtypes.str).fillna(literal)
1004+
right = y.cast(ibis_dtypes.str).fillna(literal)
1005+
10001006
return left == right
10011007

10021008

@@ -1379,7 +1385,10 @@ def fillna_op(
13791385
x: ibis_types.Value,
13801386
y: ibis_types.Value,
13811387
):
1382-
return x.fillna(typing.cast(ibis_types.Scalar, y))
1388+
if hasattr(x, "fill_null"):
1389+
return x.fill_null(typing.cast(ibis_types.Scalar, y))
1390+
else:
1391+
return x.fillna(typing.cast(ibis_types.Scalar, y))
13831392

13841393

13851394
@scalar_op_compiler.register_binary_op(ops.round_op)

bigframes/core/compile/single_column.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,4 +179,8 @@ def value_to_join_key(value: ibis_types.Value):
179179
"""Converts nullable values to non-null string. SQL will not match null keys together - but pandas does."""
180180
if not value.type().is_string():
181181
value = value.cast(ibis_dtypes.str)
182-
return value.fillna(ibis_types.literal("$NULL_SENTINEL$"))
182+
return (
183+
value.fill_null(ibis_types.literal("$NULL_SENTINEL$"))
184+
if hasattr(value, "fill_null")
185+
else value.fillna(ibis_types.literal("$NULL_SENTINEL$"))
186+
)

bigframes/core/groupby/__init__.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -255,19 +255,17 @@ def cumprod(self, *args, **kwargs) -> df.DataFrame:
255255

256256
@validations.requires_ordering()
257257
def shift(self, periods=1) -> series.Series:
258-
window = window_specs.rows(
258+
# Window framing clause is not allowed for analytic function lag.
259+
window = window_specs.unbound(
259260
grouping_keys=tuple(self._by_col_ids),
260-
preceding=periods if periods > 0 else None,
261-
following=-periods if periods < 0 else None,
262261
)
263262
return self._apply_window_op(agg_ops.ShiftOp(periods), window=window)
264263

265264
@validations.requires_ordering()
266265
def diff(self, periods=1) -> series.Series:
266+
# Window framing clause is not allowed for analytic function lag.
267267
window = window_specs.rows(
268268
grouping_keys=tuple(self._by_col_ids),
269-
preceding=periods if periods > 0 else None,
270-
following=-periods if periods < 0 else None,
271269
)
272270
return self._apply_window_op(agg_ops.DiffOp(periods), window=window)
273271

@@ -685,19 +683,16 @@ def cumcount(self, *args, **kwargs) -> series.Series:
685683
@validations.requires_ordering()
686684
def shift(self, periods=1) -> series.Series:
687685
"""Shift index by desired number of periods."""
686+
# Window framing clause is not allowed for analytic function lag.
688687
window = window_specs.rows(
689688
grouping_keys=tuple(self._by_col_ids),
690-
preceding=periods if periods > 0 else None,
691-
following=-periods if periods < 0 else None,
692689
)
693690
return self._apply_window_op(agg_ops.ShiftOp(periods), window=window)
694691

695692
@validations.requires_ordering()
696693
def diff(self, periods=1) -> series.Series:
697694
window = window_specs.rows(
698695
grouping_keys=tuple(self._by_col_ids),
699-
preceding=periods if periods > 0 else None,
700-
following=-periods if periods < 0 else None,
701696
)
702697
return self._apply_window_op(agg_ops.DiffOp(periods), window=window)
703698

bigframes/core/window_spec.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ def rows(
7070
Returns:
7171
WindowSpec
7272
"""
73-
assert (preceding is not None) or (following is not None)
7473
bounds = RowsWindowBounds(preceding=preceding, following=following)
7574
return WindowSpec(
7675
grouping_keys=grouping_keys,

0 commit comments

Comments
 (0)