Skip to content

Commit b0ff718

Browse files
authored
feat: support casting struct to JSON (#2067)
Fixes internal issue 444196993
1 parent 21eb213 commit b0ff718

File tree

4 files changed

+35
-4
lines changed

4 files changed

+35
-4
lines changed

bigframes/core/compile/ibis_compiler/scalar_op_registry.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1023,6 +1023,8 @@ def astype_op_impl(x: ibis_types.Value, op: ops.AsTypeOp):
10231023
x, ibis_dtypes.string, safe=op.safe
10241024
)
10251025
return parse_json_in_safe(x_str) if op.safe else parse_json(x_str)
1026+
if x.type().is_struct():
1027+
return to_json_string(typing.cast(ibis_types.StructValue, x))
10261028

10271029
if x.type() == ibis_dtypes.json:
10281030
if to_type == ibis_dtypes.int64:
@@ -2069,7 +2071,7 @@ def json_extract_string_array( # type: ignore[empty-body]
20692071

20702072
@ibis_udf.scalar.builtin(name="to_json_string")
20712073
def to_json_string( # type: ignore[empty-body]
2072-
json_obj: ibis_dtypes.JSON,
2074+
json_obj,
20732075
) -> ibis_dtypes.String:
20742076
"""Convert JSON to STRING."""
20752077

bigframes/dtypes.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -641,6 +641,9 @@ def _dtype_from_string(dtype_string: str) -> typing.Optional[Dtype]:
641641
return BIGFRAMES_STRING_TO_BIGFRAMES[
642642
typing.cast(DtypeString, str(dtype_string))
643643
]
644+
if isinstance(dtype_string, str) and dtype_string.lower() == "json":
645+
return JSON_DTYPE
646+
644647
raise TypeError(
645648
textwrap.dedent(
646649
f"""
@@ -652,9 +655,9 @@ def _dtype_from_string(dtype_string: str) -> typing.Optional[Dtype]:
652655
The following pandas.ExtensionDtype are supported:
653656
pandas.BooleanDtype(), pandas.Float64Dtype(),
654657
pandas.Int64Dtype(), pandas.StringDtype(storage="pyarrow"),
655-
pd.ArrowDtype(pa.date32()), pd.ArrowDtype(pa.time64("us")),
656-
pd.ArrowDtype(pa.timestamp("us")),
657-
pd.ArrowDtype(pa.timestamp("us", tz="UTC")).
658+
pandas.ArrowDtype(pa.date32()), pandas.ArrowDtype(pa.time64("us")),
659+
pandas.ArrowDtype(pa.timestamp("us")),
660+
pandas.ArrowDtype(pa.timestamp("us", tz="UTC")).
658661
{constants.FEEDBACK_LINK}
659662
"""
660663
)

bigframes/operations/generic_ops.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,8 @@ def _valid_cast(src: dtypes.Dtype, dst: dtypes.Dtype):
324324
if not _valid_cast(src_dtype, dst_dtype):
325325
return False
326326
return True
327+
if dtypes.is_struct_like(src) and dst == dtypes.JSON_DTYPE:
328+
return True
327329

328330
return _valid_scalar_cast(src, dst)
329331

tests/system/small/test_series.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3866,6 +3866,30 @@ def test_string_astype_timestamp():
38663866
pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
38673867

38683868

3869+
def test_struct_astype_json():
3870+
"""See internal issue 444196993."""
3871+
s = series.Series(
3872+
[
3873+
{"version": 1, "project": "pandas"},
3874+
{"version": 2, "project": "numpy"},
3875+
]
3876+
)
3877+
assert dtypes.is_struct_like(s.dtype)
3878+
3879+
expected = series.Series(s, dtype=dtypes.JSON_DTYPE)
3880+
assert expected.dtype == dtypes.JSON_DTYPE
3881+
3882+
result = s.astype("json")
3883+
pd.testing.assert_series_equal(
3884+
result.to_pandas(), expected.to_pandas(), check_index_type=False
3885+
)
3886+
3887+
result = s.astype(dtypes.JSON_DTYPE)
3888+
pd.testing.assert_series_equal(
3889+
result.to_pandas(), expected.to_pandas(), check_index_type=False
3890+
)
3891+
3892+
38693893
def test_timestamp_astype_string():
38703894
bf_series = series.Series(
38713895
[

0 commit comments

Comments
 (0)