From 08abf0c63e3466709d70c5a15c4c980a887a8638 Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Tue, 11 Jun 2024 22:20:24 +0000
Subject: [PATCH 1/7] feat: bigframes.bigquery.json_set

---
 bigframes/bigquery/__init__.py                | 56 ++++++++++++++++++-
 bigframes/core/compile/scalar_op_compiler.py  |  9 +++
 bigframes/dtypes.py                           |  5 ++
 bigframes/operations/__init__.py              | 13 +++++
 .../ibis/backends/bigquery/registry.py        |  9 +++
 .../ibis/expr/operations/json.py              | 15 ++++-
 6 files changed, 104 insertions(+), 3 deletions(-)

diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py
index 85a9010a7d..ad008bc301 100644
--- a/bigframes/bigquery/__init__.py
+++ b/bigframes/bigquery/__init__.py
@@ -36,6 +36,10 @@
     import bigframes.series as series
 
 
+# Array functions defined from
+# https://cloud.google.com/bigquery/docs/reference/standard-sql/array_functions
+
+
 def array_length(series: series.Series) -> series.Series:
     """Compute the length of each array element in the Series.
 
@@ -154,6 +158,56 @@ def array_to_string(series: series.Series, delimiter: str) -> series.Series:
     return series._apply_unary_op(ops.ArrayToStringOp(delimiter=delimiter))
 
 
+# JSON functions defined from
+# https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions
+
+
+def json_set(
+    series: series.Series,
+    json_path_value_pairs: typing.Sequence[typing.Tuple[str, typing.Any]],
+) -> series.Series:
+    """Produces a new JSON value by inserting or replacing values at specified paths.
+
+    **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> import bigframes.bigquery as bbq
+        >>> import numpy as np
+        >>> bpd.options.display.progress_bar = None
+
+        >>> s = bpd.read_gbq("SELECT JSON '{\\\"a\\\": 1}' AS data")["data"]
+        >>> bbq.json_set(s, json_path_value_pairs=[("$.a", 100), ("$.b", "hi")])
+            0    {"a":100,"b":"hi"}
+            Name: data, dtype: string
+
+    Args:
+        series (bigframes.series.Series):
+            The Series containing JSON data (as native JSON objects or JSON-formatted strings).
+        json_path_value_pairs (Sequence[Tuple[str, typing.Any]]):
+            Pairs of JSON path and the new value to insert/replace.
+
+    Returns:
+        bigframes.series.Series: A new Series with the transformed JSON data.
+
+    """
+    # SQLGlot parser does not support the "create_if_missing => true" syntax, so
+    # create_if_missing is not currently implemented.
+    json_path_value_tuples = []
+    for json_path_value_pair in json_path_value_pairs:
+        if len(json_path_value_pair) != 2:
+            raise ValueError(
+                "Incorrect format: Expected (<json_path>, <json_value>), but found: "
+                + f"{json_path_value_pair}"
+            )
+        json_path_value_tuples.append(tuple(json_path_value_pair))
+
+    return series._apply_unary_op(
+        ops.JSONSet(
+            json_path_value_pairs=tuple(json_path_value_tuples),
+        )
+    )
+
+
 def vector_search(
     base_table: str,
     column_to_search: str,
@@ -301,4 +355,4 @@ def vector_search(
         df = query._session.read_gbq(sql)
     df.index.names = index_labels
 
-    return df
+    return df
\ No newline at end of file
diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
index 6b8e60434e..a0ec40f200 100644
--- a/bigframes/core/compile/scalar_op_compiler.py
+++ b/bigframes/core/compile/scalar_op_compiler.py
@@ -894,6 +894,15 @@ def array_to_string_op_impl(x: ibis_types.Value, op: ops.ArrayToStringOp):
     return typing.cast(ibis_types.ArrayValue, x).join(op.delimiter)
 
 
+# JSON Ops
+@scalar_op_compiler.register_unary_op(ops.JSONSet, pass_op=True)
+def json_set_op_impl(x: ibis_types.Value, op: ops.JSONSet):
+    return vendored_ibis_ops.JSONSet(
+        x,
+        json_path_value_pairs=op.json_path_value_pairs,
+    ).to_expr()
+
+
 ### Binary Ops
 def short_circuit_nulls(type_override: typing.Optional[ibis_dtypes.DataType] = None):
     """Wraps a binary operator to generate nulls of the expected type if either input is a null scalar."""
diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
index 5de8f896a9..441f1f10b2 100644
--- a/bigframes/dtypes.py
+++ b/bigframes/dtypes.py
@@ -240,6 +240,11 @@ def is_struct_like(type: ExpressionType) -> bool:
     )
 
 
+def is_json_like(type: ExpressionType) -> bool:
+    # TODO: Add JSON type support
+    return type == STRING_DTYPE
+
+
 def is_numeric(type: ExpressionType) -> bool:
     return type in NUMERIC_BIGFRAMES_TYPES_PERMISSIVE
 
diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py
index c10b743631..ee4e75e86c 100644
--- a/bigframes/operations/__init__.py
+++ b/bigframes/operations/__init__.py
@@ -602,6 +602,19 @@ def output_type(self, *input_types):
         return dtypes.STRING_DTYPE
 
 
+## JSON Ops
+@dataclasses.dataclass(frozen=True)
+class JSONSet(UnaryOp):
+    name: typing.ClassVar[str] = "json_set"
+    json_path_value_pairs: typing.Tuple[typing.Tuple[str, typing.Any], ...]
+
+    def output_type(self, *input_types):
+        input_type = input_types[0]
+        if not dtypes.is_json_like(input_type):
+            raise TypeError("Input type must be an JSON or JSON-formatted string type.")
+        return input_type
+
+
 # Binary Ops
 fillna_op = create_binary_op(name="fillna", type_signature=op_typing.COERCE)
 maximum_op = create_binary_op(name="maximum", type_signature=op_typing.COERCE)
diff --git a/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py b/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py
index ecef2115e5..f2dd0b7602 100644
--- a/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py
+++ b/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py
@@ -58,6 +58,14 @@ def _array_aggregate(translator, op: vendored_ibis_ops.ArrayAggregate):
     return f"ARRAY_AGG({arg} IGNORE NULLS {order_by_sql})"
 
 
+def _json_set(translator, op: vendored_ibis_ops.JSONSet):
+    arg = translator.translate(op.arg)
+    json_path_value_pairs_list = [
+        translator.translate(item) for pair in op.json_path_value_pairs for item in pair
+    ]
+    return f"JSON_SET(PARSE_JSON({arg}), {', '.join(json_path_value_pairs_list)})"
+
+
 patched_ops = {
     vendored_ibis_ops.ApproximateMultiQuantile: _approx_quantiles,  # type:ignore
     vendored_ibis_ops.FirstNonNullValue: _first_non_null_value,  # type:ignore
@@ -67,6 +75,7 @@ def _array_aggregate(translator, op: vendored_ibis_ops.ArrayAggregate):
     vendored_ibis_ops.SafeCastToDatetime: _safe_cast_to_datetime,  # type:ignore
     ibis_reductions.Quantile: _quantile,  # type:ignore
     vendored_ibis_ops.ArrayAggregate: _array_aggregate,  # type:ignore
+    vendored_ibis_ops.JSONSet: _json_set,  # type:ignore
 }
 
 OPERATION_REGISTRY.update(patched_ops)
diff --git a/third_party/bigframes_vendored/ibis/expr/operations/json.py b/third_party/bigframes_vendored/ibis/expr/operations/json.py
index 772c2e8ff4..2ea2e2aed7 100644
--- a/third_party/bigframes_vendored/ibis/expr/operations/json.py
+++ b/third_party/bigframes_vendored/ibis/expr/operations/json.py
@@ -1,9 +1,20 @@
 # Contains code from https://github.com/ibis-project/ibis/blob/master/ibis/expr/operations/json.py
 from __future__ import annotations
 
+import ibis.common.typing as ibis_typing
 import ibis.expr.datatypes as dt
-from ibis.expr.operations.core import Unary
+import ibis.expr.operations.core as ibis_ops_core
+import ibis.expr.rules as rlz
 
 
-class ToJsonString(Unary):
+class ToJsonString(ibis_ops_core.Unary):
     dtype = dt.string
+
+
+class JSONSet(ibis_ops_core.Unary):
+    json_path_value_pairs: ibis_typing.VarTuple[
+        ibis_typing.VarTuple[ibis_ops_core.Value[dt.Any]]
+    ]
+
+    shape = rlz.shape_like("arg")
+    dtype = rlz.dtype_like("arg")

From f9a30256cdae40557037d97e8139500828ce4181 Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Wed, 12 Jun 2024 22:45:46 +0000
Subject: [PATCH 2/7] add tests

---
 bigframes/bigquery/__init__.py           |  3 +-
 tests/system/small/bigquery/test_json.py | 80 ++++++++++++++++++++++++
 2 files changed, 82 insertions(+), 1 deletion(-)
 create mode 100644 tests/system/small/bigquery/test_json.py

diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py
index ad008bc301..f6686344bd 100644
--- a/bigframes/bigquery/__init__.py
+++ b/bigframes/bigquery/__init__.py
@@ -166,7 +166,8 @@ def json_set(
     series: series.Series,
     json_path_value_pairs: typing.Sequence[typing.Tuple[str, typing.Any]],
 ) -> series.Series:
-    """Produces a new JSON value by inserting or replacing values at specified paths.
+    """Produces a new JSON value within a Series by inserting or replacing values at
+    specified paths.
 
     **Examples:**
 
diff --git a/tests/system/small/bigquery/test_json.py b/tests/system/small/bigquery/test_json.py
new file mode 100644
index 0000000000..da49f45202
--- /dev/null
+++ b/tests/system/small/bigquery/test_json.py
@@ -0,0 +1,80 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+
+import pandas as pd
+
+import bigframes.bigquery as bbq
+import bigframes.pandas as bpd
+
+
+def _get_series_from_json(json_data):
+    sql = " UNION ALL ".join(
+        [f"SELECT JSON '{json.dumps(data)}' AS data" for data in json_data]
+    )
+    return bpd.read_gbq(sql)["data"]
+
+
+def test_json_set():
+    init_json = [
+        {"a": 1},
+    ]
+    s = _get_series_from_json(init_json)
+    actual = bbq.json_set(s, json_path_value_pairs=[("$.a", 10)])
+
+    expected_json = [
+        {"a": 10},
+    ]
+    expected = _get_series_from_json(expected_json)
+    pd.testing.assert_series_equal(
+        actual.to_pandas(),
+        expected.to_pandas(),
+    )
+
+
+def test_json_set_w_nested_json():
+    init_json = [
+        {"a": {"b": {"c": "tester", "d": []}}},
+    ]
+    s = _get_series_from_json(init_json)
+    actual = bbq.json_set(s, json_path_value_pairs=[("$.a.b.c", "user")])
+
+    expected_json = [
+        {"a": {"b": {"c": "user", "d": []}}},
+    ]
+    expected = _get_series_from_json(expected_json)
+    pd.testing.assert_series_equal(
+        actual.to_pandas(),
+        expected.to_pandas(),
+    )
+
+
+def test_json_set_w_ordered_pairs():
+    init_json = [
+        {"a": {"b": {"c": {}}}},
+    ]
+    s = _get_series_from_json(init_json)
+    actual = bbq.json_set(
+        s, json_path_value_pairs=[("$.a.b.e", "user"), ("$.a.b.e", "dev")]
+    )
+
+    expected_json = [
+        {"a": {"b": {"c": {}, "e": "dev"}}},
+    ]
+    expected = _get_series_from_json(expected_json)
+    pd.testing.assert_series_equal(
+        actual.to_pandas(),
+        expected.to_pandas(),
+    )

From ad53e78ae27be39587d55b6d6e6a335058d47e8f Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Wed, 12 Jun 2024 22:50:10 +0000
Subject: [PATCH 3/7] fix: restore trailing newline in bigframes/bigquery/__init__.py

---
 bigframes/bigquery/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py
index f6686344bd..235ad22568 100644
--- a/bigframes/bigquery/__init__.py
+++ b/bigframes/bigquery/__init__.py
@@ -356,4 +356,4 @@ def vector_search(
         df = query._session.read_gbq(sql)
     df.index.names = index_labels
 
-    return df
\ No newline at end of file
+    return df

From a60779454e0fb8bf29d939d46b818b36a198a008 Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Wed, 12 Jun 2024 23:03:08 +0000
Subject: [PATCH 4/7] fix: add type annotations to test_json.py fixtures

---
 tests/system/small/bigquery/test_json.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/system/small/bigquery/test_json.py b/tests/system/small/bigquery/test_json.py
index da49f45202..17690cacdf 100644
--- a/tests/system/small/bigquery/test_json.py
+++ b/tests/system/small/bigquery/test_json.py
@@ -62,7 +62,7 @@ def test_json_set_w_nested_json():
 
 
 def test_json_set_w_ordered_pairs():
-    init_json = [
+    init_json: object = [
         {"a": {"b": {"c": {}}}},
     ]
     s = _get_series_from_json(init_json)
@@ -70,7 +70,7 @@ def test_json_set_w_ordered_pairs():
         s, json_path_value_pairs=[("$.a.b.e", "user"), ("$.a.b.e", "dev")]
     )
 
-    expected_json = [
+    expected_json: object = [
         {"a": {"b": {"c": {}, "e": "dev"}}},
     ]
     expected = _get_series_from_json(expected_json)

From b5ffb59e11ccb8628bbf3a53ffc47bc2258dcdb6 Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Thu, 27 Jun 2024 18:04:08 +0000
Subject: [PATCH 5/7] update to binary operator to support Series values

---
 bigframes/bigquery/__init__.py                | 35 +++++----
 bigframes/core/compile/scalar_op_compiler.py  |  7 +-
 bigframes/operations/__init__.py              | 26 +++----
 tests/system/small/bigquery/test_json.py      | 76 ++++++++++---------
 .../ibis/backends/bigquery/registry.py        |  7 +-
 .../ibis/expr/operations/json.py              |  6 +-
 6 files changed, 84 insertions(+), 73 deletions(-)

diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py
index 235ad22568..7595b08237 100644
--- a/bigframes/bigquery/__init__.py
+++ b/bigframes/bigquery/__init__.py
@@ -177,8 +177,11 @@ def json_set(
         >>> bpd.options.display.progress_bar = None
 
         >>> s = bpd.read_gbq("SELECT JSON '{\\\"a\\\": 1}' AS data")["data"]
-        >>> bbq.json_set(s, json_path_value_pairs=[("$.a", 100), ("$.b", "hi")])
-            0    {"a":100,"b":"hi"}
+        >>> bbq.json_set(s, json_path_value_pairs=[("$.a", 100)])
+            0    {"a":100}
+            Name: data, dtype: string
+        >>> bbq.json_set(s, json_path_value_pairs=[("$.b", "hi")])
+            0    {"a":1,"b":"hi"}
             Name: data, dtype: string
 
     Args:
@@ -193,19 +196,23 @@ def json_set(
     """
     # SQLGlot parser does not support the "create_if_missing => true" syntax, so
     # create_if_missing is not currently implemented.
-    json_path_value_tuples = []
-    for json_path_value_pair in json_path_value_pairs:
-        if len(json_path_value_pair) != 2:
-            raise ValueError(
-                "Incorrect format: Expected (<json_path>, <json_value>), but found: "
-                + f"{json_path_value_pair}"
-            )
-        json_path_value_tuples.append(tuple(json_path_value_pair))
-
-    return series._apply_unary_op(
-        ops.JSONSet(
-            json_path_value_pairs=tuple(json_path_value_tuples),
+
+    # Currently limited to single JSON path/value pairs (binary operations only).
+    if len(json_path_value_pairs) != 1:
+        raise ValueError(
+            "Expected exactly one JSON path and value pair but found "
+            + f"{len(json_path_value_pairs)} pairs."
         )
+
+    if len(json_path_value_pairs[0]) != 2:
+        raise ValueError(
+            "Incorrect format: Expected (<json_path>, <json_value>), but found: "
+            + f"{json_path_value_pairs[0]}"
+        )
+
+    json_path, json_value = json_path_value_pairs[0]
+    return series._apply_binary_op(
+        json_value, ops.JSONSet(json_path=json_path), alignment="left"
     )
 
 
diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
index a0ec40f200..dbcf38f9ca 100644
--- a/bigframes/core/compile/scalar_op_compiler.py
+++ b/bigframes/core/compile/scalar_op_compiler.py
@@ -895,11 +895,12 @@ def array_to_string_op_impl(x: ibis_types.Value, op: ops.ArrayToStringOp):
 
 
 # JSON Ops
-@scalar_op_compiler.register_unary_op(ops.JSONSet, pass_op=True)
-def json_set_op_impl(x: ibis_types.Value, op: ops.JSONSet):
+@scalar_op_compiler.register_binary_op(ops.JSONSet, pass_op=True)
+def json_set_op_impl(x: ibis_types.Value, y: ibis_types.Value, op: ops.JSONSet):
     return vendored_ibis_ops.JSONSet(
         x,
-        json_path_value_pairs=op.json_path_value_pairs,
+        json_value=y,
+        json_path=op.json_path,
     ).to_expr()
 
 
diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py
index ee4e75e86c..b288a82f3e 100644
--- a/bigframes/operations/__init__.py
+++ b/bigframes/operations/__init__.py
@@ -602,19 +602,6 @@ def output_type(self, *input_types):
         return dtypes.STRING_DTYPE
 
 
-## JSON Ops
-@dataclasses.dataclass(frozen=True)
-class JSONSet(UnaryOp):
-    name: typing.ClassVar[str] = "json_set"
-    json_path_value_pairs: typing.Tuple[typing.Tuple[str, typing.Any], ...]
-
-    def output_type(self, *input_types):
-        input_type = input_types[0]
-        if not dtypes.is_json_like(input_type):
-            raise TypeError("Input type must be an JSON or JSON-formatted string type.")
-        return input_type
-
-
 # Binary Ops
 fillna_op = create_binary_op(name="fillna", type_signature=op_typing.COERCE)
 maximum_op = create_binary_op(name="maximum", type_signature=op_typing.COERCE)
@@ -720,6 +707,19 @@ def output_type(self, *input_types):
 strconcat_op = StrConcatOp()
 
 
+## JSON Ops
+@dataclasses.dataclass(frozen=True)
+class JSONSet(BinaryOp):
+    name: typing.ClassVar[str] = "json_set"
+    json_path: str
+
+    def output_type(self, *input_types):
+        input_type = input_types[0]
+        if not dtypes.is_json_like(input_type):
+            raise TypeError("Input type must be an JSON or JSON-formatted string type.")
+        return input_type
+
+
 # Ternary Ops
 @dataclasses.dataclass(frozen=True)
 class WhereOp(TernaryOp):
diff --git a/tests/system/small/bigquery/test_json.py b/tests/system/small/bigquery/test_json.py
index 17690cacdf..0d653b55be 100644
--- a/tests/system/small/bigquery/test_json.py
+++ b/tests/system/small/bigquery/test_json.py
@@ -15,6 +15,7 @@
 import json
 
 import pandas as pd
+import pytest
 
 import bigframes.bigquery as bbq
 import bigframes.pandas as bpd
@@ -27,16 +28,17 @@ def _get_series_from_json(json_data):
     return bpd.read_gbq(sql)["data"]
 
 
-def test_json_set():
-    init_json = [
-        {"a": 1},
-    ]
-    s = _get_series_from_json(init_json)
-    actual = bbq.json_set(s, json_path_value_pairs=[("$.a", 10)])
+@pytest.mark.parametrize(
+    ("json_path", "expected_json"),
+    [
+        pytest.param("$.a", [{"a": 10}], id="simple"),
+        pytest.param("$.a.b.c", [{"a": {"b": {"c": 10, "d": []}}}], id="nested"),
+    ],
+)
+def test_json_set_at_json_path(json_path, expected_json):
+    s = _get_series_from_json([{"a": {"b": {"c": "tester", "d": []}}}])
+    actual = bbq.json_set(s, json_path_value_pairs=[(json_path, 10)])
 
-    expected_json = [
-        {"a": 10},
-    ]
     expected = _get_series_from_json(expected_json)
     pd.testing.assert_series_equal(
         actual.to_pandas(),
@@ -44,16 +46,19 @@ def test_json_set():
     )
 
 
-def test_json_set_w_nested_json():
-    init_json = [
-        {"a": {"b": {"c": "tester", "d": []}}},
-    ]
-    s = _get_series_from_json(init_json)
-    actual = bbq.json_set(s, json_path_value_pairs=[("$.a.b.c", "user")])
+@pytest.mark.parametrize(
+    ("json_value", "expected_json"),
+    [
+        pytest.param(10, [{"a": {"b": 10}}, {"a": {"b": 10}}], id="int"),
+        pytest.param(0.333, [{"a": {"b": 0.333}}, {"a": {"b": 0.333}}], id="float"),
+        pytest.param("eng", [{"a": {"b": "eng"}}, {"a": {"b": "eng"}}], id="string"),
+        pytest.param([1, 1], [{"a": {"b": 1}}, {"a": {"b": 1}}], id="series"),
+    ],
+)
+def test_json_set_at_json_value_type(json_value, expected_json):
+    s = _get_series_from_json([{"a": {"b": "dev"}}, {"a": {"b": [1, 2]}}])
+    actual = bbq.json_set(s, json_path_value_pairs=[("$.a.b", json_value)])
 
-    expected_json = [
-        {"a": {"b": {"c": "user", "d": []}}},
-    ]
     expected = _get_series_from_json(expected_json)
     pd.testing.assert_series_equal(
         actual.to_pandas(),
@@ -61,20 +66,21 @@ def test_json_set_w_nested_json():
     )
 
 
-def test_json_set_w_ordered_pairs():
-    init_json: object = [
-        {"a": {"b": {"c": {}}}},
-    ]
-    s = _get_series_from_json(init_json)
-    actual = bbq.json_set(
-        s, json_path_value_pairs=[("$.a.b.e", "user"), ("$.a.b.e", "dev")]
-    )
-
-    expected_json: object = [
-        {"a": {"b": {"c": {}, "e": "dev"}}},
-    ]
-    expected = _get_series_from_json(expected_json)
-    pd.testing.assert_series_equal(
-        actual.to_pandas(),
-        expected.to_pandas(),
-    )
+@pytest.mark.parametrize(
+    ("json_path_value_pairs"),
+    [
+        pytest.param(
+            [("$.a", 1), ("$.b", 2)],
+            id="two_pairs",
+            marks=pytest.mark.xfail(raises=ValueError),
+        ),
+        pytest.param(
+            [("$.a", 1, 100)],
+            id="invalid_pair",
+            marks=pytest.mark.xfail(raises=ValueError),
+        ),
+    ],
+)
+def test_json_set_w_invalid_param(json_path_value_pairs):
+    s = _get_series_from_json([{"a": {"b": {"c": {}, "e": "dev"}}}])
+    bbq.json_set(s, json_path_value_pairs=json_path_value_pairs)
diff --git a/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py b/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py
index f2dd0b7602..863661759a 100644
--- a/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py
+++ b/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py
@@ -60,10 +60,9 @@ def _array_aggregate(translator, op: vendored_ibis_ops.ArrayAggregate):
 
 def _json_set(translator, op: vendored_ibis_ops.JSONSet):
     arg = translator.translate(op.arg)
-    json_path_value_pairs_list = [
-        translator.translate(item) for pair in op.json_path_value_pairs for item in pair
-    ]
-    return f"JSON_SET(PARSE_JSON({arg}), {', '.join(json_path_value_pairs_list)})"
+    json_value = translator.translate(op.json_value)
+    json_path = translator.translate(op.json_path)
+    return f"JSON_SET(PARSE_JSON({arg}), {json_path}, {json_value})"
 
 
 patched_ops = {
diff --git a/third_party/bigframes_vendored/ibis/expr/operations/json.py b/third_party/bigframes_vendored/ibis/expr/operations/json.py
index 2ea2e2aed7..c3eaff206c 100644
--- a/third_party/bigframes_vendored/ibis/expr/operations/json.py
+++ b/third_party/bigframes_vendored/ibis/expr/operations/json.py
@@ -1,7 +1,6 @@
 # Contains code from https://github.com/ibis-project/ibis/blob/master/ibis/expr/operations/json.py
 from __future__ import annotations
 
-import ibis.common.typing as ibis_typing
 import ibis.expr.datatypes as dt
 import ibis.expr.operations.core as ibis_ops_core
 import ibis.expr.rules as rlz
@@ -12,9 +11,8 @@ class ToJsonString(ibis_ops_core.Unary):
 
 
 class JSONSet(ibis_ops_core.Unary):
-    json_path_value_pairs: ibis_typing.VarTuple[
-        ibis_typing.VarTuple[ibis_ops_core.Value[dt.Any]]
-    ]
+    json_value: ibis_ops_core.Value[dt.Any]
+    json_path: ibis_ops_core.Value[dt.String]
 
     shape = rlz.shape_like("arg")
     dtype = rlz.dtype_like("arg")

From fdb9c40990256ae378fa6f5488eb29cf7fd0391b Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Fri, 28 Jun 2024 21:28:56 +0000
Subject: [PATCH 6/7] support multiple path/value pairs, define JSON functions as ibis UDFs

---
 bigframes/bigquery/__init__.py                | 27 +++++++---------
 bigframes/core/compile/scalar_op_compiler.py  | 32 ++++++++++++++++---
 bigframes/dtypes.py                           |  6 ++++
 bigframes/operations/__init__.py              | 19 ++++++++---
 .../ibis/backends/bigquery/registry.py        |  8 -----
 .../ibis/expr/operations/json.py              |  9 ------
 6 files changed, 59 insertions(+), 42 deletions(-)

diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py
index 7595b08237..e89cb8201b 100644
--- a/bigframes/bigquery/__init__.py
+++ b/bigframes/bigquery/__init__.py
@@ -197,23 +197,18 @@ def json_set(
     # SQLGlot parser does not support the "create_if_missing => true" syntax, so
     # create_if_missing is not currently implemented.
 
-    # Currently limited to single JSON path/value pairs (binary operations only).
-    if len(json_path_value_pairs) != 1:
-        raise ValueError(
-            "Expected exactly one JSON path and value pair but found "
-            + f"{len(json_path_value_pairs)} pairs."
-        )
-
-    if len(json_path_value_pairs[0]) != 2:
-        raise ValueError(
-            "Incorrect format: Expected (<json_path>, <json_value>), but found: "
-            + f"{json_path_value_pairs[0]}"
+    for json_path_value_pair in json_path_value_pairs:
+        if len(json_path_value_pair) != 2:
+            raise ValueError(
+                "Incorrect format: Expected (<json_path>, <json_value>), but found: "
+                + f"{json_path_value_pair}"
+            )
+
+        json_path, json_value = json_path_value_pair
+        series = series._apply_binary_op(
+            json_value, ops.JSONSet(json_path=json_path), alignment="left"
         )
-
-    json_path, json_value = json_path_value_pairs[0]
-    return series._apply_binary_op(
-        json_value, ops.JSONSet(json_path=json_path), alignment="left"
-    )
+    return series
 
 
 def vector_search(
diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
index dbcf38f9ca..0bc9f2e370 100644
--- a/bigframes/core/compile/scalar_op_compiler.py
+++ b/bigframes/core/compile/scalar_op_compiler.py
@@ -897,11 +897,21 @@ def array_to_string_op_impl(x: ibis_types.Value, op: ops.ArrayToStringOp):
 # JSON Ops
 @scalar_op_compiler.register_binary_op(ops.JSONSet, pass_op=True)
 def json_set_op_impl(x: ibis_types.Value, y: ibis_types.Value, op: ops.JSONSet):
-    return vendored_ibis_ops.JSONSet(
-        x,
-        json_value=y,
-        json_path=op.json_path,
-    ).to_expr()
+    if x.type().is_json():
+        return json_set(
+            json_obj=x,
+            json_path=op.json_path,
+            json_value=y,
+        ).to_expr()
+    else:
+        # Parse and re-serialize strings; a native JSON dtype would avoid this overhead.
+        return vendored_ibis_ops.ToJsonString(
+            json_set(
+                json_obj=parse_json(x),
+                json_path=op.json_path,
+                json_value=y,
+            )
+        ).to_expr()
 
 
 ### Binary Ops
@@ -1479,3 +1489,15 @@ def float_floor(a: float) -> float:
 def float_ceil(a: float) -> float:
     """Convert string to timestamp."""
     return 0  # pragma: NO COVER
+
+
+@ibis.udf.scalar.builtin(name="parse_json")
+def parse_json(a: str) -> ibis_dtypes.JSON:
+    """Converts a JSON-formatted STRING value to a JSON value."""
+
+
+@ibis.udf.scalar.builtin(name="json_set")
+def json_set(
+    json_obj: ibis_dtypes.JSON, json_path: ibis_dtypes.str, json_value
+) -> ibis_dtypes.JSON:
+    """Produces a new SQL JSON value with the specified JSON data inserted or replaced."""
diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
index 441f1f10b2..160802ded9 100644
--- a/bigframes/dtypes.py
+++ b/bigframes/dtypes.py
@@ -245,6 +245,12 @@ def is_json_like(type: ExpressionType) -> bool:
     return type == STRING_DTYPE
 
 
+def is_json_encoding_type(type: ExpressionType) -> bool:
+    # Types that can be encoded as JSON (currently everything except GEO_DTYPE).
+    # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_encodings
+    return type != GEO_DTYPE
+
+
 def is_numeric(type: ExpressionType) -> bool:
     return type in NUMERIC_BIGFRAMES_TYPES_PERMISSIVE
 
diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py
index b288a82f3e..145c415ca0 100644
--- a/bigframes/operations/__init__.py
+++ b/bigframes/operations/__init__.py
@@ -714,10 +714,21 @@ class JSONSet(BinaryOp):
     json_path: str
 
     def output_type(self, *input_types):
-        input_type = input_types[0]
-        if not dtypes.is_json_like(input_type):
-            raise TypeError("Input type must be an JSON or JSON-formatted string type.")
-        return input_type
+        left_type = input_types[0]
+        right_type = input_types[1]
+        if not dtypes.is_json_like(left_type):
+            raise TypeError(
+                "Input type must be a valid JSON object or JSON-formatted string type."
+                + f" Received type: {left_type}"
+            )
+        if not dtypes.is_json_encoding_type(right_type):
+            raise TypeError(
+                "The value to be assigned must be a type that can be encoded as JSON."
+                + f" Received type: {right_type}"
+            )
+
+        # TODO: Once a native JSON dtype is implemented, always return JSON data here.
+        return left_type
 
 
 # Ternary Ops
diff --git a/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py b/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py
index 863661759a..ecef2115e5 100644
--- a/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py
+++ b/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py
@@ -58,13 +58,6 @@ def _array_aggregate(translator, op: vendored_ibis_ops.ArrayAggregate):
     return f"ARRAY_AGG({arg} IGNORE NULLS {order_by_sql})"
 
 
-def _json_set(translator, op: vendored_ibis_ops.JSONSet):
-    arg = translator.translate(op.arg)
-    json_value = translator.translate(op.json_value)
-    json_path = translator.translate(op.json_path)
-    return f"JSON_SET(PARSE_JSON({arg}), {json_path}, {json_value})"
-
-
 patched_ops = {
     vendored_ibis_ops.ApproximateMultiQuantile: _approx_quantiles,  # type:ignore
     vendored_ibis_ops.FirstNonNullValue: _first_non_null_value,  # type:ignore
@@ -74,7 +67,6 @@ def _json_set(translator, op: vendored_ibis_ops.JSONSet):
     vendored_ibis_ops.SafeCastToDatetime: _safe_cast_to_datetime,  # type:ignore
     ibis_reductions.Quantile: _quantile,  # type:ignore
     vendored_ibis_ops.ArrayAggregate: _array_aggregate,  # type:ignore
-    vendored_ibis_ops.JSONSet: _json_set,  # type:ignore
 }
 
 OPERATION_REGISTRY.update(patched_ops)
diff --git a/third_party/bigframes_vendored/ibis/expr/operations/json.py b/third_party/bigframes_vendored/ibis/expr/operations/json.py
index c3eaff206c..1eb0554137 100644
--- a/third_party/bigframes_vendored/ibis/expr/operations/json.py
+++ b/third_party/bigframes_vendored/ibis/expr/operations/json.py
@@ -3,16 +3,7 @@
 
 import ibis.expr.datatypes as dt
 import ibis.expr.operations.core as ibis_ops_core
-import ibis.expr.rules as rlz
 
 
 class ToJsonString(ibis_ops_core.Unary):
     dtype = dt.string
-
-
-class JSONSet(ibis_ops_core.Unary):
-    json_value: ibis_ops_core.Value[dt.Any]
-    json_path: ibis_ops_core.Value[dt.String]
-
-    shape = rlz.shape_like("arg")
-    dtype = rlz.dtype_like("arg")

From 537e4b59abbfbcb5f429354fb5d10e4d3e4af6ec Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Fri, 28 Jun 2024 22:20:11 +0000
Subject: [PATCH 7/7] add more tests

---
 bigframes/bigquery/__init__.py           |  7 +--
 tests/system/small/bigquery/test_json.py | 57 +++++++++++++++++++-----
 2 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py
index e89cb8201b..ec26d14f33 100644
--- a/bigframes/bigquery/__init__.py
+++ b/bigframes/bigquery/__init__.py
@@ -177,11 +177,8 @@ def json_set(
         >>> bpd.options.display.progress_bar = None
 
         >>> s = bpd.read_gbq("SELECT JSON '{\\\"a\\\": 1}' AS data")["data"]
-        >>> bbq.json_set(s, json_path_value_pairs=[("$.a", 100)])
-            0    {"a":100}
-            Name: data, dtype: string
-        >>> bbq.json_set(s, json_path_value_pairs=[("$.b", "hi")])
-            0    {"a":1,"b":"hi"}
+        >>> bbq.json_set(s, json_path_value_pairs=[("$.a", 100), ("$.b", "hi")])
+            0    {"a":100,"b":"hi"}
             Name: data, dtype: string
 
     Args:
diff --git a/tests/system/small/bigquery/test_json.py b/tests/system/small/bigquery/test_json.py
index 0d653b55be..ff759b8fda 100644
--- a/tests/system/small/bigquery/test_json.py
+++ b/tests/system/small/bigquery/test_json.py
@@ -14,6 +14,7 @@
 
 import json
 
+import geopandas as gpd  # type: ignore
 import pandas as pd
 import pytest
 
@@ -23,9 +24,13 @@
 
 def _get_series_from_json(json_data):
     sql = " UNION ALL ".join(
-        [f"SELECT JSON '{json.dumps(data)}' AS data" for data in json_data]
+        [
+            f"SELECT {id} AS id, JSON '{json.dumps(data)}' AS data"
+            for id, data in enumerate(json_data)
+        ]
     )
-    return bpd.read_gbq(sql)["data"]
+    df = bpd.read_gbq(sql).set_index("id").sort_index()
+    return df["data"]
 
 
 @pytest.mark.parametrize(
@@ -52,7 +57,7 @@ def test_json_set_at_json_path(json_path, expected_json):
         pytest.param(10, [{"a": {"b": 10}}, {"a": {"b": 10}}], id="int"),
         pytest.param(0.333, [{"a": {"b": 0.333}}, {"a": {"b": 0.333}}], id="float"),
         pytest.param("eng", [{"a": {"b": "eng"}}, {"a": {"b": "eng"}}], id="string"),
-        pytest.param([1, 1], [{"a": {"b": 1}}, {"a": {"b": 1}}], id="series"),
+        pytest.param([1, 2], [{"a": {"b": 1}}, {"a": {"b": 2}}], id="series"),
     ],
 )
 def test_json_set_at_json_value_type(json_value, expected_json):
@@ -66,21 +71,39 @@ def test_json_set_at_json_value_type(json_value, expected_json):
     )
 
 
-@pytest.mark.parametrize(
-    ("json_path_value_pairs"),
-    [
-        pytest.param(
-            [("$.a", 1), ("$.b", 2)],
-            id="two_pairs",
-            marks=pytest.mark.xfail(raises=ValueError),
-        ),
-        pytest.param(
-            [("$.a", 1, 100)],
-            id="invalid_pair",
-            marks=pytest.mark.xfail(raises=ValueError),
-        ),
-    ],
-)
-def test_json_set_w_invalid_param(json_path_value_pairs):
-    s = _get_series_from_json([{"a": {"b": {"c": {}, "e": "dev"}}}])
-    bbq.json_set(s, json_path_value_pairs=json_path_value_pairs)
+def test_json_set_w_more_pairs():
+    s = _get_series_from_json([{"a": 2}, {"b": 5}, {"c": 1}])
+    actual = bbq.json_set(
+        s, json_path_value_pairs=[("$.a", 1), ("$.b", 2), ("$.a", [3, 4, 5])]
+    )
+    expected = _get_series_from_json(
+        [{"a": 3, "b": 2}, {"a": 4, "b": 2}, {"a": 5, "b": 2, "c": 1}]
+    )
+    pd.testing.assert_series_equal(
+        actual.to_pandas(),
+        expected.to_pandas(),
+    )
+
+
+def test_json_set_w_invalid_json_path_value_pairs():
+    # Inputs are built inside each test (not in parametrize arguments) so that no
+    # BigQuery query runs at collection time, and pytest.raises is used instead of
+    # xfail so a missing exception fails the test rather than reporting XPASS.
+    s = _get_series_from_json([{"a": 10}])
+    with pytest.raises(ValueError):
+        bbq.json_set(s, json_path_value_pairs=[("$.a", 1, 100)])  # type: ignore
+
+
+def test_json_set_w_invalid_json_value_type():
+    s = _get_series_from_json([{"a": 10}])
+    geography_values = bpd.read_pandas(
+        gpd.GeoSeries.from_wkt(["POINT (1 2)", "POINT (2 1)"])
+    )
+    with pytest.raises(TypeError):
+        bbq.json_set(s, json_path_value_pairs=[("$.a", geography_values)])
+
+
+def test_json_set_w_invalid_series_type():
+    s = bpd.Series([1, 2])
+    with pytest.raises(TypeError):
+        bbq.json_set(s, json_path_value_pairs=[("$.a", 1)])