diff --git a/bigframes/functions/remote_function.py b/bigframes/functions/remote_function.py index 7e9df74e76..ddb36a9bef 100644 --- a/bigframes/functions/remote_function.py +++ b/bigframes/functions/remote_function.py @@ -14,6 +14,7 @@ from __future__ import annotations +import inspect import logging from typing import cast, Optional, TYPE_CHECKING import warnings @@ -149,6 +150,13 @@ def func(*ignored_args, **ignored_kwargs): expr = node(*ignored_args, **ignored_kwargs) # type: ignore return ibis_client.execute(expr) + func.__signature__ = inspect.signature(func).replace( # type: ignore + parameters=[ + inspect.Parameter(name, inspect.Parameter.POSITIONAL_OR_KEYWORD) + for name in ibis_signature.parameter_names + ] + ) + # TODO: Move ibis logic to compiler step func.__name__ = routine_ref.routine_id diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index fba1d41e30..f71b4e5c15 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -1239,12 +1239,22 @@ def read_gbq_function( **Examples:** - Use the ``cw_lower_case_ascii_only`` function from Community UDFs. - (https://github.com/GoogleCloudPlatform/bigquery-utils/blob/master/udfs/community/cw_lower_case_ascii_only.sqlx) - >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None + Use the [cw_lower_case_ascii_only](https://github.com/GoogleCloudPlatform/bigquery-utils/blob/master/udfs/community/README.md#cw_lower_case_ascii_onlystr-string) + function from Community UDFs. + + >>> func = bpd.read_gbq_function("bqutil.fn.cw_lower_case_ascii_only") + + You can run it on scalar input. Usually you would do so to verify that + it works as expected before applying to all values in a Series. + + >>> func('AURÉLIE') + 'aurÉlie' + + You can apply it to a BigQuery DataFrame Series. + >>> df = bpd.DataFrame({'id': [1, 2, 3], 'name': ['AURÉLIE', 'CÉLESTINE', 'DAPHNÉ']}) >>> df id name @@ -1254,7 +1264,6 @@ def read_gbq_function( [3 rows x 2 columns] - >>> func = bpd.read_gbq_function("bqutil.fn.cw_lower_case_ascii_only") >>> df1 = df.assign(new_name=df['name'].apply(func)) >>> df1 id name new_name @@ -1264,9 +1273,17 @@ def read_gbq_function( [3 rows x 3 columns] + You can even use a function with multiple inputs. For example, let's use + [cw_instr4](https://github.com/GoogleCloudPlatform/bigquery-utils/blob/master/udfs/community/README.md#cw_instr4source-string-search-string-position-int64-ocurrence-int64) + from Community UDFs. + + >>> func = bpd.read_gbq_function("bqutil.fn.cw_instr4") + >>> func('TestStr123456Str', 'Str', 1, 2) + 14 + Args: function_name (str): - the function's name in BigQuery in the format + The function's name in BigQuery in the format `project_id.dataset_id.function_name`, or `dataset_id.function_name` to load from the default project, or `function_name` to load from the default project and the dataset diff --git a/tests/system/small/test_remote_function.py b/tests/system/small/test_remote_function.py index db573efa40..b000354ed4 100644 --- a/tests/system/small/test_remote_function.py +++ b/tests/system/small/test_remote_function.py @@ -671,12 +671,19 @@ def square1(x): @pytest.mark.flaky(retries=2, delay=120) -def test_read_gbq_function_runs_existing_udf(session, bigquery_client, dataset_id): +def test_read_gbq_function_runs_existing_udf(session): func = session.read_gbq_function("bqutil.fn.cw_lower_case_ascii_only") got = func("AURÉLIE") assert got == "aurÉlie" +@pytest.mark.flaky(retries=2, delay=120) +def test_read_gbq_function_runs_existing_udf_4_params(session): + func = session.read_gbq_function("bqutil.fn.cw_instr4") + got = func("TestStr123456Str", "Str", 1, 2) + assert got == 14 + + @pytest.mark.flaky(retries=2, delay=120) def test_read_gbq_function_reads_udfs(session, bigquery_client, dataset_id): dataset_ref = bigquery.DatasetReference.from_string(dataset_id)