From 97a948c965f4f8385b290b6fdf85df4141d4d9ed Mon Sep 17 00:00:00 2001 From: Henry Solberg Date: Fri, 12 Apr 2024 16:58:17 -0700 Subject: [PATCH 1/5] feat: support list of numerics in pandas.cut (#580) An internal user encountered this missing overload --- bigframes/ml/core.py | 40 ++++++++++++++ bigframes/ml/llm.py | 86 ++++++++++++++++++++++++++++++- bigframes/ml/sql.py | 18 +++++++ tests/system/conftest.py | 13 +++++ tests/system/large/ml/test_llm.py | 36 +++++++++++++ tests/system/small/ml/conftest.py | 12 +++++ tests/unit/ml/test_sql.py | 23 +++++++++ 7 files changed, 227 insertions(+), 1 deletion(-) create mode 100644 tests/system/large/ml/test_llm.py diff --git a/bigframes/ml/core.py b/bigframes/ml/core.py index 04aaeec1bc..b94ae39687 100644 --- a/bigframes/ml/core.py +++ b/bigframes/ml/core.py @@ -321,6 +321,46 @@ def create_model( return self._create_model_with_sql(session=session, sql=sql) + def create_llm_remote_model( + self, + X_train: bpd.DataFrame, + y_train: bpd.DataFrame, + connection_name: str, + options: Mapping[str, Union[str, int, float, Iterable[str]]] = {}, + ) -> BqmlModel: + """Create a session-temporary BQML model with the CREATE OR REPLACE MODEL statement + + Args: + X_train: features columns for training + y_train: labels columns for training + options: a dict of options to configure the model. Generates a BQML OPTIONS + clause + connection_name: + a BQ connection to talk with Vertex AI, of the format ... https://cloud.google.com/bigquery/docs/create-cloud-resource-connection + + Returns: a BqmlModel, wrapping a trained model in BigQuery + """ + options = dict(options) + # Cache dataframes to make sure base table is not a snapshot + # cached dataframe creates a full copy, never uses snapshot + input_data = X_train._cached(force=True).join( + y_train._cached(force=True), how="outer" + ) + options.update({"INPUT_LABEL_COLS": y_train.columns.tolist()}) + + session = X_train._session + + model_ref = self._create_model_ref(session._anonymous_dataset) + + sql = self._model_creation_sql_generator.create_llm_remote_model( + source_df=input_data, + model_ref=model_ref, + options=options, + connection_name=connection_name, + ) + + return self._create_model_with_sql(session=session, sql=sql) + def create_time_series_model( self, X_train: bpd.DataFrame, diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index 31c691fd51..52569df161 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -27,6 +27,11 @@ from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd +_BQML_PARAMS_MAPPING = { + "max_iterations": "maxIterations", + "evaluation_task": "evaluationTask", +} + _TEXT_GENERATOR_BISON_ENDPOINT = "text-bison" _TEXT_GENERATOR_BISON_32K_ENDPOINT = "text-bison-32k" _TEXT_GENERATOR_ENDPOINTS = ( @@ -51,6 +56,12 @@ class PaLM2TextGenerator(base.BaseEstimator): """PaLM2 text generator LLM model. + .. note:: + This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the + Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" + and might have limited support. For more information, see the launch stage descriptions + (https://cloud.google.com/products#product-launch-stages). + Args: model_name (str, Default to "text-bison"): The model for natural language tasks. “text-bison” returns model fine-tuned to follow natural language instructions @@ -62,6 +73,11 @@ class PaLM2TextGenerator(base.BaseEstimator): Connection to connect with remote service. str of the format ... if None, use default connection in session context. BigQuery DataFrame will try to create the connection and attach permission if the connection isn't fully setup. + max_iterations (Optional[int], Default to 300): + The number of steps to run when performing supervised tuning. + evaluation_task (Optional[str], default to "UNSPECIFIED"): + When performing supervised tuning, the type of task that you want to tune the model to perform. Possible values: + "TEXT_GENERATION", "CLASSIFICATION", "SUMMARIZATION", "QUESTION_ANSWERING", "UNSPECIFIED". Default to "UNSPECIFIED". """ def __init__( @@ -70,9 +86,19 @@ def __init__( model_name: Literal["text-bison", "text-bison-32k"] = "text-bison", session: Optional[bigframes.Session] = None, connection_name: Optional[str] = None, + max_iterations: int = 300, + evaluation_task: Literal[ + "UNSPECIFIED", + "TEXT_GENERATION", + "CLASSIFICATION", + "SUMMARIZATION", + "QUESTION_ANSWERING", + ] = "UNSPECIFIED", ): self.model_name = model_name self.session = session or bpd.get_global_session() + self.max_iterations = max_iterations + self.evaluation_task = evaluation_task self._bq_connection_manager = self.session.bqconnectionmanager connection_name = connection_name or self.session._bq_connection @@ -132,12 +158,70 @@ def _from_bq( model_connection = model._properties["remoteModelInfo"]["connection"] model_endpoint = bqml_endpoint.split("/")[-1] + # Get the optional params + kwargs: dict = {} + last_fitting = model.training_runs[-1]["trainingOptions"] + + dummy_arima = cls() + for bf_param, _ in dummy_arima.__dict__.items(): + bqml_param = _BQML_PARAMS_MAPPING.get(bf_param) + if bqml_param in last_fitting: + # Convert types + if bf_param in ["max_iterations"]: + kwargs[bf_param] = int(last_fitting[bqml_param]) + elif bf_param in ["evaluation_task"]: + kwargs[bf_param] = str(last_fitting[bqml_param]) + text_generator_model = cls( - session=session, model_name=model_endpoint, connection_name=model_connection + **kwargs, + session=session, + model_name=model_endpoint, + connection_name=model_connection, ) text_generator_model._bqml_model = core.BqmlModel(session, model) return text_generator_model + @property + def _bqml_options(self) -> dict: + """The model options as they will be set for BQML""" + options = { + "max_iterations": self.max_iterations, + "data_split_method": "NO_SPLIT", + "evaluation_task": self.evaluation_task, + } + return options + + def fit( + self, + X: Union[bpd.DataFrame, bpd.Series], + y: Union[bpd.DataFrame, bpd.Series], + ) -> PaLM2TextGenerator: + """Fine tune PaLM2TextGenerator model. + + Args: + X (bigframes.dataframe.DataFrame or bigframes.series.Series): + DataFrame of shape (n_samples, n_features). Training data. + y (bigframes.dataframe.DataFrame or bigframes.series.Series: + Training labels. + + Returns: + PaLM2TextGenerator: Fitted Estimator. + """ + X, y = utils.convert_to_dataframe(X, y) + + # TODO(ashleyxu): options= self._bqml_options + options = self._bqml_options + options["endpoint"] = self.model_name + "@001" + options["prompt_col"] = X.columns.tolist()[0] + + self._bqml_model = self._bqml_model_factory.create_llm_remote_model( + X, + y, + options=options, + connection_name=self.connection_name, + ) + return self + def predict( self, X: Union[bpd.DataFrame, bpd.Series], diff --git a/bigframes/ml/sql.py b/bigframes/ml/sql.py index fab358cce3..a81790173e 100644 --- a/bigframes/ml/sql.py +++ b/bigframes/ml/sql.py @@ -177,6 +177,24 @@ def create_model( parts.append(f"AS {source_sql}") return "\n".join(parts) + # Model create and alter + def create_llm_remote_model( + self, + source_df: bpd.DataFrame, + connection_name: str, + model_ref: google.cloud.bigquery.ModelReference, + options: Mapping[str, Union[str, int, float, Iterable[str]]] = {}, + ) -> str: + """Encode the CREATE OR REPLACE MODEL statement for BQML""" + source_sql = source_df.sql + + parts = [f"CREATE OR REPLACE MODEL {self._model_id_sql(model_ref)}"] + parts.append(self.connection(connection_name)) + if options: + parts.append(self.options(**options)) + parts.append(f"AS {source_sql}") + return "\n".join(parts) + def create_remote_model( self, connection_name: str, diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 70ff6eee39..2048c9751b 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -537,6 +537,19 @@ def penguins_df_default_index( return session.read_gbq(penguins_table_id) +@pytest.fixture(scope="session") +def llm_fine_tune_df_default_index( + session: bigframes.Session, +) -> bigframes.dataframe.DataFrame: + sql = """ +SELECT + CONCAT("Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: ", text) as prompt, + CAST(label AS STRING) as label +FROM `llm_tuning.emotion_classification_train` +""" + return session.read_gbq(sql) + + @pytest.fixture(scope="session") def time_series_df_default_index( time_series_table_id: str, session: bigframes.Session diff --git a/tests/system/large/ml/test_llm.py b/tests/system/large/ml/test_llm.py new file mode 100644 index 0000000000..1258c597a4 --- /dev/null +++ b/tests/system/large/ml/test_llm.py @@ -0,0 +1,36 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import bigframes.ml.llm + + +def test_llm_palm_configure_fit(llm_fine_tune_df_default_index, dataset_id): + model = bigframes.ml.llm.PaLM2TextGenerator( + model_name="text-bison", max_iterations=1, evaluation_task="CLASSIFICATION" + ) + + df = llm_fine_tune_df_default_index.dropna() + X_train = df[["prompt"]] + y_train = df[["label"]] + model.fit(X_train, y_train) + + # save, load, check parameters to ensure configuration was kept + reloaded_model = model.to_gbq( + f"{dataset_id}.temp_configured_palm_model", replace=True + ) + assert ( + f"{dataset_id}.temp_configured_palm_model" + in reloaded_model._bqml_model.model_name + ) + assert reloaded_model.evaluation_task == "CLASSIFICATION" diff --git a/tests/system/small/ml/conftest.py b/tests/system/small/ml/conftest.py index 33351afe45..3d380794bc 100644 --- a/tests/system/small/ml/conftest.py +++ b/tests/system/small/ml/conftest.py @@ -232,6 +232,18 @@ def palm2_text_generator_model(session, bq_connection) -> llm.PaLM2TextGenerator return llm.PaLM2TextGenerator(session=session, connection_name=bq_connection) +@pytest.fixture(scope="session") +def palm2_text_generator_fine_tune_model( + session, bq_connection +) -> llm.PaLM2TextGenerator: + return llm.PaLM2TextGenerator( + session=session, + connection_name=bq_connection, + max_iterations=300, + evaluation_task="TEXT_GENERATION", + ) + + @pytest.fixture(scope="session") def palm2_text_generator_32k_model(session, bq_connection) -> llm.PaLM2TextGenerator: return llm.PaLM2TextGenerator( diff --git a/tests/unit/ml/test_sql.py b/tests/unit/ml/test_sql.py index 5b1ff37775..3560f05cb6 100644 --- a/tests/unit/ml/test_sql.py +++ b/tests/unit/ml/test_sql.py @@ -181,6 +181,29 @@ def test_create_model_transform_correct( ) +def test_create_llm_remote_model_correct( + model_creation_sql_generator: ml_sql.ModelCreationSqlGenerator, + mock_df: bpd.DataFrame, +): + sql = model_creation_sql_generator.create_llm_remote_model( + source_df=mock_df, + connection_name="my_project.us.my_connection", + model_ref=bigquery.ModelReference.from_string( + "test-proj._anonXYZ.create_remote_model" + ), + options={"option_key1": "option_value1", "option_key2": 2}, + ) + assert ( + sql + == """CREATE OR REPLACE MODEL `test-proj`.`_anonXYZ`.`create_remote_model` +REMOTE WITH CONNECTION `my_project.us.my_connection` +OPTIONS( + option_key1="option_value1", + option_key2=2) +AS input_X_y_sql""" + ) + + def test_create_remote_model_correct( model_creation_sql_generator: ml_sql.ModelCreationSqlGenerator, ): From 7f89428ff602516ef382d4831f65b8218dc449cc Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Tue, 16 Apr 2024 22:10:46 +0000 Subject: [PATCH 2/5] move the tests to load-testing --- tests/system/{large/ml => load}/test_llm.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/system/{large/ml => load}/test_llm.py (100%) diff --git a/tests/system/large/ml/test_llm.py b/tests/system/load/test_llm.py similarity index 100% rename from tests/system/large/ml/test_llm.py rename to tests/system/load/test_llm.py From e19f7ac2ce28814c06e21c8fe1082417b4810afc Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Wed, 17 Apr 2024 04:16:49 +0000 Subject: [PATCH 3/5] add predict tests --- tests/system/conftest.py | 14 ++++++++++++++ tests/system/load/test_llm.py | 22 ++++++++++++---------- tests/system/small/ml/conftest.py | 12 ------------ 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 2048c9751b..1eafb1a516 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -550,6 +550,20 @@ def llm_fine_tune_df_default_index( return session.read_gbq(sql) +@pytest.fixture(scope="session") +def llm_remote_text_pandas_df(): + """Additional data matching the penguins dataset, with a new index""" + return pd.DataFrame( + { + "prompt": [ + "Please do sentiment analysis on the following text and only output a number from 0 to 5where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: i feel beautifully emotional knowing that these women of whom i knew just a handful were holding me and my baba on our journey", + "Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: i was feeling a little vain when i did this one", + "Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: a father of children killed in an accident", + ], + } + ) + + @pytest.fixture(scope="session") def time_series_df_default_index( time_series_table_id: str, session: bigframes.Session diff --git a/tests/system/load/test_llm.py b/tests/system/load/test_llm.py index 1258c597a4..0882b81ccf 100644 --- a/tests/system/load/test_llm.py +++ b/tests/system/load/test_llm.py @@ -15,7 +15,9 @@ import bigframes.ml.llm -def test_llm_palm_configure_fit(llm_fine_tune_df_default_index, dataset_id): +def test_llm_palm_configure_fit( + llm_fine_tune_df_default_index, llm_remote_text_pandas_df +): model = bigframes.ml.llm.PaLM2TextGenerator( model_name="text-bison", max_iterations=1, evaluation_task="CLASSIFICATION" ) @@ -25,12 +27,12 @@ def test_llm_palm_configure_fit(llm_fine_tune_df_default_index, dataset_id): y_train = df[["label"]] model.fit(X_train, y_train) - # save, load, check parameters to ensure configuration was kept - reloaded_model = model.to_gbq( - f"{dataset_id}.temp_configured_palm_model", replace=True - ) - assert ( - f"{dataset_id}.temp_configured_palm_model" - in reloaded_model._bqml_model.model_name - ) - assert reloaded_model.evaluation_task == "CLASSIFICATION" + assert model is not None + + df = model.predict(llm_remote_text_pandas_df).to_pandas() + assert df.shape == (3, 4) + assert "ml_generate_text_llm_result" in df.columns + series = df["ml_generate_text_llm_result"] + assert all(series.str.len() == 1) + + # TODO(ashleyxu): After bqml rolled out version control: save, load, check parameters to ensure configuration was kept diff --git a/tests/system/small/ml/conftest.py b/tests/system/small/ml/conftest.py index 3d380794bc..33351afe45 100644 --- a/tests/system/small/ml/conftest.py +++ b/tests/system/small/ml/conftest.py @@ -232,18 +232,6 @@ def palm2_text_generator_model(session, bq_connection) -> llm.PaLM2TextGenerator return llm.PaLM2TextGenerator(session=session, connection_name=bq_connection) -@pytest.fixture(scope="session") -def palm2_text_generator_fine_tune_model( - session, bq_connection -) -> llm.PaLM2TextGenerator: - return llm.PaLM2TextGenerator( - session=session, - connection_name=bq_connection, - max_iterations=300, - evaluation_task="TEXT_GENERATION", - ) - - @pytest.fixture(scope="session") def palm2_text_generator_32k_model(session, bq_connection) -> llm.PaLM2TextGenerator: return llm.PaLM2TextGenerator( From a03f28405dbda59337671ac8c8c210784e752465 Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Wed, 17 Apr 2024 21:33:29 +0000 Subject: [PATCH 4/5] address comments --- bigframes/ml/llm.py | 16 --------------- bigframes/ml/sql.py | 1 - tests/system/conftest.py | 27 ------------------------ tests/system/load/test_llm.py | 34 +++++++++++++++++++++++++++++-- tests/system/small/ml/test_llm.py | 2 +- 5 files changed, 33 insertions(+), 47 deletions(-) diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index 52569df161..15a11e31fb 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -29,7 +29,6 @@ _BQML_PARAMS_MAPPING = { "max_iterations": "maxIterations", - "evaluation_task": "evaluationTask", } _TEXT_GENERATOR_BISON_ENDPOINT = "text-bison" @@ -75,9 +74,6 @@ class PaLM2TextGenerator(base.BaseEstimator): permission if the connection isn't fully setup. max_iterations (Optional[int], Default to 300): The number of steps to run when performing supervised tuning. - evaluation_task (Optional[str], default to "UNSPECIFIED"): - When performing supervised tuning, the type of task that you want to tune the model to perform. Possible values: - "TEXT_GENERATION", "CLASSIFICATION", "SUMMARIZATION", "QUESTION_ANSWERING", "UNSPECIFIED". Default to "UNSPECIFIED". """ def __init__( @@ -87,18 +83,10 @@ def __init__( session: Optional[bigframes.Session] = None, connection_name: Optional[str] = None, max_iterations: int = 300, - evaluation_task: Literal[ - "UNSPECIFIED", - "TEXT_GENERATION", - "CLASSIFICATION", - "SUMMARIZATION", - "QUESTION_ANSWERING", - ] = "UNSPECIFIED", ): self.model_name = model_name self.session = session or bpd.get_global_session() self.max_iterations = max_iterations - self.evaluation_task = evaluation_task self._bq_connection_manager = self.session.bqconnectionmanager connection_name = connection_name or self.session._bq_connection @@ -169,8 +157,6 @@ def _from_bq( # Convert types if bf_param in ["max_iterations"]: kwargs[bf_param] = int(last_fitting[bqml_param]) - elif bf_param in ["evaluation_task"]: - kwargs[bf_param] = str(last_fitting[bqml_param]) text_generator_model = cls( **kwargs, @@ -187,7 +173,6 @@ def _bqml_options(self) -> dict: options = { "max_iterations": self.max_iterations, "data_split_method": "NO_SPLIT", - "evaluation_task": self.evaluation_task, } return options @@ -209,7 +194,6 @@ def fit( """ X, y = utils.convert_to_dataframe(X, y) - # TODO(ashleyxu): options= self._bqml_options options = self._bqml_options options["endpoint"] = self.model_name + "@001" options["prompt_col"] = X.columns.tolist()[0] diff --git a/bigframes/ml/sql.py b/bigframes/ml/sql.py index a81790173e..59c768ce81 100644 --- a/bigframes/ml/sql.py +++ b/bigframes/ml/sql.py @@ -177,7 +177,6 @@ def create_model( parts.append(f"AS {source_sql}") return "\n".join(parts) - # Model create and alter def create_llm_remote_model( self, source_df: bpd.DataFrame, diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 1eafb1a516..70ff6eee39 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -537,33 +537,6 @@ def penguins_df_default_index( return session.read_gbq(penguins_table_id) -@pytest.fixture(scope="session") -def llm_fine_tune_df_default_index( - session: bigframes.Session, -) -> bigframes.dataframe.DataFrame: - sql = """ -SELECT - CONCAT("Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: ", text) as prompt, - CAST(label AS STRING) as label -FROM `llm_tuning.emotion_classification_train` -""" - return session.read_gbq(sql) - - -@pytest.fixture(scope="session") -def llm_remote_text_pandas_df(): - """Additional data matching the penguins dataset, with a new index""" - return pd.DataFrame( - { - "prompt": [ - "Please do sentiment analysis on the following text and only output a number from 0 to 5where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: i feel beautifully emotional knowing that these women of whom i knew just a handful were holding me and my baba on our journey", - "Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: i was feeling a little vain when i did this one", - "Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: a father of children killed in an accident", - ], - } - ) - - @pytest.fixture(scope="session") def time_series_df_default_index( time_series_table_id: str, session: bigframes.Session diff --git a/tests/system/load/test_llm.py b/tests/system/load/test_llm.py index 0882b81ccf..62ef7d5c72 100644 --- a/tests/system/load/test_llm.py +++ b/tests/system/load/test_llm.py @@ -12,14 +12,44 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pandas as pd +import pytest + import bigframes.ml.llm +@pytest.fixture(scope="session") +def llm_fine_tune_df_default_index( + session: bigframes.Session, +) -> bigframes.dataframe.DataFrame: + sql = """ +SELECT + CONCAT("Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: ", text) as prompt, + CAST(label AS STRING) as label +FROM `llm_tuning.emotion_classification_train` +""" + return session.read_gbq(sql) + + +@pytest.fixture(scope="session") +def llm_remote_text_pandas_df(): + """Additional data matching the penguins dataset, with a new index""" + return pd.DataFrame( + { + "prompt": [ + "Please do sentiment analysis on the following text and only output a number from 0 to 5where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: i feel beautifully emotional knowing that these women of whom i knew just a handful were holding me and my baba on our journey", + "Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: i was feeling a little vain when i did this one", + "Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: a father of children killed in an accident", + ], + } + ) + + def test_llm_palm_configure_fit( llm_fine_tune_df_default_index, llm_remote_text_pandas_df ): model = bigframes.ml.llm.PaLM2TextGenerator( - model_name="text-bison", max_iterations=1, evaluation_task="CLASSIFICATION" + model_name="text-bison", max_iterations=1 ) df = llm_fine_tune_df_default_index.dropna() @@ -35,4 +65,4 @@ def test_llm_palm_configure_fit( series = df["ml_generate_text_llm_result"] assert all(series.str.len() == 1) - # TODO(ashleyxu): After bqml rolled out version control: save, load, check parameters to ensure configuration was kept + # TODO(ashleyxu b/335492787): After bqml rolled out version control: save, load, check parameters to ensure configuration was kept diff --git a/tests/system/small/ml/test_llm.py b/tests/system/small/ml/test_llm.py index b9e4889801..6f6b67597a 100644 --- a/tests/system/small/ml/test_llm.py +++ b/tests/system/small/ml/test_llm.py @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 2f14f13aca286998833f5f4879ca8bd68c3b9356 Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Wed, 17 Apr 2024 22:16:26 +0000 Subject: [PATCH 5/5] address comments --- bigframes/ml/llm.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index 15a11e31fb..37a38cdd5c 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -55,12 +55,6 @@ class PaLM2TextGenerator(base.BaseEstimator): """PaLM2 text generator LLM model. - .. note:: - This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). - Args: model_name (str, Default to "text-bison"): The model for natural language tasks. “text-bison” returns model fine-tuned to follow natural language instructions @@ -150,8 +144,8 @@ def _from_bq( kwargs: dict = {} last_fitting = model.training_runs[-1]["trainingOptions"] - dummy_arima = cls() - for bf_param, _ in dummy_arima.__dict__.items(): + dummy_text_generator = cls() + for bf_param, _ in dummy_text_generator.__dict__.items(): bqml_param = _BQML_PARAMS_MAPPING.get(bf_param) if bqml_param in last_fitting: # Convert types @@ -183,6 +177,13 @@ def fit( ) -> PaLM2TextGenerator: """Fine tune PaLM2TextGenerator model. + .. note:: + + This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the + Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" + and might have limited support. For more information, see the launch stage descriptions + (https://cloud.google.com/products#product-launch-stages). + Args: X (bigframes.dataframe.DataFrame or bigframes.series.Series): DataFrame of shape (n_samples, n_features). Training data.