From 97a948c965f4f8385b290b6fdf85df4141d4d9ed Mon Sep 17 00:00:00 2001
From: Henry Solberg <henry.j.solberg@gmail.com>
Date: Fri, 12 Apr 2024 16:58:17 -0700
Subject: [PATCH 1/5] feat: add fine-tuning `fit()` to PaLM2TextGenerator

Add a `fit()` method that creates a tuned remote LLM model through BQML,
together with the SQL generator support and tests backing it.
---
 bigframes/ml/core.py              | 40 ++++++++++++++
 bigframes/ml/llm.py               | 86 ++++++++++++++++++++++++++++++-
 bigframes/ml/sql.py               | 18 +++++++
 tests/system/conftest.py          | 13 +++++
 tests/system/large/ml/test_llm.py | 36 +++++++++++++
 tests/system/small/ml/conftest.py | 12 +++++
 tests/unit/ml/test_sql.py         | 23 +++++++++
 7 files changed, 227 insertions(+), 1 deletion(-)
 create mode 100644 tests/system/large/ml/test_llm.py
diff --git a/bigframes/ml/core.py b/bigframes/ml/core.py
index 04aaeec1bc..b94ae39687 100644
--- a/bigframes/ml/core.py
+++ b/bigframes/ml/core.py
@@ -321,6 +321,46 @@ def create_model(
 
         return self._create_model_with_sql(session=session, sql=sql)
 
+    def create_llm_remote_model(
+        self,
+        X_train: bpd.DataFrame,
+        y_train: bpd.DataFrame,
+        connection_name: str,
+        options: Mapping[str, Union[str, int, float, Iterable[str]]] = {},
+    ) -> BqmlModel:
+        """Create a session-temporary BQML model with the CREATE OR REPLACE MODEL statement
+
+        Args:
+            X_train: features columns for training
+            y_train: labels columns for training
+            options: a dict of options to configure the model. Generates a BQML OPTIONS
+                clause
+            connection_name:
+                a BQ connection to talk with Vertex AI, of the format <PROJECT_NUMBER>.<REGION>.<CONNECTION_NAME>. https://cloud.google.com/bigquery/docs/create-cloud-resource-connection
+
+        Returns: a BqmlModel, wrapping a trained model in BigQuery
+        """
+        options = dict(options)
+        # Cache dataframes to make sure base table is not a snapshot
+        # cached dataframe creates a full copy, never uses snapshot
+        input_data = X_train._cached(force=True).join(
+            y_train._cached(force=True), how="outer"
+        )
+        options.update({"INPUT_LABEL_COLS": y_train.columns.tolist()})
+
+        session = X_train._session
+
+        model_ref = self._create_model_ref(session._anonymous_dataset)
+
+        sql = self._model_creation_sql_generator.create_llm_remote_model(
+            source_df=input_data,
+            model_ref=model_ref,
+            options=options,
+            connection_name=connection_name,
+        )
+
+        return self._create_model_with_sql(session=session, sql=sql)
+
     def create_time_series_model(
         self,
         X_train: bpd.DataFrame,
diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py
index 31c691fd51..52569df161 100644
--- a/bigframes/ml/llm.py
+++ b/bigframes/ml/llm.py
@@ -27,6 +27,11 @@
 from bigframes.ml import base, core, globals, utils
 import bigframes.pandas as bpd
 
+_BQML_PARAMS_MAPPING = {
+    "max_iterations": "maxIterations",
+    "evaluation_task": "evaluationTask",
+}
+
 _TEXT_GENERATOR_BISON_ENDPOINT = "text-bison"
 _TEXT_GENERATOR_BISON_32K_ENDPOINT = "text-bison-32k"
 _TEXT_GENERATOR_ENDPOINTS = (
@@ -51,6 +56,12 @@
 class PaLM2TextGenerator(base.BaseEstimator):
     """PaLM2 text generator LLM model.
 
+    .. note::
+        This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
+        Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
+        and might have limited support. For more information, see the launch stage descriptions
+        (https://cloud.google.com/products#product-launch-stages).
+
     Args:
         model_name (str, Default to "text-bison"):
             The model for natural language tasks. “text-bison” returns model fine-tuned to follow natural language instructions
@@ -62,6 +73,11 @@ class PaLM2TextGenerator(base.BaseEstimator):
             Connection to connect with remote service. str of the format <PROJECT_NUMBER/PROJECT_ID>.<LOCATION>.<CONNECTION_ID>.
             if None, use default connection in session context. BigQuery DataFrame will try to create the connection and attach
             permission if the connection isn't fully setup.
+        max_iterations (Optional[int], Default to 300):
+            The number of steps to run when performing supervised tuning.
+        evaluation_task (Optional[str], default to "UNSPECIFIED"):
+            When performing supervised tuning, the type of task that you want to tune the model to perform. Possible values:
+            "TEXT_GENERATION", "CLASSIFICATION", "SUMMARIZATION", "QUESTION_ANSWERING", "UNSPECIFIED". Default to "UNSPECIFIED".
     """
 
     def __init__(
@@ -70,9 +86,19 @@ def __init__(
         model_name: Literal["text-bison", "text-bison-32k"] = "text-bison",
         session: Optional[bigframes.Session] = None,
         connection_name: Optional[str] = None,
+        max_iterations: int = 300,
+        evaluation_task: Literal[
+            "UNSPECIFIED",
+            "TEXT_GENERATION",
+            "CLASSIFICATION",
+            "SUMMARIZATION",
+            "QUESTION_ANSWERING",
+        ] = "UNSPECIFIED",
     ):
         self.model_name = model_name
         self.session = session or bpd.get_global_session()
+        self.max_iterations = max_iterations
+        self.evaluation_task = evaluation_task
         self._bq_connection_manager = self.session.bqconnectionmanager
 
         connection_name = connection_name or self.session._bq_connection
@@ -132,12 +158,70 @@ def _from_bq(
         model_connection = model._properties["remoteModelInfo"]["connection"]
         model_endpoint = bqml_endpoint.split("/")[-1]
 
+        # Get the optional params
+        kwargs: dict = {}
+        last_fitting = model.training_runs[-1]["trainingOptions"]
+
+        dummy_arima = cls()
+        for bf_param, _ in dummy_arima.__dict__.items():
+            bqml_param = _BQML_PARAMS_MAPPING.get(bf_param)
+            if bqml_param in last_fitting:
+                # Convert types
+                if bf_param in ["max_iterations"]:
+                    kwargs[bf_param] = int(last_fitting[bqml_param])
+                elif bf_param in ["evaluation_task"]:
+                    kwargs[bf_param] = str(last_fitting[bqml_param])
+
         text_generator_model = cls(
-            session=session, model_name=model_endpoint, connection_name=model_connection
+            **kwargs,
+            session=session,
+            model_name=model_endpoint,
+            connection_name=model_connection,
         )
         text_generator_model._bqml_model = core.BqmlModel(session, model)
         return text_generator_model
 
+    @property
+    def _bqml_options(self) -> dict:
+        """The model options as they will be set for BQML"""
+        options = {
+            "max_iterations": self.max_iterations,
+            "data_split_method": "NO_SPLIT",
+            "evaluation_task": self.evaluation_task,
+        }
+        return options
+
+    def fit(
+        self,
+        X: Union[bpd.DataFrame, bpd.Series],
+        y: Union[bpd.DataFrame, bpd.Series],
+    ) -> PaLM2TextGenerator:
+        """Fine tune PaLM2TextGenerator model.
+
+        Args:
+            X (bigframes.dataframe.DataFrame or bigframes.series.Series):
+                DataFrame of shape (n_samples, n_features). Training data.
+            y (bigframes.dataframe.DataFrame or bigframes.series.Series):
+                Training labels.
+
+        Returns:
+            PaLM2TextGenerator: Fitted Estimator.
+        """
+        X, y = utils.convert_to_dataframe(X, y)
+
+        # TODO(ashleyxu): options= self._bqml_options
+        options = self._bqml_options
+        options["endpoint"] = self.model_name + "@001"
+        options["prompt_col"] = X.columns.tolist()[0]
+
+        self._bqml_model = self._bqml_model_factory.create_llm_remote_model(
+            X,
+            y,
+            options=options,
+            connection_name=self.connection_name,
+        )
+        return self
+
     def predict(
         self,
         X: Union[bpd.DataFrame, bpd.Series],
diff --git a/bigframes/ml/sql.py b/bigframes/ml/sql.py
index fab358cce3..a81790173e 100644
--- a/bigframes/ml/sql.py
+++ b/bigframes/ml/sql.py
@@ -177,6 +177,24 @@ def create_model(
         parts.append(f"AS {source_sql}")
         return "\n".join(parts)
 
+    # Model create and alter
+    def create_llm_remote_model(
+        self,
+        source_df: bpd.DataFrame,
+        connection_name: str,
+        model_ref: google.cloud.bigquery.ModelReference,
+        options: Mapping[str, Union[str, int, float, Iterable[str]]] = {},
+    ) -> str:
+        """Encode the CREATE OR REPLACE MODEL ... REMOTE WITH CONNECTION statement that trains on source_df"""
+        source_sql = source_df.sql
+
+        parts = [f"CREATE OR REPLACE MODEL {self._model_id_sql(model_ref)}"]
+        parts.append(self.connection(connection_name))
+        if options:
+            parts.append(self.options(**options))
+        parts.append(f"AS {source_sql}")
+        return "\n".join(parts)
+
     def create_remote_model(
         self,
         connection_name: str,
diff --git a/tests/system/conftest.py b/tests/system/conftest.py
index 70ff6eee39..2048c9751b 100644
--- a/tests/system/conftest.py
+++ b/tests/system/conftest.py
@@ -537,6 +537,19 @@ def penguins_df_default_index(
     return session.read_gbq(penguins_table_id)
 
 
+@pytest.fixture(scope="session")
+def llm_fine_tune_df_default_index(
+    session: bigframes.Session,
+) -> bigframes.dataframe.DataFrame:
+    sql = """
+SELECT
+  CONCAT("Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: ", text) as prompt,
+  CAST(label AS STRING) as label
+FROM `llm_tuning.emotion_classification_train`
+"""
+    return session.read_gbq(sql)
+
+
 @pytest.fixture(scope="session")
 def time_series_df_default_index(
     time_series_table_id: str, session: bigframes.Session
diff --git a/tests/system/large/ml/test_llm.py b/tests/system/large/ml/test_llm.py
new file mode 100644
index 0000000000..1258c597a4
--- /dev/null
+++ b/tests/system/large/ml/test_llm.py
@@ -0,0 +1,36 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import bigframes.ml.llm
+
+
+def test_llm_palm_configure_fit(llm_fine_tune_df_default_index, dataset_id):
+    model = bigframes.ml.llm.PaLM2TextGenerator(
+        model_name="text-bison", max_iterations=1, evaluation_task="CLASSIFICATION"
+    )
+
+    df = llm_fine_tune_df_default_index.dropna()
+    X_train = df[["prompt"]]
+    y_train = df[["label"]]
+    model.fit(X_train, y_train)
+
+    # save, load, check parameters to ensure configuration was kept
+    reloaded_model = model.to_gbq(
+        f"{dataset_id}.temp_configured_palm_model", replace=True
+    )
+    assert (
+        f"{dataset_id}.temp_configured_palm_model"
+        in reloaded_model._bqml_model.model_name
+    )
+    assert reloaded_model.evaluation_task == "CLASSIFICATION"
diff --git a/tests/system/small/ml/conftest.py b/tests/system/small/ml/conftest.py
index 33351afe45..3d380794bc 100644
--- a/tests/system/small/ml/conftest.py
+++ b/tests/system/small/ml/conftest.py
@@ -232,6 +232,18 @@ def palm2_text_generator_model(session, bq_connection) -> llm.PaLM2TextGenerator
     return llm.PaLM2TextGenerator(session=session, connection_name=bq_connection)
 
 
+@pytest.fixture(scope="session")
+def palm2_text_generator_fine_tune_model(
+    session, bq_connection
+) -> llm.PaLM2TextGenerator:
+    return llm.PaLM2TextGenerator(
+        session=session,
+        connection_name=bq_connection,
+        max_iterations=300,
+        evaluation_task="TEXT_GENERATION",
+    )
+
+
 @pytest.fixture(scope="session")
 def palm2_text_generator_32k_model(session, bq_connection) -> llm.PaLM2TextGenerator:
     return llm.PaLM2TextGenerator(
diff --git a/tests/unit/ml/test_sql.py b/tests/unit/ml/test_sql.py
index 5b1ff37775..3560f05cb6 100644
--- a/tests/unit/ml/test_sql.py
+++ b/tests/unit/ml/test_sql.py
@@ -181,6 +181,29 @@ def test_create_model_transform_correct(
     )
 
 
+def test_create_llm_remote_model_correct(
+    model_creation_sql_generator: ml_sql.ModelCreationSqlGenerator,
+    mock_df: bpd.DataFrame,
+):
+    sql = model_creation_sql_generator.create_llm_remote_model(
+        source_df=mock_df,
+        connection_name="my_project.us.my_connection",
+        model_ref=bigquery.ModelReference.from_string(
+            "test-proj._anonXYZ.create_remote_model"
+        ),
+        options={"option_key1": "option_value1", "option_key2": 2},
+    )
+    assert (
+        sql
+        == """CREATE OR REPLACE MODEL `test-proj`.`_anonXYZ`.`create_remote_model`
+REMOTE WITH CONNECTION `my_project.us.my_connection`
+OPTIONS(
+  option_key1="option_value1",
+  option_key2=2)
+AS input_X_y_sql"""
+    )
+
+
 def test_create_remote_model_correct(
     model_creation_sql_generator: ml_sql.ModelCreationSqlGenerator,
 ):

From 7f89428ff602516ef382d4831f65b8218dc449cc Mon Sep 17 00:00:00 2001
From: Ashley Xu <ashleyxu@google.com>
Date: Tue, 16 Apr 2024 22:10:46 +0000
Subject: [PATCH 2/5] move the tests to load-testing

---
 tests/system/{large/ml => load}/test_llm.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/system/{large/ml => load}/test_llm.py (100%)

diff --git a/tests/system/large/ml/test_llm.py b/tests/system/load/test_llm.py
similarity index 100%
rename from tests/system/large/ml/test_llm.py
rename to tests/system/load/test_llm.py

From e19f7ac2ce28814c06e21c8fe1082417b4810afc Mon Sep 17 00:00:00 2001
From: Ashley Xu <ashleyxu@google.com>
Date: Wed, 17 Apr 2024 04:16:49 +0000
Subject: [PATCH 3/5] add predict tests

---
 tests/system/conftest.py          | 14 ++++++++++++++
 tests/system/load/test_llm.py     | 22 ++++++++++++----------
 tests/system/small/ml/conftest.py | 12 ------------
 3 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/tests/system/conftest.py b/tests/system/conftest.py
index 2048c9751b..1eafb1a516 100644
--- a/tests/system/conftest.py
+++ b/tests/system/conftest.py
@@ -550,6 +550,20 @@ def llm_fine_tune_df_default_index(
     return session.read_gbq(sql)
 
 
+@pytest.fixture(scope="session")
+def llm_remote_text_pandas_df():
+    """Additional data matching the penguins dataset, with a new index"""
+    return pd.DataFrame(
+        {
+            "prompt": [
+                "Please do sentiment analysis on the following text and only output a number from 0 to 5where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: i feel beautifully emotional knowing that these women of whom i knew just a handful were holding me and my baba on our journey",
+                "Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: i was feeling a little vain when i did this one",
+                "Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: a father of children killed in an accident",
+            ],
+        }
+    )
+
+
 @pytest.fixture(scope="session")
 def time_series_df_default_index(
     time_series_table_id: str, session: bigframes.Session
diff --git a/tests/system/load/test_llm.py b/tests/system/load/test_llm.py
index 1258c597a4..0882b81ccf 100644
--- a/tests/system/load/test_llm.py
+++ b/tests/system/load/test_llm.py
@@ -15,7 +15,9 @@
 import bigframes.ml.llm
 
 
-def test_llm_palm_configure_fit(llm_fine_tune_df_default_index, dataset_id):
+def test_llm_palm_configure_fit(
+    llm_fine_tune_df_default_index, llm_remote_text_pandas_df
+):
     model = bigframes.ml.llm.PaLM2TextGenerator(
         model_name="text-bison", max_iterations=1, evaluation_task="CLASSIFICATION"
     )
@@ -25,12 +27,12 @@ def test_llm_palm_configure_fit(llm_fine_tune_df_default_index, dataset_id):
     y_train = df[["label"]]
     model.fit(X_train, y_train)
 
-    # save, load, check parameters to ensure configuration was kept
-    reloaded_model = model.to_gbq(
-        f"{dataset_id}.temp_configured_palm_model", replace=True
-    )
-    assert (
-        f"{dataset_id}.temp_configured_palm_model"
-        in reloaded_model._bqml_model.model_name
-    )
-    assert reloaded_model.evaluation_task == "CLASSIFICATION"
+    assert model is not None
+
+    df = model.predict(llm_remote_text_pandas_df).to_pandas()
+    assert df.shape == (3, 4)
+    assert "ml_generate_text_llm_result" in df.columns
+    series = df["ml_generate_text_llm_result"]
+    assert all(series.str.len() == 1)
+
+    # TODO(ashleyxu): After bqml rolled out version control: save, load, check parameters to ensure configuration was kept
diff --git a/tests/system/small/ml/conftest.py b/tests/system/small/ml/conftest.py
index 3d380794bc..33351afe45 100644
--- a/tests/system/small/ml/conftest.py
+++ b/tests/system/small/ml/conftest.py
@@ -232,18 +232,6 @@ def palm2_text_generator_model(session, bq_connection) -> llm.PaLM2TextGenerator
     return llm.PaLM2TextGenerator(session=session, connection_name=bq_connection)
 
 
-@pytest.fixture(scope="session")
-def palm2_text_generator_fine_tune_model(
-    session, bq_connection
-) -> llm.PaLM2TextGenerator:
-    return llm.PaLM2TextGenerator(
-        session=session,
-        connection_name=bq_connection,
-        max_iterations=300,
-        evaluation_task="TEXT_GENERATION",
-    )
-
-
 @pytest.fixture(scope="session")
 def palm2_text_generator_32k_model(session, bq_connection) -> llm.PaLM2TextGenerator:
     return llm.PaLM2TextGenerator(

From a03f28405dbda59337671ac8c8c210784e752465 Mon Sep 17 00:00:00 2001
From: Ashley Xu <ashleyxu@google.com>
Date: Wed, 17 Apr 2024 21:33:29 +0000
Subject: [PATCH 4/5] address comments

---
 bigframes/ml/llm.py               | 16 ---------------
 bigframes/ml/sql.py               |  1 -
 tests/system/conftest.py          | 27 ------------------------
 tests/system/load/test_llm.py     | 34 +++++++++++++++++++++++++++++--
 tests/system/small/ml/test_llm.py |  2 +-
 5 files changed, 33 insertions(+), 47 deletions(-)

diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py
index 52569df161..15a11e31fb 100644
--- a/bigframes/ml/llm.py
+++ b/bigframes/ml/llm.py
@@ -29,7 +29,6 @@
 
 _BQML_PARAMS_MAPPING = {
     "max_iterations": "maxIterations",
-    "evaluation_task": "evaluationTask",
 }
 
 _TEXT_GENERATOR_BISON_ENDPOINT = "text-bison"
@@ -75,9 +74,6 @@ class PaLM2TextGenerator(base.BaseEstimator):
             permission if the connection isn't fully setup.
         max_iterations (Optional[int], Default to 300):
             The number of steps to run when performing supervised tuning.
-        evaluation_task (Optional[str], default to "UNSPECIFIED"):
-            When performing supervised tuning, the type of task that you want to tune the model to perform. Possible values:
-            "TEXT_GENERATION", "CLASSIFICATION", "SUMMARIZATION", "QUESTION_ANSWERING", "UNSPECIFIED". Default to "UNSPECIFIED".
     """
 
     def __init__(
@@ -87,18 +83,10 @@ def __init__(
         session: Optional[bigframes.Session] = None,
         connection_name: Optional[str] = None,
         max_iterations: int = 300,
-        evaluation_task: Literal[
-            "UNSPECIFIED",
-            "TEXT_GENERATION",
-            "CLASSIFICATION",
-            "SUMMARIZATION",
-            "QUESTION_ANSWERING",
-        ] = "UNSPECIFIED",
     ):
         self.model_name = model_name
         self.session = session or bpd.get_global_session()
         self.max_iterations = max_iterations
-        self.evaluation_task = evaluation_task
         self._bq_connection_manager = self.session.bqconnectionmanager
 
         connection_name = connection_name or self.session._bq_connection
@@ -169,8 +157,6 @@ def _from_bq(
                 # Convert types
                 if bf_param in ["max_iterations"]:
                     kwargs[bf_param] = int(last_fitting[bqml_param])
-                elif bf_param in ["evaluation_task"]:
-                    kwargs[bf_param] = str(last_fitting[bqml_param])
 
         text_generator_model = cls(
             **kwargs,
@@ -187,7 +173,6 @@ def _bqml_options(self) -> dict:
         options = {
             "max_iterations": self.max_iterations,
             "data_split_method": "NO_SPLIT",
-            "evaluation_task": self.evaluation_task,
         }
         return options
 
@@ -209,7 +194,6 @@ def fit(
         """
         X, y = utils.convert_to_dataframe(X, y)
 
-        # TODO(ashleyxu): options= self._bqml_options
         options = self._bqml_options
         options["endpoint"] = self.model_name + "@001"
         options["prompt_col"] = X.columns.tolist()[0]
diff --git a/bigframes/ml/sql.py b/bigframes/ml/sql.py
index a81790173e..59c768ce81 100644
--- a/bigframes/ml/sql.py
+++ b/bigframes/ml/sql.py
@@ -177,7 +177,6 @@ def create_model(
         parts.append(f"AS {source_sql}")
         return "\n".join(parts)
 
-    # Model create and alter
     def create_llm_remote_model(
         self,
         source_df: bpd.DataFrame,
diff --git a/tests/system/conftest.py b/tests/system/conftest.py
index 1eafb1a516..70ff6eee39 100644
--- a/tests/system/conftest.py
+++ b/tests/system/conftest.py
@@ -537,33 +537,6 @@ def penguins_df_default_index(
     return session.read_gbq(penguins_table_id)
 
 
-@pytest.fixture(scope="session")
-def llm_fine_tune_df_default_index(
-    session: bigframes.Session,
-) -> bigframes.dataframe.DataFrame:
-    sql = """
-SELECT
-  CONCAT("Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: ", text) as prompt,
-  CAST(label AS STRING) as label
-FROM `llm_tuning.emotion_classification_train`
-"""
-    return session.read_gbq(sql)
-
-
-@pytest.fixture(scope="session")
-def llm_remote_text_pandas_df():
-    """Additional data matching the penguins dataset, with a new index"""
-    return pd.DataFrame(
-        {
-            "prompt": [
-                "Please do sentiment analysis on the following text and only output a number from 0 to 5where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: i feel beautifully emotional knowing that these women of whom i knew just a handful were holding me and my baba on our journey",
-                "Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: i was feeling a little vain when i did this one",
-                "Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: a father of children killed in an accident",
-            ],
-        }
-    )
-
-
 @pytest.fixture(scope="session")
 def time_series_df_default_index(
     time_series_table_id: str, session: bigframes.Session
diff --git a/tests/system/load/test_llm.py b/tests/system/load/test_llm.py
index 0882b81ccf..62ef7d5c72 100644
--- a/tests/system/load/test_llm.py
+++ b/tests/system/load/test_llm.py
@@ -12,14 +12,44 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import pandas as pd
+import pytest
+
 import bigframes.ml.llm
 
 
+@pytest.fixture(scope="session")
+def llm_fine_tune_df_default_index(
+    session: bigframes.Session,
+) -> bigframes.dataframe.DataFrame:
+    sql = """
+SELECT
+  CONCAT("Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: ", text) as prompt,
+  CAST(label AS STRING) as label
+FROM `llm_tuning.emotion_classification_train`
+"""
+    return session.read_gbq(sql)
+
+
+@pytest.fixture(scope="session")
+def llm_remote_text_pandas_df():
+    """Sentiment-analysis prompts used as prediction input for the fine-tuned model"""
+    return pd.DataFrame(
+        {
+            "prompt": [
+                "Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: i feel beautifully emotional knowing that these women of whom i knew just a handful were holding me and my baba on our journey",
+                "Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: i was feeling a little vain when i did this one",
+                "Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: a father of children killed in an accident",
+            ],
+        }
+    )
+
+
 def test_llm_palm_configure_fit(
     llm_fine_tune_df_default_index, llm_remote_text_pandas_df
 ):
     model = bigframes.ml.llm.PaLM2TextGenerator(
-        model_name="text-bison", max_iterations=1, evaluation_task="CLASSIFICATION"
+        model_name="text-bison", max_iterations=1
     )
 
     df = llm_fine_tune_df_default_index.dropna()
@@ -35,4 +65,4 @@ def test_llm_palm_configure_fit(
     series = df["ml_generate_text_llm_result"]
     assert all(series.str.len() == 1)
 
-    # TODO(ashleyxu): After bqml rolled out version control: save, load, check parameters to ensure configuration was kept
+    # TODO(ashleyxu b/335492787): After BQML rolls out version control, save and reload the model and check that its configuration was kept
diff --git a/tests/system/small/ml/test_llm.py b/tests/system/small/ml/test_llm.py
index b9e4889801..6f6b67597a 100644
--- a/tests/system/small/ml/test_llm.py
+++ b/tests/system/small/ml/test_llm.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC
+# Copyright 2024 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

From 2f14f13aca286998833f5f4879ca8bd68c3b9356 Mon Sep 17 00:00:00 2001
From: Ashley Xu <ashleyxu@google.com>
Date: Wed, 17 Apr 2024 22:16:26 +0000
Subject: [PATCH 5/5] address comments

---
 bigframes/ml/llm.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py
index 15a11e31fb..37a38cdd5c 100644
--- a/bigframes/ml/llm.py
+++ b/bigframes/ml/llm.py
@@ -55,12 +55,6 @@
 class PaLM2TextGenerator(base.BaseEstimator):
     """PaLM2 text generator LLM model.
 
-    .. note::
-        This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
-        Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
-        and might have limited support. For more information, see the launch stage descriptions
-        (https://cloud.google.com/products#product-launch-stages).
-
     Args:
         model_name (str, Default to "text-bison"):
             The model for natural language tasks. “text-bison” returns model fine-tuned to follow natural language instructions
@@ -150,8 +144,8 @@ def _from_bq(
         kwargs: dict = {}
         last_fitting = model.training_runs[-1]["trainingOptions"]
 
-        dummy_arima = cls()
-        for bf_param, _ in dummy_arima.__dict__.items():
+        dummy_text_generator = cls()
+        for bf_param, _ in dummy_text_generator.__dict__.items():
             bqml_param = _BQML_PARAMS_MAPPING.get(bf_param)
             if bqml_param in last_fitting:
                 # Convert types
@@ -183,6 +177,13 @@ def fit(
     ) -> PaLM2TextGenerator:
         """Fine tune PaLM2TextGenerator model.
 
+        .. note::
+
+            This product or feature is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
+            Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
+            and might have limited support. For more information, see the launch stage descriptions
+            (https://cloud.google.com/products#product-launch-stages).
+
         Args:
             X (bigframes.dataframe.DataFrame or bigframes.series.Series):
                 DataFrame of shape (n_samples, n_features). Training data.