insert_rows_from_dataframe doesn't support nullable boolean or nullable Int64 dtypes #1815

@tswast

Description

Is your feature request related to a problem? Please describe.

I'm working on an alternative loading mechanism in BigQuery DataFrames based on the streaming API to avoid quota limits for load jobs.

Describe the solution you'd like

I'd like the following code to insert the DataFrame's rows without error:

import pandas
import google.cloud.bigquery

df = pandas.DataFrame(
  {
    "bool_col": pandas.Series([True, False, True, pandas.NA, False, False, True, True, False], dtype="boolean"),
  }
)

bqclient = google.cloud.bigquery.Client()
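# Assumes my_dataset.my_table already exists with a nullable BOOL column named bool_col.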
bqclient.insert_rows_from_dataframe(bqclient.get_table("my_dataset.my_table"), df)

This results in the following error:

In [11]: bqclient.insert_rows_from_dataframe(bqclient.get_table("my_dataset.my_table"), df)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Input In [11], in <cell line: 1>()
----> 1 bqclient.insert_rows_from_dataframe(bqclient.get_table("my_dataset.my_table"), df)

File ~/src/github.com/googleapis/python-bigquery/google/cloud/bigquery/client.py:3662, in Client.insert_rows_from_dataframe(self, table, dataframe, selected_fields, chunk_size, **kwargs)
   3660 for _ in range(chunk_count):
   3661     rows_chunk = itertools.islice(rows_iter, chunk_size)
-> 3662     result = self.insert_rows(table, rows_chunk, selected_fields, **kwargs)
   3663     insert_results.append(result)
   3665 return insert_results

File ~/src/github.com/googleapis/python-bigquery/google/cloud/bigquery/client.py:3605, in Client.insert_rows(self, table, rows, selected_fields, **kwargs)
   3596     raise ValueError(
   3597         (
   3598             "Could not determine schema for table '{}'. Call client.get_table() "
   3599             "or pass in a list of schema fields to the selected_fields argument."
   3600         ).format(table)
   3601     )
   3603 json_rows = [_record_field_to_json(schema, row) for row in rows]
-> 3605 return self.insert_rows_json(table, json_rows, **kwargs)

File ~/src/github.com/googleapis/python-bigquery/google/cloud/bigquery/client.py:3801, in Client.insert_rows_json(self, table, json_rows, row_ids, skip_invalid_rows, ignore_unknown_values, template_suffix, retry, timeout)
   3799 # We can always retry, because every row has an insert ID.
   3800 span_attributes = {"path": path}
-> 3801 response = self._call_api(
   3802     retry,
   3803     span_name="BigQuery.insertRowsJson",
   3804     span_attributes=span_attributes,
   3805     method="POST",
   3806     path=path,
   3807     data=data,
   3808     timeout=timeout,
   3809 )
   3810 errors = []
   3812 for error in response.get("insertErrors", ()):

File ~/src/github.com/googleapis/python-bigquery/google/cloud/bigquery/client.py:827, in Client._call_api(self, retry, span_name, span_attributes, job_ref, headers, **kwargs)
    823 if span_name is not None:
    824     with create_span(
    825         name=span_name, attributes=span_attributes, client=self, job_ref=job_ref
    826     ):
--> 827         return call()
    829 return call()

File /opt/miniconda3/envs/dev-3.10/lib/python3.10/site-packages/google/api_core/retry.py:349, in Retry.__call__.<locals>.retry_wrapped_func(*args, **kwargs)
    345 target = functools.partial(func, *args, **kwargs)
    346 sleep_generator = exponential_sleep_generator(
    347     self._initial, self._maximum, multiplier=self._multiplier
    348 )
--> 349 return retry_target(
    350     target,
    351     self._predicate,
    352     sleep_generator,
    353     self._timeout,
    354     on_error=on_error,
    355 )

File /opt/miniconda3/envs/dev-3.10/lib/python3.10/site-packages/google/api_core/retry.py:191, in retry_target(target, predicate, sleep_generator, timeout, on_error, **kwargs)
    189 for sleep in sleep_generator:
    190     try:
--> 191         return target()
    193     # pylint: disable=broad-except
    194     # This function explicitly must deal with broad exceptions.
    195     except Exception as exc:

File /opt/miniconda3/envs/dev-3.10/lib/python3.10/site-packages/google/cloud/_http/__init__.py:479, in JSONConnection.api_request(self, method, path, query_params, data, content_type, headers, api_base_url, api_version, expect_json, _target_object, timeout, extra_api_info)
    476 # Making the executive decision that any dictionary
    477 # data will be sent properly as JSON.
    478 if data and isinstance(data, dict):
--> 479     data = json.dumps(data)
    480     content_type = "application/json"
    482 response = self._make_request(
    483     method=method,
    484     url=url,
   (...)
    490     extra_api_info=extra_api_info,
    491 )

File /opt/miniconda3/envs/dev-3.10/lib/python3.10/json/__init__.py:231, in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
    226 # cached encoder
    227 if (not skipkeys and ensure_ascii and
    228     check_circular and allow_nan and
    229     cls is None and indent is None and separators is None and
    230     default is None and not sort_keys and not kw):
--> 231     return _default_encoder.encode(obj)
    232 if cls is None:
    233     cls = JSONEncoder

File /opt/miniconda3/envs/dev-3.10/lib/python3.10/json/encoder.py:199, in JSONEncoder.encode(self, o)
    195         return encode_basestring(o)
    196 # This doesn't pass the iterator directly to ''.join() because the
    197 # exceptions aren't as detailed.  The list call should be roughly
    198 # equivalent to the PySequence_Fast that ''.join() would do.
--> 199 chunks = self.iterencode(o, _one_shot=True)
    200 if not isinstance(chunks, (list, tuple)):
    201     chunks = list(chunks)

File /opt/miniconda3/envs/dev-3.10/lib/python3.10/json/encoder.py:257, in JSONEncoder.iterencode(self, o, _one_shot)
    252 else:
    253     _iterencode = _make_iterencode(
    254         markers, self.default, _encoder, self.indent, floatstr,
    255         self.key_separator, self.item_separator, self.sort_keys,
    256         self.skipkeys, _one_shot)
--> 257 return _iterencode(o, 0)

File /opt/miniconda3/envs/dev-3.10/lib/python3.10/json/encoder.py:179, in JSONEncoder.default(self, o)
    160 def default(self, o):
    161     """Implement this method in a subclass such that it returns
    162     a serializable object for ``o``, or calls the base implementation
    163     (to raise a ``TypeError``).
   (...)
    177 
    178     """
--> 179     raise TypeError(f'Object of type {o.__class__.__name__} '
    180                     f'is not JSON serializable')

TypeError: Object of type bool_ is not JSON serializable
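
The error suggests that values from the nullable "boolean" column reach json.dumps as NumPy scalars (numpy.bool_), and the missing values as pandas.NA, neither of which the standard JSON encoder can serialize; the nullable Int64 dtype mentioned in the title presumably fails the same way with numpy.int64. Until the client handles these dtypes, one possible client-side workaround is to coerce every value to a plain Python type before inserting. A minimal sketch, assuming the same placeholder table as above (the to_json_safe helper is illustrative, not part of the library):

import pandas
import google.cloud.bigquery

df = pandas.DataFrame(
    {"bool_col": pandas.Series([True, pandas.NA, False], dtype="boolean")}
)


def to_json_safe(value):
    # Illustrative helper (not part of google-cloud-bigquery): map pandas.NA and
    # other missing values to None, and unwrap NumPy scalars (numpy.bool_,
    # numpy.int64, ...) into native Python values that json.dumps accepts.
    if pandas.isna(value):
        return None
    if hasattr(value, "item"):
        return value.item()
    return value


rows = [
    {column: to_json_safe(value) for column, value in zip(df.columns, row)}
    for row in df.itertuples(index=False, name=None)
]

bqclient = google.cloud.bigquery.Client()
table = bqclient.get_table("my_dataset.my_table")  # placeholder table with a BOOL column
errors = bqclient.insert_rows(table, rows)  # rows are plain dicts keyed by column name

This bypasses insert_rows_from_dataframe and builds the row dicts by hand, so it is only a stopgap until the conversion is handled inside the client.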

Describe alternatives you've considered

  • Load jobs, but these have quota limitations and I'd like to provide an alternative (a sketch of the load-job path follows this list).
  • Pulling even more into BigQuery DataFrames' third_party, but I'd like to contribute this fix here instead.
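
For reference, the load-job path from the first bullet would look roughly like the sketch below; load_table_from_dataframe serializes the frame through pyarrow, which as far as I know already copes with the nullable dtypes, but every call consumes load-job quota:

import pandas
import google.cloud.bigquery

df = pandas.DataFrame(
    {"bool_col": pandas.Series([True, pandas.NA, False], dtype="boolean")}
)

bqclient = google.cloud.bigquery.Client()

# Each call creates a load job against the placeholder table, which is exactly
# the quota-limited path this feature request is trying to avoid.
load_job = bqclient.load_table_from_dataframe(df, "my_dataset.my_table")
load_job.result()  # Wait for completion and surface any job errors.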

Labels

  • api: bigquery (Issues related to the googleapis/python-bigquery API.)
  • type: feature request ('Nice-to-have' improvement, new feature or different behavior or design.)
