Labels
api: bigquery (Issues related to the googleapis/python-bigquery API.)
type: feature request (‘Nice-to-have’ improvement, new feature or different behavior or design.)
Description
Is your feature request related to a problem? Please describe.
I'm working on an alternative loading mechanism in BigQuery DataFrames based on the streaming API to avoid quota limits for load jobs.
Describe the solution you'd like
import pandas
import google.cloud.bigquery

df = pandas.DataFrame(
    {
        "bool_col": pandas.Series(
            [True, False, True, pandas.NA, False, False, True, True, False],
            dtype="boolean",
        ),
    }
)

bqclient = google.cloud.bigquery.Client()
bqclient.insert_rows_from_dataframe(bqclient.get_table("my_dataset.my_table"), df)

This results in the following error:
In [11]: bqclient.insert_rows_from_dataframe(bqclient.get_table("my_dataset.my_table"), df)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Input In [11], in <cell line: 1>()
----> 1 bqclient.insert_rows_from_dataframe(bqclient.get_table("my_dataset.my_table"), df)
File ~/src/github.com/googleapis/python-bigquery/google/cloud/bigquery/client.py:3662, in Client.insert_rows_from_dataframe(self, table, dataframe, selected_fields, chunk_size, **kwargs)
3660 for _ in range(chunk_count):
3661 rows_chunk = itertools.islice(rows_iter, chunk_size)
-> 3662 result = self.insert_rows(table, rows_chunk, selected_fields, **kwargs)
3663 insert_results.append(result)
3665 return insert_results
File ~/src/github.com/googleapis/python-bigquery/google/cloud/bigquery/client.py:3605, in Client.insert_rows(self, table, rows, selected_fields, **kwargs)
3596 raise ValueError(
3597 (
3598 "Could not determine schema for table '{}'. Call client.get_table() "
3599 "or pass in a list of schema fields to the selected_fields argument."
3600 ).format(table)
3601 )
3603 json_rows = [_record_field_to_json(schema, row) for row in rows]
-> 3605 return self.insert_rows_json(table, json_rows, **kwargs)
File ~/src/github.com/googleapis/python-bigquery/google/cloud/bigquery/client.py:3801, in Client.insert_rows_json(self, table, json_rows, row_ids, skip_invalid_rows, ignore_unknown_values, template_suffix, retry, timeout)
3799 # We can always retry, because every row has an insert ID.
3800 span_attributes = {"path": path}
-> 3801 response = self._call_api(
3802 retry,
3803 span_name="BigQuery.insertRowsJson",
3804 span_attributes=span_attributes,
3805 method="POST",
3806 path=path,
3807 data=data,
3808 timeout=timeout,
3809 )
3810 errors = []
3812 for error in response.get("insertErrors", ()):
File ~/src/github.com/googleapis/python-bigquery/google/cloud/bigquery/client.py:827, in Client._call_api(self, retry, span_name, span_attributes, job_ref, headers, **kwargs)
823 if span_name is not None:
824 with create_span(
825 name=span_name, attributes=span_attributes, client=self, job_ref=job_ref
826 ):
--> 827 return call()
829 return call()
File /opt/miniconda3/envs/dev-3.10/lib/python3.10/site-packages/google/api_core/retry.py:349, in Retry.__call__.<locals>.retry_wrapped_func(*args, **kwargs)
345 target = functools.partial(func, *args, **kwargs)
346 sleep_generator = exponential_sleep_generator(
347 self._initial, self._maximum, multiplier=self._multiplier
348 )
--> 349 return retry_target(
350 target,
351 self._predicate,
352 sleep_generator,
353 self._timeout,
354 on_error=on_error,
355 )
File /opt/miniconda3/envs/dev-3.10/lib/python3.10/site-packages/google/api_core/retry.py:191, in retry_target(target, predicate, sleep_generator, timeout, on_error, **kwargs)
189 for sleep in sleep_generator:
190 try:
--> 191 return target()
193 # pylint: disable=broad-except
194 # This function explicitly must deal with broad exceptions.
195 except Exception as exc:
File /opt/miniconda3/envs/dev-3.10/lib/python3.10/site-packages/google/cloud/_http/__init__.py:479, in JSONConnection.api_request(self, method, path, query_params, data, content_type, headers, api_base_url, api_version, expect_json, _target_object, timeout, extra_api_info)
476 # Making the executive decision that any dictionary
477 # data will be sent properly as JSON.
478 if data and isinstance(data, dict):
--> 479 data = json.dumps(data)
480 content_type = "application/json"
482 response = self._make_request(
483 method=method,
484 url=url,
(...)
490 extra_api_info=extra_api_info,
491 )
File /opt/miniconda3/envs/dev-3.10/lib/python3.10/json/__init__.py:231, in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
226 # cached encoder
227 if (not skipkeys and ensure_ascii and
228 check_circular and allow_nan and
229 cls is None and indent is None and separators is None and
230 default is None and not sort_keys and not kw):
--> 231 return _default_encoder.encode(obj)
232 if cls is None:
233 cls = JSONEncoder
File /opt/miniconda3/envs/dev-3.10/lib/python3.10/json/encoder.py:199, in JSONEncoder.encode(self, o)
195 return encode_basestring(o)
196 # This doesn't pass the iterator directly to ''.join() because the
197 # exceptions aren't as detailed. The list call should be roughly
198 # equivalent to the PySequence_Fast that ''.join() would do.
--> 199 chunks = self.iterencode(o, _one_shot=True)
200 if not isinstance(chunks, (list, tuple)):
201 chunks = list(chunks)
File /opt/miniconda3/envs/dev-3.10/lib/python3.10/json/encoder.py:257, in JSONEncoder.iterencode(self, o, _one_shot)
252 else:
253 _iterencode = _make_iterencode(
254 markers, self.default, _encoder, self.indent, floatstr,
255 self.key_separator, self.item_separator, self.sort_keys,
256 self.skipkeys, _one_shot)
--> 257 return _iterencode(o, 0)
File /opt/miniconda3/envs/dev-3.10/lib/python3.10/json/encoder.py:179, in JSONEncoder.default(self, o)
160 def default(self, o):
161 """Implement this method in a subclass such that it returns
162 a serializable object for ``o``, or calls the base implementation
163 (to raise a ``TypeError``).
(...)
177
178 """
--> 179 raise TypeError(f'Object of type {o.__class__.__name__} '
180 f'is not JSON serializable')
TypeError: Object of type bool_ is not JSON serializable
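The failure comes from the standard-library JSON encoder: iterating a pandas nullable "boolean" column yields NumPy scalar types such as numpy.bool_ (as the TypeError above shows), and json.dumps does not know how to serialize those. A minimal reproduction of the root cause, independent of BigQuery:

import json

import numpy

# A native Python bool serializes fine:
json.dumps({"bool_col": True})  # -> '{"bool_col": true}'

# NumPy's scalar bool does not, which is exactly the error above:
json.dumps({"bool_col": numpy.bool_(True)})
# TypeError: Object of type bool_ is not JSON serializable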
Describe alternatives you've considered
- Load jobs, but these have quota limitations and I'd like to provide an alternative.
- Pulling even more into BigQuery DataFrames third_party, but I'd like to contribute this fix here, instead.
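In the meantime, a possible workaround is to convert the frame to plain Python objects before streaming. This is only a sketch (it reuses the placeholder my_dataset.my_table table from above, and I've only exercised the nullable boolean dtype):

import pandas
import google.cloud.bigquery

df = pandas.DataFrame(
    {
        "bool_col": pandas.Series(
            [True, False, True, pandas.NA, False, False, True, True, False],
            dtype="boolean",
        ),
    }
)

# Cast to object dtype and swap pandas.NA for None, so that iteration yields
# native Python bools / None, which the standard json encoder can serialize.
df_plain = df.astype(object).where(df.notna(), None)

bqclient = google.cloud.bigquery.Client()
table = bqclient.get_table("my_dataset.my_table")
bqclient.insert_rows_from_dataframe(table, df_plain)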