1""" Google BigQuery support """
2from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
4from pandas.compat._optional import import_optional_dependency
6if TYPE_CHECKING:
7 from pandas import DataFrame
10def _try_import():
11 # since pandas is a dependency of pandas-gbq
12 # we need to import on first use
13 msg = (
14 "pandas-gbq is required to load data from Google BigQuery. "
15 "See the docs: https://pandas-gbq.readthedocs.io."
16 )
17 pandas_gbq = import_optional_dependency("pandas_gbq", extra=msg)
18 return pandas_gbq


def read_gbq(
    query: str,
    project_id: Optional[str] = None,
    index_col: Optional[str] = None,
    col_order: Optional[List[str]] = None,
    reauth: bool = False,
    auth_local_webserver: bool = False,
    dialect: Optional[str] = None,
    location: Optional[str] = None,
    configuration: Optional[Dict[str, Any]] = None,
    credentials=None,
    use_bqstorage_api: Optional[bool] = None,
    private_key=None,
    verbose=None,
    progress_bar_type: Optional[str] = None,
) -> "DataFrame":
37 """
38 Load data from Google BigQuery.
40 This function requires the `pandas-gbq package
41 <https://pandas-gbq.readthedocs.io>`__.
43 See the `How to authenticate with Google BigQuery
44 <https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__
45 guide for authentication instructions.
47 Parameters
48 ----------
49 query : str
50 SQL-Like Query to return data values.
51 project_id : str, optional
52 Google BigQuery Account project ID. Optional when available from
53 the environment.
54 index_col : str, optional
55 Name of result column to use for index in results DataFrame.
56 col_order : list(str), optional
57 List of BigQuery column names in the desired order for results
58 DataFrame.
59 reauth : bool, default False
60 Force Google BigQuery to re-authenticate the user. This is useful
61 if multiple accounts are used.
62 auth_local_webserver : bool, default False
63 Use the `local webserver flow`_ instead of the `console flow`_
64 when getting user credentials.
66 .. _local webserver flow:
67 http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server
68 .. _console flow:
69 http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console
71 *New in version 0.2.0 of pandas-gbq*.
72 dialect : str, default 'legacy'
73 Note: The default value is changing to 'standard' in a future version.
75 SQL syntax dialect to use. Value can be one of:
77 ``'legacy'``
78 Use BigQuery's legacy SQL dialect. For more information see
79 `BigQuery Legacy SQL Reference
80 <https://cloud.google.com/bigquery/docs/reference/legacy-sql>`__.
81 ``'standard'``
82 Use BigQuery's standard SQL, which is
83 compliant with the SQL 2011 standard. For more information
84 see `BigQuery Standard SQL Reference
85 <https://cloud.google.com/bigquery/docs/reference/standard-sql/>`__.
87 .. versionchanged:: 0.24.0
88 location : str, optional
89 Location where the query job should run. See the `BigQuery locations
90 documentation
91 <https://cloud.google.com/bigquery/docs/dataset-locations>`__ for a
92 list of available locations. The location must match that of any
93 datasets used in the query.
95 *New in version 0.5.0 of pandas-gbq*.
96 configuration : dict, optional
97 Query config parameters for job processing.
98 For example:
100 configuration = {'query': {'useQueryCache': False}}
102 For more information see `BigQuery REST API Reference
103 <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__.
104 credentials : google.auth.credentials.Credentials, optional
105 Credentials for accessing Google APIs. Use this parameter to override
106 default credentials, such as to use Compute Engine
107 :class:`google.auth.compute_engine.Credentials` or Service Account
108 :class:`google.oauth2.service_account.Credentials` directly.
110 *New in version 0.8.0 of pandas-gbq*.
112 .. versionadded:: 0.24.0
113 use_bqstorage_api : bool, default False
114 Use the `BigQuery Storage API
115 <https://cloud.google.com/bigquery/docs/reference/storage/>`__ to
116 download query results quickly, but at an increased cost. To use this
117 API, first `enable it in the Cloud Console
118 <https://console.cloud.google.com/apis/library/bigquerystorage.googleapis.com>`__.
119 You must also have the `bigquery.readsessions.create
120 <https://cloud.google.com/bigquery/docs/access-control#roles>`__
121 permission on the project you are billing queries to.
123 This feature requires version 0.10.0 or later of the ``pandas-gbq``
124 package. It also requires the ``google-cloud-bigquery-storage`` and
125 ``fastavro`` packages.
127 .. versionadded:: 0.25.0
    progress_bar_type : str, optional
        If set, use the `tqdm <https://tqdm.github.io/>`__ library to
        display a progress bar while the data downloads. Install the
        ``tqdm`` package to use this feature.

        Possible values of ``progress_bar_type`` include:

        ``None``
            No progress bar.
        ``'tqdm'``
            Use the :func:`tqdm.tqdm` function to print a progress bar
            to :data:`sys.stderr`.
        ``'tqdm_notebook'``
            Use the :func:`tqdm.tqdm_notebook` function to display a
            progress bar as a Jupyter notebook widget.
        ``'tqdm_gui'``
            Use the :func:`tqdm.tqdm_gui` function to display a
            progress bar as a graphical dialog box.

        Note that this feature requires version 0.12.0 or later of the
        ``pandas-gbq`` package, as well as the ``tqdm`` package. Unlike
        ``pandas-gbq``, the default here is ``None``.

        .. versionadded:: 1.0.0

    Returns
    -------
    df: DataFrame
        DataFrame representing results of query.

    See Also
    --------
    pandas_gbq.read_gbq : This function in the pandas-gbq library.
    DataFrame.to_gbq : Write a DataFrame to Google BigQuery.
    """
    pandas_gbq = _try_import()

    kwargs: Dict[str, Union[str, bool]] = {}

    # START: new kwargs. Don't populate unless explicitly set.
    if use_bqstorage_api is not None:
        kwargs["use_bqstorage_api"] = use_bqstorage_api

    if progress_bar_type is not None:
        kwargs["progress_bar_type"] = progress_bar_type
    # END: new kwargs

    return pandas_gbq.read_gbq(
        query,
        project_id=project_id,
        index_col=index_col,
        col_order=col_order,
        reauth=reauth,
        auth_local_webserver=auth_local_webserver,
        dialect=dialect,
        location=location,
        configuration=configuration,
        credentials=credentials,
        **kwargs,
    )
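

# A minimal usage sketch (illustrative only, not part of pandas): it shows how
# the ``read_gbq`` wrapper above is typically called. The project ID is a
# placeholder; running this requires the pandas-gbq package and valid Google
# Cloud credentials.
def _read_gbq_example() -> "DataFrame":  # pragma: no cover
    return read_gbq(
        "SELECT 1 AS answer",              # any valid BigQuery SQL statement
        project_id="my-billing-project",   # placeholder billing project
        dialect="standard",                # use standard SQL instead of legacy SQL
        use_bqstorage_api=False,           # only forwarded because it is not None
        progress_bar_type="tqdm",          # requires the optional tqdm package
    )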


def to_gbq(
    dataframe: "DataFrame",
    destination_table: str,
    project_id: Optional[str] = None,
    chunksize: Optional[int] = None,
    reauth: bool = False,
    if_exists: str = "fail",
    auth_local_webserver: bool = False,
    table_schema: Optional[List[Dict[str, str]]] = None,
    location: Optional[str] = None,
    progress_bar: bool = True,
    credentials=None,
    verbose=None,
    private_key=None,
) -> None:
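    """
    Write a DataFrame to a Google BigQuery table.

    This function requires the pandas-gbq package and simply forwards its
    arguments to :func:`pandas_gbq.to_gbq`. See :meth:`DataFrame.to_gbq`
    for the full parameter documentation.
    """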
    pandas_gbq = _try_import()
    pandas_gbq.to_gbq(
        dataframe,
        destination_table,
        project_id=project_id,
        chunksize=chunksize,
        reauth=reauth,
        if_exists=if_exists,
        auth_local_webserver=auth_local_webserver,
        table_schema=table_schema,
        location=location,
        progress_bar=progress_bar,
        credentials=credentials,
        verbose=verbose,
        private_key=private_key,
    )
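

# A minimal usage sketch (illustrative only, not part of pandas): it shows how
# the ``to_gbq`` wrapper above is typically called. The destination table and
# project ID are placeholders; running this requires the pandas-gbq package and
# valid Google Cloud credentials.
def _to_gbq_example(frame: "DataFrame") -> None:  # pragma: no cover
    to_gbq(
        frame,
        "my_dataset.my_table",             # placeholder "<dataset>.<table>" name
        project_id="my-billing-project",   # placeholder billing project
        if_exists="append",                # append to the table if it already exists
        chunksize=10_000,                  # upload the frame in chunks of 10,000 rows
    )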