Skip to content

Commit 98a7753

Browse files
craiglabenz, tseaver, and gcf-owl-bot[bot]
authored
feat: add bulk writer (#396)
* feat: bulk writer 555 rate_limiter (#368) * added 555 throttle utility * Update google/cloud/firestore_v1/throttle.py Co-authored-by: Tres Seaver <[email protected]> * added ability to request a number of tokens * replaced Callable now parameter with module function * updated tests * renamed throttle -> ramp up * improved docstrings * linting * fixed test coverage * rename to RateLimiter and defer clock to first op * linting Co-authored-by: Tres Seaver <[email protected]> * feat: added new batch class for BulkWriter (#397) * feat: added new batch class for BulkWriter * updated docstring to use less colloquial language * feat: BulkWriter implementation (#384) * feat: added `write` method to batch classes * added docstrings to all 3 batch classes instead of just the base * updated batch classes to remove control flag now branches logic via subclasses * fixed broken tests off abstract class * fixed docstring * refactored BulkWriteBatch this commit increases the distance between WriteBatch and BulkWriteBatch * began adding [Async]BulkWriter * continued implementation * working impl of BW * tidied up BW impl * beginning of unit tests for BW * fixed merge problem * initial set of BW unit tests * refactored bulkwriter sending mechanism now consumes off the queue and schedules on the main thread, only going async to actually send * final CI touch ups * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/master/packages/owl-bot/README.md * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/master/packages/owl-bot/README.md * moved BulkWriter parameters to options format * rebased off master * test fixes Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com> * feat: add retry support for BulkWriter errors (#413) * parent 0176cc7 author Craig Labenz <[email protected]> 1623693904 -0700 committer Craig Labenz <[email protected]> 1628617523 -0400 feat: add 
retries to bulk-writer * fixed rebase error Co-authored-by: Tres Seaver <[email protected]> Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent eb45a36 commit 98a7753

18 files changed

+2325
-16
lines changed

google/cloud/firestore_v1/async_client.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,19 @@ def __init__(
9696
client_options=client_options,
9797
)
9898

99+
def _to_sync_copy(self):
    """Return a synchronous ``Client`` mirroring this client's configuration.

    The synchronous client is built from this client's project, credentials,
    database, client info and client options. It is created on the first
    call and cached on ``self._sync_copy``; later calls return the cached
    instance.

    Returns:
        :class:`~google.cloud.firestore_v1.client.Client`:
            The cached synchronous counterpart of this client.
    """
    # Imported at call time rather than at module level
    # (presumably to avoid an import cycle -- confirm before hoisting).
    from google.cloud.firestore_v1.client import Client

    # Reuse the previously built copy when one is already cached.
    if getattr(self, "_sync_copy", None):
        return self._sync_copy

    sync_client_kwargs = {
        "project": self.project,
        "credentials": self._credentials,
        "database": self._database,
        "client_info": self._client_info,
        "client_options": self._client_options,
    }
    self._sync_copy = Client(**sync_client_kwargs)
    return self._sync_copy
111+
99112
@property
100113
def _firestore_api(self):
101114
"""Lazy-loading getter GAPIC Firestore API.

google/cloud/firestore_v1/base_batch.py

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,16 @@
1414

1515
"""Helpers for batch requests to the Google Cloud Firestore API."""
1616

17-
18-
from google.cloud.firestore_v1 import _helpers
17+
import abc
18+
from typing import Dict, Union
1919

2020
# Types needed only for Type Hints
21-
from google.cloud.firestore_v1.document import DocumentReference
22-
23-
from typing import Union
21+
from google.api_core import retry as retries # type: ignore
22+
from google.cloud.firestore_v1 import _helpers
23+
from google.cloud.firestore_v1.base_document import BaseDocumentReference
2424

2525

26-
class BaseWriteBatch(object):
26+
class BaseBatch(metaclass=abc.ABCMeta):
2727
"""Accumulate write operations to be sent in a batch.
2828
2929
This has the same set of methods for write operations that
@@ -38,9 +38,16 @@ class BaseWriteBatch(object):
3838
def __init__(self, client) -> None:
3939
self._client = client
4040
self._write_pbs = []
41+
self._document_references: Dict[str, BaseDocumentReference] = {}
4142
self.write_results = None
4243
self.commit_time = None
4344

45+
def __len__(self):
46+
return len(self._document_references)
47+
48+
def __contains__(self, reference: BaseDocumentReference):
49+
return reference._document_path in self._document_references
50+
4451
def _add_write_pbs(self, write_pbs: list) -> None:
4552
"""Add `Write`` protobufs to this transaction.
4653
@@ -52,7 +59,13 @@ def _add_write_pbs(self, write_pbs: list) -> None:
5259
"""
5360
self._write_pbs.extend(write_pbs)
5461

55-
def create(self, reference: DocumentReference, document_data: dict) -> None:
62+
@abc.abstractmethod
63+
def commit(self):
64+
"""Sends all accumulated write operations to the server. The details of this
65+
write depend on the implementing class."""
66+
raise NotImplementedError()
67+
68+
def create(self, reference: BaseDocumentReference, document_data: dict) -> None:
5669
"""Add a "change" to this batch to create a document.
5770
5871
If the document given by ``reference`` already exists, then this
@@ -65,11 +78,12 @@ def create(self, reference: DocumentReference, document_data: dict) -> None:
6578
creating a document.
6679
"""
6780
write_pbs = _helpers.pbs_for_create(reference._document_path, document_data)
81+
self._document_references[reference._document_path] = reference
6882
self._add_write_pbs(write_pbs)
6983

7084
def set(
7185
self,
72-
reference: DocumentReference,
86+
reference: BaseDocumentReference,
7387
document_data: dict,
7488
merge: Union[bool, list] = False,
7589
) -> None:
@@ -98,11 +112,12 @@ def set(
98112
reference._document_path, document_data
99113
)
100114

115+
self._document_references[reference._document_path] = reference
101116
self._add_write_pbs(write_pbs)
102117

103118
def update(
104119
self,
105-
reference: DocumentReference,
120+
reference: BaseDocumentReference,
106121
field_updates: dict,
107122
option: _helpers.WriteOption = None,
108123
) -> None:
@@ -126,10 +141,11 @@ def update(
126141
write_pbs = _helpers.pbs_for_update(
127142
reference._document_path, field_updates, option
128143
)
144+
self._document_references[reference._document_path] = reference
129145
self._add_write_pbs(write_pbs)
130146

131147
def delete(
132-
self, reference: DocumentReference, option: _helpers.WriteOption = None
148+
self, reference: BaseDocumentReference, option: _helpers.WriteOption = None
133149
) -> None:
134150
"""Add a "change" to delete a document.
135151
@@ -146,9 +162,15 @@ def delete(
146162
state of the document before applying changes.
147163
"""
148164
write_pb = _helpers.pb_for_delete(reference._document_path, option)
165+
self._document_references[reference._document_path] = reference
149166
self._add_write_pbs([write_pb])
150167

151-
def _prep_commit(self, retry, timeout):
168+
169+
class BaseWriteBatch(BaseBatch):
170+
"""Base class for a/sync implementations of the `commit` RPC. `commit` is useful
171+
for lower volumes or when the order of write operations is important."""
172+
173+
def _prep_commit(self, retry: retries.Retry, timeout: float):
152174
"""Shared setup for async/sync :meth:`commit`."""
153175
request = {
154176
"database": self._client._database_string,

google/cloud/firestore_v1/base_client.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,10 @@
3737
from google.cloud.firestore_v1 import __version__
3838
from google.cloud.firestore_v1 import types
3939
from google.cloud.firestore_v1.base_document import DocumentSnapshot
40-
40+
from google.cloud.firestore_v1.bulk_writer import (
41+
BulkWriter,
42+
BulkWriterOptions,
43+
)
4144
from google.cloud.firestore_v1.field_path import render_field_path
4245
from typing import (
4346
Any,
@@ -278,6 +281,21 @@ def _get_collection_reference(self, collection_id: str) -> BaseCollectionReferen
278281
def document(self, *document_path) -> BaseDocumentReference:
279282
raise NotImplementedError
280283

284+
def bulk_writer(self, options: Optional[BulkWriterOptions] = None) -> BulkWriter:
    """Build a BulkWriter bound to this client.

    Args:
        options (Optional[:class:`~google.cloud.firestore_v1.bulk_writer.BulkWriterOptions`]):
            Optional control parameters for the
            :class:`~google.cloud.firestore_v1.bulk_writer.BulkWriter` returned.
            Passed through unchanged; ``None`` when omitted.

    Returns:
        :class:`~google.cloud.firestore_v1.bulk_writer.BulkWriter`:
            A utility to efficiently create and save many `WriteBatch` instances
            to the server.
    """
    writer = BulkWriter(client=self, options=options)
    return writer
298+
281299
def _document_path_helper(self, *document_path) -> List[str]:
282300
"""Standardize the format of path to tuple of path segments and strip the database string from path if present.
283301

google/cloud/firestore_v1/batch.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@
2121

2222

2323
class WriteBatch(BaseWriteBatch):
24-
"""Accumulate write operations to be sent in a batch.
24+
"""Accumulate write operations to be sent in a batch. Use this over
25+
`BulkWriteBatch` for lower volumes or when the order of operations
26+
within a given batch is important.
2527
2628
This has the same set of methods for write operations that
2729
:class:`~google.cloud.firestore_v1.document.DocumentReference` does,
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Copyright 2021 Google LLC All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
#     http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Helpers for batch requests to the Google Cloud Firestore API."""
16+
from google.api_core import gapic_v1 # type: ignore
17+
from google.api_core import retry as retries # type: ignore
18+
19+
from google.cloud.firestore_v1 import _helpers
20+
from google.cloud.firestore_v1.base_batch import BaseBatch
21+
from google.cloud.firestore_v1.types.firestore import BatchWriteResponse
22+
23+
24+
class BulkWriteBatch(BaseBatch):
    """Collect write operations and send them with the ``batch_write`` RPC.

    Choose this class over `WriteBatch` for higher volumes (e.g., via
    `BulkWriter`) and when the order of operations within a given batch is
    unimportant.

    The order in which individual write operations are applied to the
    database is not guaranteed, so a `batch_write` RPC can never contain
    multiple operations to the same document. If calling code detects a
    second write operation to a known document reference, it should first
    cut off the previous batch and send it, then create a new batch starting
    with the latest write operation. In practice, the [Async]BulkWriter
    classes handle this.

    This has the same set of methods for write operations that
    :class:`~google.cloud.firestore_v1.document.DocumentReference` does,
    e.g. :meth:`~google.cloud.firestore_v1.document.DocumentReference.create`.

    Args:
        client (:class:`~google.cloud.firestore_v1.client.Client`):
            The client that created this batch.
    """

    def __init__(self, client) -> None:
        super().__init__(client=client)

    def commit(
        self, retry: retries.Retry = gapic_v1.method.DEFAULT, timeout: float = None
    ) -> BatchWriteResponse:
        """Send the writes accumulated in this batch via ``batch_write``.

        Write operations are not guaranteed to be applied in order and must
        not contain multiple writes to any given document. Where those
        conditions are acceptable, this is preferred for performance reasons
        over the ``commit`` RPC used by `WriteBatch`.

        After the call returns, the pending writes held by this batch are
        cleared and the results are stored on ``self.write_results``.

        Args:
            retry (google.api_core.retry.Retry): Designation of what errors,
                if any, should be retried. Defaults to a system-specified
                policy.
            timeout (float): The timeout for this request. Defaults to a
                system-specified value.

        Returns:
            :class:`~google.cloud.firestore_v1.types.firestore.BatchWriteResponse`:
                Container holding the write results corresponding to the
                changes committed, returned in the same order as the changes
                were applied to this batch. An individual write result
                contains an ``update_time`` field.
        """
        request, call_kwargs = self._prep_commit(retry, timeout)

        response: BatchWriteResponse = self._client._firestore_api.batch_write(
            request=request, metadata=self._client._rpc_metadata, **call_kwargs
        )

        # The accumulated writes have been sent; drop them and keep the results.
        self._write_pbs = []
        self.write_results = list(response.write_results)

        return response

    def _prep_commit(self, retry: retries.Retry, timeout: float):
        """Build the ``batch_write`` request dict and the retry/timeout kwargs.

        Returns:
            tuple: ``(request, kwargs)`` where ``request`` carries the
            database string, the pending writes and ``labels=None``, and
            ``kwargs`` is whatever ``_helpers.make_retry_timeout_kwargs``
            produces for ``retry`` and ``timeout``.
        """
        request = dict(
            database=self._client._database_string,
            writes=self._write_pbs,
            labels=None,
        )
        return request, _helpers.make_retry_timeout_kwargs(retry, timeout)

0 commit comments

Comments
 (0)