Skip to content

Commit aa9aca8

Browse files
feat: Implement committer (#13)
* feat: Implement committer. Also includes a small fix to the retrying connection so that it doesn't leak reads/writes from previous connections. * fix: Patch retrying connection and add comments. * Update committer.py * Update committer_impl.py
1 parent baeb0f6 commit aa9aca8

File tree

7 files changed

+462
-38
lines changed

7 files changed

+462
-38
lines changed
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from abc import abstractmethod
2+
from typing import AsyncContextManager
3+
4+
from google.cloud.pubsublite_v1 import Cursor
5+
6+
7+
class Committer(AsyncContextManager):
    """
    Interface for objects that commit subscribers' completed offsets.

    Implementations are asynchronous context managers; entering and exiting
    the context is left to the concrete class.
    """

    @abstractmethod
    async def commit(self, cursor: Cursor) -> None:
        """Commit the given cursor.

        Args:
          cursor: The cursor to commit.
        """
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
import asyncio
2+
from typing import Optional, List, Iterable
3+
4+
from absl import logging
5+
6+
from google.cloud.pubsublite.internal.wire.committer import Committer
7+
from google.cloud.pubsublite.internal.wire.retrying_connection import RetryingConnection, ConnectionFactory
8+
from google.api_core.exceptions import FailedPrecondition, GoogleAPICallError
9+
from google.cloud.pubsublite.internal.wire.connection_reinitializer import ConnectionReinitializer
10+
from google.cloud.pubsublite.internal.wire.connection import Connection
11+
from google.cloud.pubsublite.internal.wire.serial_batcher import SerialBatcher, BatchTester
12+
from google.cloud.pubsublite_v1 import Cursor
13+
from google.cloud.pubsublite_v1.types import StreamingCommitCursorRequest, StreamingCommitCursorResponse, InitialCommitCursorRequest
14+
from google.cloud.pubsublite.internal.wire.work_item import WorkItem
15+
16+
17+
class CommitterImpl(
    Committer,
    ConnectionReinitializer[StreamingCommitCursorRequest, StreamingCommitCursorResponse],
    BatchTester[Cursor],
):
    """
    A Committer that batches cursors and sends them over a retrying
    streaming connection.

    Cursors passed to commit() are collected in a SerialBatcher. A background
    flush loop periodically sends the most recent cursor of each batch, and a
    background receive loop resolves the callers' futures as the server
    acknowledges commits. When the connection is re-established, all
    unacknowledged batches are rolled up into a single commit and resent.
    """

    # Initial request sent first on every new stream.
    _initial: InitialCommitCursorRequest
    # Delay, in seconds, between periodic flushes of the batcher.
    _flush_seconds: float
    _connection: RetryingConnection[StreamingCommitCursorRequest, StreamingCommitCursorResponse]

    _batcher: SerialBatcher[Cursor, None]

    # Batches that have been sent (or queued for sending) but not yet
    # acknowledged, oldest first.
    _outstanding_commits: List[List[WorkItem[Cursor, None]]]

    # Background tasks; None whenever the loopers are not running.
    _receiver: Optional[asyncio.Future]
    _flusher: Optional[asyncio.Future]

    def __init__(
        self,
        initial: InitialCommitCursorRequest,
        flush_seconds: float,
        factory: ConnectionFactory[StreamingCommitCursorRequest, StreamingCommitCursorResponse],
    ):
        """
        Args:
          initial: The initial request to send on each new stream.
          flush_seconds: Seconds to wait between periodic flushes.
          factory: Factory used by the RetryingConnection to create streams.
        """
        self._initial = initial
        self._flush_seconds = flush_seconds
        self._connection = RetryingConnection(factory, self)
        self._batcher = SerialBatcher(self)
        self._outstanding_commits = []
        self._receiver = None
        self._flusher = None

    async def __aenter__(self):
        """Enter the underlying connection and return this committer."""
        await self._connection.__aenter__()
        # Return self so that `async with CommitterImpl(...) as committer`
        # binds the committer rather than None.
        return self

    def _start_loopers(self):
        """Start the background receive and flush tasks."""
        assert self._receiver is None
        assert self._flusher is None
        self._receiver = asyncio.ensure_future(self._receive_loop())
        self._flusher = asyncio.ensure_future(self._flush_loop())

    async def _stop_loopers(self):
        """Cancel the background tasks, if running, and wait for them to end."""
        if self._receiver:
            self._receiver.cancel()
            await self._receiver
            self._receiver = None
        if self._flusher:
            self._flusher.cancel()
            await self._flusher
            self._flusher = None

    def _handle_response(self, response: StreamingCommitCursorResponse):
        """Resolve the futures of every batch acknowledged by `response`.

        Fails the connection, without touching any outstanding batch, if the
        response is not a commit response or acknowledges more commits than
        are outstanding.
        """
        if "commit" not in response:
            self._connection.fail(
                FailedPrecondition("Received an invalid subsequent response on the commit stream."))
            return
        acknowledged = response.commit.acknowledged_commits
        if acknowledged > len(self._outstanding_commits):
            self._connection.fail(
                FailedPrecondition("Received a commit response on the stream with no outstanding commits."))
            # Do not fall through: popping from the (too short) list below
            # would raise IndexError inside the receive loop.
            return
        for _ in range(acknowledged):
            batch = self._outstanding_commits.pop(0)
            for item in batch:
                item.response_future.set_result(None)

    async def _receive_loop(self):
        """Read responses from the connection until cancelled."""
        try:
            while True:
                response = await self._connection.read()
                self._handle_response(response)
        except asyncio.CancelledError:
            return

    async def _flush_loop(self):
        """Flush the batcher every `_flush_seconds` seconds until cancelled."""
        try:
            while True:
                await asyncio.sleep(self._flush_seconds)
                await self._flush()
        except asyncio.CancelledError:
            return

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Flush pending commits (or fail them if the connection has failed)
        and exit the underlying connection."""
        # NOTE(review): the looper tasks are not stopped here; confirm whether
        # exiting the connection is expected to end them.
        if self._connection.error():
            self._fail_if_retrying_failed()
        else:
            await self._flush()
        await self._connection.__aexit__(exc_type, exc_val, exc_tb)

    def _fail_if_retrying_failed(self):
        """If the connection reports an error, fail every outstanding commit
        with that error."""
        error = self._connection.error()
        if not error:
            return
        for batch in self._outstanding_commits:
            for item in batch:
                # This may run more than once for the same batches (from
                # _flush and again from __aexit__); setting an exception on an
                # already-completed future would raise InvalidStateError.
                if not item.response_future.done():
                    item.response_future.set_exception(error)

    async def _flush(self):
        """Send the current batch, if any, as a single commit request."""
        batch = self._batcher.flush()
        if not batch:
            return
        self._outstanding_commits.append(batch)
        req = StreamingCommitCursorRequest()
        # Only the last cursor of the batch is sent; the whole batch is
        # resolved together when the commit is acknowledged.
        req.commit.cursor = batch[-1].request
        try:
            await self._connection.write(req)
        except GoogleAPICallError as e:
            logging.debug(f"Failed commit on stream: {e}")
            self._fail_if_retrying_failed()

    async def commit(self, cursor: Cursor) -> None:
        """Queue `cursor` for commit and wait until its future is completed.

        Args:
          cursor: The cursor to commit.
        """
        future = self._batcher.add(cursor)
        if self._batcher.should_flush():
            # always returns false currently, here in case this changes in the future.
            await self._flush()
        await future

    async def reinitialize(self, connection: Connection[StreamingCommitCursorRequest, StreamingCommitCursorResponse]):
        """Perform the initial handshake on a new stream and resend
        unacknowledged commits.

        Stops the loopers, sends the initial request and validates the initial
        response. Any outstanding batches are then merged into one batch and
        resent as a single commit before the loopers are restarted.

        Args:
          connection: The newly created connection to initialize.
        """
        await self._stop_loopers()
        await connection.write(StreamingCommitCursorRequest(initial=self._initial))
        response = await connection.read()
        if "initial" not in response:
            self._connection.fail(
                FailedPrecondition("Received an invalid initial response on the commit stream."))
            # The stream is unusable; do not resend commits or restart loopers.
            return
        if self._outstanding_commits:
            # Roll up outstanding commits
            rollup: List[WorkItem[Cursor, None]] = [
                item for batch in self._outstanding_commits for item in batch
            ]
            self._outstanding_commits = [rollup]
            req = StreamingCommitCursorRequest()
            req.commit.cursor = rollup[-1].request
            await connection.write(req)
        self._start_loopers()

    def test(self, requests: Iterable[Cursor]) -> bool:
        """Batch tester hook: always returns False, so cursors never force a flush."""
        # There is no bound on the number of outstanding cursors.
        return False

google/cloud/pubsublite/internal/wire/connection_reinitializer.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ class ConnectionReinitializer(Generic[Request, Response], metaclass=ABCMeta):
77
"""A class capable of reinitializing a connection after a new one has been created."""
88
@abstractmethod
99
def reinitialize(self, connection: Connection[Request, Response]):
10-
"""Reinitialize a connection.
10+
"""Reinitialize a connection. Must ensure no calls to the associated RetryingConnection
11+
occur until this completes.
1112
1213
Args:
1314
connection: The connection to reinitialize

google/cloud/pubsublite/internal/wire/publisher.py

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,13 @@
1-
from abc import ABC, abstractmethod
1+
from abc import abstractmethod
2+
from typing import AsyncContextManager
23
from google.cloud.pubsublite_v1.types import PubSubMessage
34
from google.cloud.pubsublite.publish_metadata import PublishMetadata
45

56

6-
class Publisher(ABC):
7+
class Publisher(AsyncContextManager):
78
"""
89
A Pub/Sub Lite asynchronous wire protocol publisher.
910
"""
10-
@abstractmethod
11-
async def __aenter__(self):
12-
raise NotImplementedError()
13-
14-
@abstractmethod
15-
async def __aexit__(self, exc_type, exc_val, exc_tb):
16-
raise NotImplementedError()
17-
1811
@abstractmethod
1912
async def publish(self, message: PubSubMessage) -> PublishMetadata:
2013
"""

google/cloud/pubsublite/internal/wire/retrying_connection.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,15 +48,22 @@ async def _run_loop(self):
4848
"""
4949
Processes actions on this connection and handles retries until cancelled.
5050
"""
51+
last_failure: GoogleAPICallError
5152
try:
5253
bad_retries = 0
5354
while True:
5455
try:
5556
async with self._connection_factory.new() as connection:
57+
# Needs to happen prior to reinitialization to clear outstanding waiters.
58+
while not self._write_queue.empty():
59+
self._write_queue.get_nowait().response_future.set_exception(last_failure)
60+
self._read_queue = asyncio.Queue(maxsize=1)
61+
self._write_queue = asyncio.Queue(maxsize=1)
5662
await self._reinitializer.reinitialize(connection)
5763
bad_retries = 0
5864
await self._loop_connection(connection)
5965
except GoogleAPICallError as e:
66+
last_failure = e
6067
if not is_retryable(e):
6168
self.fail(e)
6269
return

setup.py

Lines changed: 33 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -25,40 +25,46 @@
2525

2626
readme_filename = os.path.join(package_root, "README.rst")
2727
with io.open(readme_filename, encoding="utf-8") as readme_file:
28-
readme = readme_file.read()
28+
readme = readme_file.read()
2929

3030
dependencies = [
3131
"google-api-core >= 1.22.0",
3232
"absl-py >= 0.9.0",
3333
"proto-plus >= 0.4.0",
34+
"grpcio",
3435
"setuptools"
3536
]
3637

38+
test_dependencies = [
39+
"asynctest",
40+
"pytest",
41+
"pytest-asyncio"
42+
]
43+
3744
setuptools.setup(
38-
name="google-cloud-pubsublite",
39-
version=version,
40-
long_description=readme,
41-
author="Google LLC",
42-
author_email="[email protected]",
43-
license="Apache 2.0",
44-
url="https://github.com/googleapis/python-pubsublite",
45-
packages=setuptools.PEP420PackageFinder.find(),
46-
namespace_packages=("google", "google.cloud"),
47-
platforms="Posix; MacOS X; Windows",
48-
include_package_data=True,
49-
install_requires=dependencies,
50-
setup_requires=('pytest-runner',),
51-
tests_require=['asynctest', 'pytest', 'pytest-asyncio'],
52-
python_requires=">=3.6",
53-
classifiers=[
54-
"Development Status :: 4 - Beta",
55-
"Intended Audience :: Developers",
56-
"Operating System :: OS Independent",
57-
"Programming Language :: Python :: 3.6",
58-
"Programming Language :: Python :: 3.7",
59-
"Programming Language :: Python :: 3.8",
60-
"Topic :: Internet",
61-
"Topic :: Software Development :: Libraries :: Python Modules",
62-
],
63-
zip_safe=False,
45+
name="google-cloud-pubsublite",
46+
version=version,
47+
long_description=readme,
48+
author="Google LLC",
49+
author_email="[email protected]",
50+
license="Apache 2.0",
51+
url="https://github.com/googleapis/python-pubsublite",
52+
packages=setuptools.PEP420PackageFinder.find(),
53+
namespace_packages=("google", "google.cloud"),
54+
platforms="Posix; MacOS X; Windows",
55+
include_package_data=True,
56+
install_requires=dependencies,
57+
extras_require={"tests": test_dependencies},
58+
python_requires=">=3.6",
59+
classifiers=[
60+
"Development Status :: 4 - Beta",
61+
"Intended Audience :: Developers",
62+
"Operating System :: OS Independent",
63+
"Programming Language :: Python :: 3.6",
64+
"Programming Language :: Python :: 3.7",
65+
"Programming Language :: Python :: 3.8",
66+
"Topic :: Internet",
67+
"Topic :: Software Development :: Libraries :: Python Modules",
68+
],
69+
zip_safe=False,
6470
)

0 commit comments

Comments
 (0)