Skip to content

Commit 903070d

Browse files
feat: Implement transforms to/from Cloud Pub/Sub Messages (#20)
* feat: Implement AckSetTracker, which tracks message acknowledgements. Note that it is awkward to structure this like the Java version, as there is no "AsyncCallable" type in Python.
* fix: Fix comments on ack_set_tracker.
* feat: Implement transforms to/from Pub/Sub messages and Pub/Sub Lite messages.
* fix: Change test to handle a bug in proto-plus-python. The following code returns 18005 seconds because the time is timezone-adjusted: PubSubMessage(event_time=Timestamp(seconds=5).ToDatetime())
* fix: Replace try-blocks with pytest.raises.
* fix: Replace pickle encoding/decoding with specific per-type encoding/decoding.
Co-authored-by: Daniel Collins <[email protected]>
1 parent 7f88458 commit 903070d

File tree

3 files changed

+151
-0
lines changed

3 files changed

+151
-0
lines changed
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import datetime
2+
3+
from google.api_core.exceptions import InvalidArgument
4+
from google.protobuf.timestamp_pb2 import Timestamp
5+
from google.pubsub_v1 import PubsubMessage
6+
7+
from google.cloud.pubsublite_v1 import AttributeValues, SequencedMessage, PubSubMessage
8+
9+
PUBSUB_LITE_EVENT_TIME = "x-goog-pubsublite-event-time"
10+
11+
12+
def encode_attribute_event_time(dt: datetime.datetime) -> str:
    """Encode an event time as the string stored in a message attribute.

    Args:
        dt: The event time to encode.

    Returns:
        The JSON string form of a protobuf ``Timestamp`` populated from ``dt``.
    """
    stamp = Timestamp()
    stamp.FromDatetime(dt)
    encoded = stamp.ToJsonString()
    return encoded
16+
17+
18+
def decode_attribute_event_time(attr: str) -> datetime.datetime:
    """Decode an event-time attribute value back into a datetime.

    Args:
        attr: The attribute string, expected to be the JSON string form of a
            protobuf ``Timestamp``.

    Returns:
        The datetime produced by ``Timestamp.ToDatetime()``.

    Raises:
        InvalidArgument: If parsing or converting ``attr`` raises ``ValueError``.
    """
    ts = Timestamp()
    try:
        ts.FromJsonString(attr)
        return ts.ToDatetime()
    except ValueError as err:
        # Chain the original error so the parse failure detail is not lost.
        raise InvalidArgument("Invalid value for event time attribute.") from err
25+
26+
27+
def _parse_attributes(values: AttributeValues) -> str:
    """Convert the values of one attribute into a single UTF-8 string.

    Args:
        values: The values stored for one attribute key.

    Returns:
        The sole value, decoded as UTF-8.

    Raises:
        InvalidArgument: If the attribute does not hold exactly one value, or
            if that value is not valid UTF-8.
    """
    count = len(values.values)
    if count == 0:
        # Previously reported as "multiple values", which was misleading.
        raise InvalidArgument("Received an unparseable message with no values for an attribute.")
    if count > 1:
        raise InvalidArgument("Received an unparseable message with multiple values for an attribute.")
    value: bytes = values.values[0]
    try:
        return value.decode('utf-8')
    except UnicodeError as err:
        raise InvalidArgument("Received an unparseable message with a non-utf8 attribute.") from err
35+
36+
37+
def to_cps_subscribe_message(source: SequencedMessage) -> PubsubMessage:
    """Convert a sequenced message into a Cloud Pub/Sub message.

    The wrapped message is converted with ``to_cps_publish_message``; the
    cursor offset (as a string) becomes the message id and the publish time is
    copied over.

    Args:
        source: The sequenced message to convert.

    Returns:
        The converted ``PubsubMessage``.
    """
    converted: PubsubMessage = to_cps_publish_message(source.message)
    offset_text = str(source.cursor.offset)
    converted.message_id = offset_text
    converted.publish_time = source.publish_time
    return converted
42+
43+
44+
def to_cps_publish_message(source: PubSubMessage) -> PubsubMessage:
    """Convert a Pub/Sub Lite message into a Cloud Pub/Sub message.

    The key is decoded as UTF-8 into the ordering key, the data is copied, and
    each attribute is reduced to a single string via ``_parse_attributes``.
    When the source has an event time, it is stored under the
    ``PUBSUB_LITE_EVENT_TIME`` attribute.

    Args:
        source: The Pub/Sub Lite message to convert.

    Returns:
        The converted ``PubsubMessage``.

    Raises:
        InvalidArgument: If the key is not valid UTF-8, if the source already
            carries the ``PUBSUB_LITE_EVENT_TIME`` attribute, or if an
            attribute cannot be parsed.
    """
    out = PubsubMessage()
    try:
        out.ordering_key = source.key.decode('utf-8')
    except UnicodeError as err:
        # Chain the decode failure so the offending byte position is visible.
        raise InvalidArgument("Received an unparseable message with a non-utf8 key.") from err
    # The event-time attribute name is reserved for the value written below.
    if PUBSUB_LITE_EVENT_TIME in source.attributes:
        raise InvalidArgument("Special timestamp attribute exists in wire message. Unable to parse message.")
    out.data = source.data
    for key, values in source.attributes.items():
        out.attributes[key] = _parse_attributes(values)
    # NOTE(review): presumably a field-presence check on the message — confirm.
    if 'event_time' in source:
        out.attributes[PUBSUB_LITE_EVENT_TIME] = encode_attribute_event_time(source.event_time)
    return out
58+
59+
60+
def from_cps_publish_message(source: PubsubMessage) -> PubSubMessage:
    """Convert a Cloud Pub/Sub message into a Pub/Sub Lite message.

    The ``PUBSUB_LITE_EVENT_TIME`` attribute, when present, is decoded into the
    event time and is not copied as an attribute. The ordering key and every
    other attribute value are encoded as UTF-8 bytes.

    Args:
        source: The Cloud Pub/Sub message to convert.

    Returns:
        The converted ``PubSubMessage``.
    """
    out = PubSubMessage()
    cps_attributes = source.attributes
    if PUBSUB_LITE_EVENT_TIME in cps_attributes:
        out.event_time = decode_attribute_event_time(cps_attributes[PUBSUB_LITE_EVENT_TIME])
    out.data = source.data
    out.key = source.ordering_key.encode('utf-8')
    for name, text in cps_attributes.items():
        if name == PUBSUB_LITE_EVENT_TIME:
            # Already consumed above as the event time.
            continue
        out.attributes[name] = AttributeValues(values=[text.encode('utf-8')])
    return out
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,21 @@
11
from typing import NamedTuple
2+
import json
3+
24
from google.cloud.pubsublite_v1.types.common import Cursor
35
from google.cloud.pubsublite.partition import Partition
46

57

68
class PublishMetadata(NamedTuple):
    """A partition and cursor pair with a JSON string encoding."""

    partition: Partition
    cursor: Cursor

    def encode(self) -> str:
        """Serialize to a JSON object with 'partition' and 'offset' keys."""
        payload = {
            'partition': self.partition.value,
            'offset': self.cursor.offset,
        }
        return json.dumps(payload)

    @staticmethod
    def decode(source: str) -> 'PublishMetadata':
        """Rebuild a PublishMetadata from a string produced by ``encode``."""
        fields = json.loads(source)
        partition = Partition(fields['partition'])
        cursor = Cursor(offset=fields['offset'])
        return PublishMetadata(partition=partition, cursor=cursor)
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import datetime
2+
3+
import pytest
4+
from google.api_core.exceptions import InvalidArgument
5+
from google.protobuf.timestamp_pb2 import Timestamp
6+
from google.pubsub_v1 import PubsubMessage
7+
8+
from google.cloud.pubsublite.cloudpubsub.message_transforms import PUBSUB_LITE_EVENT_TIME, to_cps_subscribe_message, \
9+
encode_attribute_event_time, from_cps_publish_message
10+
from google.cloud.pubsublite_v1 import SequencedMessage, Cursor, PubSubMessage, AttributeValues
11+
12+
NOT_UTF8 = bytes.fromhex('ffff')
13+
14+
15+
def test_invalid_subscribe_transform_key():
    """A key that is not valid UTF-8 is rejected with InvalidArgument."""
    bad_key_message = SequencedMessage(
        message=PubSubMessage(key=NOT_UTF8),
        publish_time=Timestamp(),
        cursor=Cursor(offset=10),
        size_bytes=10,
    )
    with pytest.raises(InvalidArgument):
        to_cps_subscribe_message(bad_key_message)
20+
21+
22+
def test_invalid_subscribe_contains_magic_attribute():
    """A wire message already carrying the event-time attribute is rejected."""
    reserved_attributes = {PUBSUB_LITE_EVENT_TIME: AttributeValues(values=[b'abc'])}
    wire_message = SequencedMessage(
        message=PubSubMessage(key=b'def', attributes=reserved_attributes),
        publish_time=Timestamp(seconds=10),
        cursor=Cursor(offset=10),
        size_bytes=10,
    )
    with pytest.raises(InvalidArgument):
        to_cps_subscribe_message(wire_message)
27+
28+
29+
def test_invalid_subscribe_contains_multiple_attributes():
    """An attribute holding more than one value is rejected."""
    multi_valued = {'xyz': AttributeValues(values=[b'abc', b''])}
    wire_message = SequencedMessage(
        message=PubSubMessage(key=b'def', attributes=multi_valued),
        publish_time=Timestamp(seconds=10),
        cursor=Cursor(offset=10),
        size_bytes=10,
    )
    with pytest.raises(InvalidArgument):
        to_cps_subscribe_message(wire_message)
34+
35+
36+
def test_invalid_subscribe_contains_non_utf8_attributes():
    """An attribute value that is not valid UTF-8 is rejected."""
    non_utf8 = {'xyz': AttributeValues(values=[NOT_UTF8])}
    wire_message = SequencedMessage(
        message=PubSubMessage(key=b'def', attributes=non_utf8),
        publish_time=Timestamp(seconds=10),
        cursor=Cursor(offset=10),
        size_bytes=10,
    )
    with pytest.raises(InvalidArgument):
        to_cps_subscribe_message(wire_message)
41+
42+
43+
def test_subscribe_transform_correct():
    """A valid sequenced message converts to the expected Pub/Sub message."""
    event_time_attr = encode_attribute_event_time(Timestamp(seconds=55).ToDatetime())
    expected = PubsubMessage(
        data=b'xyz',
        ordering_key='def',
        attributes={'x': 'abc', 'y': 'abc', PUBSUB_LITE_EVENT_TIME: event_time_attr},
        message_id=str(10),
        publish_time=Timestamp(seconds=10),
    )
    lite_message = PubSubMessage(
        data=b'xyz',
        key=b'def',
        event_time=Timestamp(seconds=55),
        attributes={'x': AttributeValues(values=[b'abc']), 'y': AttributeValues(values=[b'abc'])},
    )
    wire_message = SequencedMessage(
        message=lite_message,
        publish_time=Timestamp(seconds=10),
        cursor=Cursor(offset=10),
        size_bytes=10,
    )
    result = to_cps_subscribe_message(wire_message)
    assert result == expected
54+
55+
56+
def test_publish_invalid_event_time():
    """An event-time attribute that cannot be decoded is rejected."""
    bad_event_time = PubsubMessage(attributes={PUBSUB_LITE_EVENT_TIME: 'probably not an encoded proto'})
    with pytest.raises(InvalidArgument):
        from_cps_publish_message(bad_event_time)
59+
60+
61+
def test_publish_valid_transform():
    """A valid Pub/Sub message converts to the expected Pub/Sub Lite message."""
    now = datetime.datetime.now()
    expected = PubSubMessage(
        data=b'xyz',
        key=b'def',
        event_time=now,
        attributes={'x': AttributeValues(values=[b'abc']), 'y': AttributeValues(values=[b'abc'])},
    )
    cps_message = PubsubMessage(
        data=b'xyz',
        ordering_key='def',
        attributes={'x': 'abc', 'y': 'abc', PUBSUB_LITE_EVENT_TIME: encode_attribute_event_time(now)},
    )
    result = from_cps_publish_message(cps_message)
    assert result == expected

0 commit comments

Comments
 (0)