index

The document outlines a Python implementation of a data management system using Firestore as a datastore and HTTP for message brokering. It includes abstract classes for data storage and message publishing, along with concrete implementations for Firestore and HTTP. Additionally, it features a data manipulation class, validation logic, and an execution engine to manage service configurations and flow execution.
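
For orientation, the code below expects Firestore documents of roughly the following shapes. This is an illustrative sketch only: the field names (data, flow, subscribers, post_condition, attributes, dtype, constraints, document_tag, contents) are taken from the code, while the collection contents, service name, and values are hypothetical.

# Illustrative document shapes (hypothetical values; field names from the code below).

# A document in the 'services' collection, as consumed by ServiceConfig.load():
example_service = {
    "data": {
        "flow": [
            {"action": "write",
             "params": {"collection": "orders",
                        "data": {"document_tag": "order-001",
                                 "contents": {"amount": 42.0, "status": "new"}}}},
            {"action": "manipulate",
             "params": {"action": "add",
                        "params": {"type": "numeric", "values": [40.0, 2.0]},
                        "output_key": "total"}},
        ],
        "subscribers": ["billing-service"],
        "post_condition": "order_created",
    }
}

# A document in the 'domain_objects' collection (looked up via the 'entity_name'
# or 'entity' value on a flow step) and used to build validator invariants:
example_domain_object = {
    "data": {
        "attributes": {
            "amount": {"dtype": "float", "constraints": {"min": 0}},
            "status": {"dtype": "string"},
        }
    }
}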

import json

import requests
import numpy as np

from functools import reduce
from datetime import datetime
from google.cloud import firestore
from abc import ABC, abstractmethod
from collections.abc import Iterable
from google.cloud.firestore_v1 import FieldFilter
from typing import Any, Dict, List, Optional, Union
from tenacity import retry, wait_exponential, stop_after_attempt
from dateutil.relativedelta import relativedelta, MO, TU, WE, TH, FR, SA, SU

# Interfaces
class DataStore(ABC):
    @abstractmethod
    def read(self, collection: str, identifier: Optional[str] = None,
             filters: Optional[List] = None) -> List[Dict]:
        pass

    @abstractmethod
    def write(self, collection: str, data: Union[Dict, List[Dict]]) -> None:
        pass

    @abstractmethod
    def update(self, collection: str, identifier: str, updates: Dict) -> None:
        pass

    @abstractmethod
    def delete(self, collection: str, identifier: Optional[str] = None,
               field: Optional[str] = None) -> None:
        pass


class MessageBroker(ABC):
    @abstractmethod
    def publish(self, config: Dict, payload: Dict, api_key: Optional[str] = None) -> Dict:
        pass

# Implementations
class FirestoreDataStore(DataStore):
    def __init__(self, client: firestore.Client):
        self.db = client

    def read(self, collection: str, identifier: Optional[str] = None,
             filters: Optional[List] = None) -> List[Dict]:
        collection_ref = self.db.collection(collection)

        if identifier:
            doc = collection_ref.document(identifier).get()
            return [doc.to_dict()] if doc.exists else []

        if filters:
            query = collection_ref
            for field, operator, value in filters:
                query = query.where(filter=FieldFilter(field, operator, value))
            docs = query.stream()
        else:
            docs = collection_ref.stream()

        return [doc.to_dict() for doc in docs]

    def write(self, collection: str, data: Union[Dict, List[Dict]]) -> None:
        collection_ref = self.db.collection(collection)
        batch = self.db.batch()

        documents = [data] if isinstance(data, dict) else data

        document_tags_seen = set()

        for document in documents:
            document_tag = document.get("document_tag")
            contents = document.get("contents")

            if not document_tag or not contents:
                raise ValueError("Each document must have 'document_tag' and 'contents'.")

            if document_tag in document_tags_seen:
                raise ValueError(f"Duplicate document tag '{document_tag}'")

            document_tags_seen.add(document_tag)
            doc_ref = collection_ref.document(document_tag)

            if doc_ref.get().exists:
                raise ValueError(f"Document '{document_tag}' already exists")

            batch.set(doc_ref, contents)

        batch.commit()

    def update(self, collection: str, identifier: str, updates: Dict) -> None:
        doc_ref = self.db.collection(collection).document(identifier)
        if not doc_ref.get().exists:
            raise ValueError(f"Document '{identifier}' does not exist")
        doc_ref.update(updates)

    def delete(self, collection: str, identifier: Optional[str] = None,
               field: Optional[str] = None) -> None:
        collection_ref = self.db.collection(collection)

        if identifier and field:
            doc_ref = collection_ref.document(identifier)
            doc_ref.update({field: firestore.DELETE_FIELD})
        elif identifier:
            collection_ref.document(identifier).delete()
        else:
            self._delete_collection(collection_ref)

    def _delete_collection(self, coll_ref, batch_size: int = 10) -> None:
        docs = coll_ref.limit(batch_size).stream()
        deleted = 0

        batch = self.db.batch()
        for doc in docs:
            batch.delete(doc.reference)
            deleted += 1
        batch.commit()  # commit the batched deletes before checking for more documents

        # If a full batch was deleted, there may be more documents left to remove.
        if deleted >= batch_size:
            self._delete_collection(coll_ref, batch_size)

class HTTPMessageBroker(MessageBroker):
    @retry(wait=wait_exponential(min=1, max=10), stop=stop_after_attempt(3))
    def publish(self, config: Dict, payload: Dict, api_key: Optional[str] = None) -> Dict:
        if api_key:
            config = json.loads(json.dumps(config).replace("{api_key_here}", api_key))

        response = requests.post(
            url=config["url"],
            headers=config.get("headers"),
            json=payload
        )

        if response.status_code >= 400:
            raise Exception(f"External service error: {response.text}")

        return response.json()

class DataManipulator:
    def __init__(self):
        self.cache = {}

    def process(self, action: str, params: Dict, output_key: Optional[str] = None) -> Any:
        action_type = params.get('type')
        values = params.get('values', [])

        if action_type == 'numeric':
            result = self._handle_numeric(action, values)
        elif action_type == 'string':
            result = self._handle_string(action, params)
        elif action_type == 'datetime':
            result = self._handle_datetime(params)
        elif action_type == 'collection':
            result = self._handle_collection(action, params)
        else:
            raise ValueError(f"Unsupported action type: {action_type}")

        if output_key:
            self.cache[output_key] = result

        return result

    def _handle_numeric(self, action: str, values: List) -> Any:
        try:
            return getattr(np, action)(*values)
        except AttributeError:
            raise ValueError(f"Invalid NumPy action: {action}")

    def _handle_string(self, action: str, params: Dict) -> str:
        values = params.get('values', [])
        if len(values) != 1 or not isinstance(values[0], str):
            raise ValueError("String actions require a single string value")

        s = values[0]
        args = [
            self.cache[arg] if isinstance(arg, str) and arg in self.cache else arg
            for arg in params.get('args', [])
        ]
        return getattr(s, action)(*args)

    def _handle_datetime(self, params: Dict) -> datetime:
        base_date = params.get('base_date', datetime.now())
        weekday_name = params.get('weekday_name')
        weekday_offset = params.get('weekday_offset', 0)
        kwargs = params.get('kwargs', {})

        weekdays = {"MO": MO, "TU": TU, "WE": WE, "TH": TH,
                    "FR": FR, "SA": SA, "SU": SU}

        if weekday_name and weekday_name in weekdays:
            kwargs["weekday"] = weekdays[weekday_name](weekday_offset)

        return base_date + relativedelta(**kwargs)

    def _handle_collection(self, action: str, params: Dict) -> Any:
        collection = params.get('collection', [])
        if isinstance(collection, str):
            collection = self.cache.get(collection.strip('{}'), [])

        if not isinstance(collection, list):
            raise ValueError("Collection must be a list")

        operations = {
            "filter": lambda: list(filter(params['predicate'], collection)),
            "map": lambda: list(map(params['transform'], collection)),
            "reduce": lambda: reduce(params['reducer'], collection),
            "sort": lambda: sorted(collection, key=params.get('args', [None])[0]),
            "find": lambda: next((item for item in collection
                                  if params.get('predicate')(item)), None),
            "index_of": lambda: collection.index(params['value'])
                                if params.get('value') in collection else -1,
            "flatten": lambda: list(self._flatten(collection)),
            "unique": lambda: list(set(collection))
        }

        if action not in operations:
            raise ValueError(f"Unsupported collection operation: {action}")

        return operations[action]()

    def _flatten(self, lst: List) -> Iterable:
        for el in lst:
            if isinstance(el, Iterable) and not isinstance(el, (str, bytes)):
                yield from self._flatten(el)
            else:
                yield el

class Validator:
    """Handles validation logic for attributes."""
    def __init__(self, invariants):
        self.invariants = invariants

    def validate(self, attribute_name, value):
        if attribute_name not in self.invariants:
            raise ValueError(f"Attribute '{attribute_name}' not defined.")
        attr_config = self.invariants[attribute_name]
        expected_type = self._get_expected_type(attr_config.get("dtype"))
        if expected_type and not isinstance(value, expected_type):
            raise TypeError(f"Expected '{expected_type.__name__}' for "
                            f"'{attribute_name}', got '{type(value).__name__}'.")
        self._validate_constraints(attr_config, value)

    def _get_expected_type(self, dtype):
        type_map = {"string": str, "integer": int, "float": float,
                    "boolean": bool, "datetime": str}
        return type_map.get(dtype)

    def _validate_constraints(self, config, value):
        constraints = config.get("constraints", {})
        if "min" in constraints and value < constraints["min"]:
            raise ValueError(f"Value '{value}' is less than the minimum allowed.")
        if "max" in constraints and value > constraints["max"]:
            raise ValueError(f"Value '{value}' exceeds the maximum allowed.")

class ServiceConfig:
    def __init__(self, data_store: DataStore):
        self.data_store = data_store

    def load(self, service_name: str) -> Dict:
        config = self.data_store.read('services', service_name)
        if not config:
            raise ValueError(f"Service '{service_name}' not found")
        return config[0]["data"]

    def get_domain_objects(self, flow) -> Dict:
        domain_objects = {}
        references = {
            step.get("params", {}).get("entity_name") or step.get("params", {}).get("entity")
            for step in flow
            if step.get("params", {}).get("entity_name") or step.get("params", {}).get("entity")
        }
        for reference in references:
            config = self.data_store.read('domain_objects', reference)
            if config:
                domain_objects[reference] = config[0]["data"]
        return domain_objects

    def _build_invariants(self, domain_objects):
        invariants = {}
        for domain, obj in domain_objects.items():
            for attr, attr_config in obj.get("attributes", {}).items():
                invariants[attr] = attr_config
        return invariants


class ExecutionEngine:
    def __init__(self, data_store: DataStore, message_broker: MessageBroker):
        self.data_store = data_store
        self.message_broker = message_broker
        self.manipulator = DataManipulator()
        self.config_loader = ServiceConfig(data_store)
        self.validator = Validator({})  # rebuilt with the real invariants in initialize()
        self.cache = {}

    def initialize(self, service_name):
        self.config = self.config_loader.load(service_name)
        self.flow = self.config.get("flow", [])
        self.subscribers = self.config.get("subscribers", [])
        self.domain_objects = self.config_loader.get_domain_objects(self.flow)
        self.invariants = self.config_loader._build_invariants(self.domain_objects)
        self.validator = Validator(self.invariants)

    def validate(self, attribute_name, value):
        self.validator.validate(attribute_name, value)

    def execute_flow(self):
        # add try except block
        for step in self.flow:
            # Simplified logic for flow execution
            action = step["action"]
            params = step.get("params", {})
            self._execute_action(action, params)

        # Notify subscribers
        if self.subscribers:
            for subscriber in self.subscribers:
                self.message_broker.publish(
                    config={"subscriber": subscriber},
                    payload={"post_condition": self.config.get("post_condition")}
                )

        # should return something : eg a bool, str, output, result etc

    def _execute_action(self, action, params):
        if action == "write":
            # call validate
            self.data_store.write(**params)
        elif action == "update":
            # call validate
            self.data_store.update(**params)
        elif action == "delete":
            self.data_store.delete(**params)
        elif action == "read":
            self.data_store.read(**params)
        elif action == "manipulate":
            self.manipulator.process(**params)
        elif action == "translate":
            pass
        elif action == "send":
            self.message_broker.publish(**params)
        else:
            print("action unknown!")
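
A minimal wiring sketch follows, assuming configured Google Cloud credentials and Firestore collections populated with documents of the shapes sketched near the top of this file; the service name "order_service" and the attribute "amount" are hypothetical.

# Minimal wiring sketch (not part of the original flow definitions).
# Assumes Google Cloud credentials are configured and that the 'services' and
# 'domain_objects' collections contain documents of the shapes sketched above.
if __name__ == "__main__":
    client = firestore.Client()
    engine = ExecutionEngine(
        data_store=FirestoreDataStore(client),
        message_broker=HTTPMessageBroker()
    )

    engine.initialize("order_service")   # load flow, subscribers, and invariants
    engine.validate("amount", 42.0)      # check one attribute against its invariants
    engine.execute_flow()                # run the configured steps and notify subscribers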
