diff options
author | Marko Kreen | 2010-06-28 08:10:29 +0000 |
---|---|---|
committer | Marko Kreen | 2010-07-09 10:09:28 +0000 |
commit | fc6aa8109265642ee50794fd35b54b0ef6cd2c65 (patch) | |
tree | 0814231e6a884d41f719966107684390342ac15b | |
parent | 4d6e264af7061934ae86af2cb69fb53899e7de1b (diff) |
Move querybuilder code into separate file and refresh with some new code.
-rw-r--r-- | python/skytools/__init__.py | 3 | ||||
-rwxr-xr-x | python/skytools/querybuilder.py | 376 | ||||
-rw-r--r-- | python/skytools/sqltools.py | 245 |
3 files changed, 379 insertions, 245 deletions
diff --git a/python/skytools/__init__.py b/python/skytools/__init__.py index 1dd3f348..28f28159 100644 --- a/python/skytools/__init__.py +++ b/python/skytools/__init__.py @@ -15,6 +15,7 @@ import skytools.parsing import skytools.dbstruct import skytools.adminscript import skytools.timeutil +import skytools.querybuilder from skytools.psycopgwrapper import * from skytools.config import * @@ -26,6 +27,7 @@ from skytools.quoting import * from skytools.parsing import * from skytools.adminscript import * from skytools.timeutil import * +from skytools.querybuilder import * __all__ = (skytools.psycopgwrapper.__all__ + skytools.config.__all__ @@ -36,6 +38,7 @@ __all__ = (skytools.psycopgwrapper.__all__ + skytools.quoting.__all__ + skytools.adminscript.__all__ + skytools.timeutil.__all__ + + skytools.querybuilder.__all__ + skytools.parsing.__all__) import skytools.installer_config diff --git a/python/skytools/querybuilder.py b/python/skytools/querybuilder.py new file mode 100755 index 00000000..a63b4aca --- /dev/null +++ b/python/skytools/querybuilder.py @@ -0,0 +1,376 @@ +#! /usr/bin/env python + + +"""Helper classes for complex query generation. + +Main target is code execution under PL/Python. + +Query parameters are referenced as C{{key}} or C{{key:type}}. +Type will be given to C{plpy.prepare}. +If C{type} is missing, C{text} is assumed. + +See L{plpy_exec} for examples. + +""" + +from skytools.sqltools import dbdict +from skytools.quoting import quote_literal + +__all__ = ['QueryBuilder', 'PLPyQueryBuilder', 'PLPyQuery', 'plpy_exec'] + +# make plpy available +try: + import plpy +except ImportError: + pass + + +PARAM_INLINE = 0 # quote_literal() +PARAM_DBAPI = 1 # %()s +PARAM_PLPY = 2 # $n + + +class QArgConf: + """Per-query arg-type config object.""" + param_type = None + +class QArg: + """Place-holder for a query parameter.""" + def __init__(self, name, value, pos, conf): + self.name = name + self.value = value + self.pos = pos + self.conf = conf + def __str__(self): + if self.conf.param_type == PARAM_INLINE: + return quote_literal(self.value) + elif self.conf.param_type == PARAM_DBAPI: + return "%s" + elif self.conf.param_type == PARAM_PLPY: + return "$%d" % self.pos + else: + raise Exception("bad QArgConf.param_type") + + +# need an structure with fast remove-from-middle +# and append operations. +class DList: + """Simple double-linked list.""" + def __init__(self): + self.next = self + self.prev = self + + def append(self, obj): + obj.next = self + obj.prev = self.prev + self.prev.next = obj + self.prev = obj + + def remove(self, obj): + obj.next.prev = obj.prev + obj.prev.next = obj.next + obj.next = obj.prev = None + + def empty(self): + return self.next == self + + def pop(self): + """Remove and return first element.""" + obj = None + if not self.empty(): + obj = self.next + self.remove(obj) + return obj + + +class CachedPlan: + """Wrapper around prepared plan.""" + def __init__(self, key, plan): + self.key = key # (sql, (types)) + self.plan = plan + + +class PlanCache: + """Cache for limited amount of plans.""" + + def __init__(self, maxplans = 100): + self.maxplans = maxplans + self.plan_map = {} + self.plan_list = DList() + + def get_plan(self, sql, types): + """Prepare the plan and cache it.""" + + t = (sql, tuple(types)) + if t in self.plan_map: + pc = self.plan_map[t] + # put to the end + self.plan_list.remove(pc) + self.plan_list.append(pc) + return pc.plan + + # prepare new plan + plan = plpy.prepare(sql, types) + + # add to cache + pc = CachedPlan(t, plan) + self.plan_list.append(pc) + self.plan_map[t] = pc + + # remove plans if too much + while len(self.plan_map) > self.maxplans: + pc = self.plan_list.pop() + del self.plan_map[pc.key] + + return plan + + +class QueryBuilder: + """Helper for query building. + + >>> args = {'success': 't', 'total': 45, 'ccy': 'EEK', 'id': 556} + >>> q = QueryBuilder("update orders set total = {total} where id = {id}", args) + >>> q.add(" and optional = {non_exist}") + >>> q.add(" and final = {success}") + >>> print q.get_sql(PARAM_INLINE) + update orders set total = '45' where id = '556' and final = 't' + >>> print q.get_sql(PARAM_DBAPI) + update orders set total = %s where id = %s and final = %s + >>> print q.get_sql(PARAM_PLPY) + update orders set total = $1 where id = $2 and final = $3 + """ + + def __init__(self, sqlexpr, params): + """Init the object. + + @param sqlexpr: Partial sql fragment. + @param params: Dict of parameter values. + """ + self._params = params + self._arg_type_list = [] + self._arg_value_list = [] + self._sql_parts = [] + self._arg_conf = QArgConf() + self._nargs = 0 + + if sqlexpr: + self.add(sqlexpr, required = True) + + def add(self, expr, type = "text", required = False): + """Add SQL fragment to query. + """ + self._add_expr('', expr, self._params, type, required) + + def get_sql(self, param_type = PARAM_INLINE): + """Return generated SQL (thus far) as string. + + Possible values for param_type: + - 0: Insert values quoted with quote_literal() + - 1: Insert %()s in place of parameters. + - 2: Insert $n in place of parameters. + """ + self._arg_conf.param_type = param_type + tmp = map(str, self._sql_parts) + return "".join(tmp) + + def _add_expr(self, pfx, expr, params, type, required): + parts = [] + types = [] + values = [] + nargs = self._nargs + if pfx: + parts.append(pfx) + pos = 0 + while 1: + # find start of next argument + a1 = expr.find('{', pos) + if a1 < 0: + parts.append(expr[pos:]) + break + + # find end end of argument name + a2 = expr.find('}', a1) + if a2 < 0: + raise Exception("missing argument terminator: "+expr) + + # add plain sql + if a1 > pos: + parts.append(expr[pos:a1]) + pos = a2 + 1 + + # get arg name, check if exists + k = expr[a1 + 1 : a2] + # split name from type + k = k.split(":", 1) + kparam = k[0] + try: + ktype = k[1] + except IndexError: + ktype = type + + # params==None means params are checked later + if params is not None and kparam not in params: + if required: + raise Exception("required parameter missing: "+kparam) + # optional fragment, param missing, skip it + return + + # got arg + nargs += 1 + if params is not None: + val = params[kparam] + else: + val = kparam + values.append(val) + types.append(ktype) + arg = QArg(kparam, val, nargs, self._arg_conf) + parts.append(arg) + + # add interesting parts to the main sql + self._sql_parts.extend(parts) + if types: + self._arg_type_list.extend(types) + if values: + self._arg_value_list.extend(values) + self._nargs = nargs + + def execute(self, curs): + """Client-side query execution on DB-API 2.0 cursor. + + Calls C{curs.execute()} with proper arguments. + + Returns result of curs.execute(), although that does not + return anything interesting. Later curs.fetch* methods + must be called to get result. + """ + q = self.get_sql(PARAM_DBAPI) + args = self._params + return curs.execute(q, args) + +class PLPyQueryBuilder(QueryBuilder): + + def __init__(self, sqlexpr, params, plan_cache = None, sqls = None): + """Init the object. + + @param sqlexpr: Partial sql fragment. + @param params: Dict of parameter values. + @param plan_cache: (PL/Python) A dict object where to store the plan cache, under the key C{"plan_cache"}. + If not given, plan will not be cached and values will be inserted directly + to query. Usually either C{GD} or C{SD} should be given here. + @param sqls: list object where to append executed sqls (used for debugging) + """ + QueryBuilder.__init__(self, sqlexpr, params) + self._sqls = sqls + + if plan_cache is not None: + if 'plan_cache' not in plan_cache: + plan_cache['plan_cache'] = PlanCache() + self._plan_cache = plan_cache['plan_cache'] + else: + self._plan_cache = None + + def execute(self): + """Server-side query execution via plpy. + + Query can be run either cached or uncached, depending + on C{plan_cache} setting given to L{__init__}. + + Returns result of plpy.execute(). + """ + + args = self._arg_value_list + types = self._arg_type_list + + if self._sqls is not None: + self._sqls.append( { "sql": self.get_sql(PARAM_INLINE) } ) + + if self._plan_cache is not None: + sql = self.get_sql(PARAM_PLPY) + plan = self._plan_cache.get_plan(sql, types) + res = plpy.execute(plan, args) + else: + sql = self.get_sql(PARAM_INLINE) + res = plpy.execute(sql) + if res: + res = [dbdict(r) for r in res] + return res + + +class PLPyQuery: + """Static, cached PL/Python query that uses QueryBuilder formatting. + + See L{plpy_exec} for simple usage. + """ + def __init__(self, sql): + qb = QueryBuilder(sql, None) + p_sql = qb.get_sql(PARAM_PLPY) + p_types = qb._arg_type_list + self.plan = plpy.prepare(p_sql, p_types) + self.arg_map = qb._arg_value_list + self.sql = sql + + def execute(self, arg_dict, all_keys_required = True): + try: + if all_keys_required: + arg_list = [arg_dict[k] for k in self.arg_map] + else: + arg_list = [arg_dict.get(k) for k in self.arg_map] + return plpy.execute(self.plan, arg_list) + except KeyError: + plpy.error("Missing argument: QUERY: %s ARGS: %s VALUES: %s" % ( + repr(self.sql), repr(self.arg_map), repr(arg_dict))) + + def __repr__(self): + return 'PLPyQuery<%s>' % self.sql + +def plpy_exec(gd, sql, args, all_keys_required = True): + """Cached plan execution for PL/Python. + + @param gd: dict to store cached plans under. If None, caching is disabled. + @param sql: SQL statement to execute. + @param args: dict of arguments to query. + @param all_keys_required: if False, missing key is taken as NULL, instead of throwing error. + + >>> res = plpy_exec(GD, "select {arg1}, {arg2:int4}, {arg1}", {'arg1': '1', 'arg2': '2'}) + DBG: plpy.prepare('select $1, $2, $3', ['text', 'int4', 'text']) + DBG: plpy.execute(('PLAN', 'select $1, $2, $3', ['text', 'int4', 'text']), ['1', '2', '1']) + >>> res = plpy_exec(None, "select {arg1}, {arg2:int4}, {arg1}", {'arg1': '1', 'arg2': '2'}) + DBG: plpy.execute("select '1', '2', '1'", []) + >>> res = plpy_exec(GD, "select {arg1}, {arg2:int4}, {arg1}", {'arg1': '3', 'arg2': '4'}) + DBG: plpy.execute(('PLAN', 'select $1, $2, $3', ['text', 'int4', 'text']), ['3', '4', '3']) + >>> res = plpy_exec(GD, "select {arg1}, {arg2:int4}, {arg1}", {'arg1': '3'}) + DBG: plpy.error("Missing argument: QUERY: 'select {arg1}, {arg2:int4}, {arg1}' ARGS: ['arg1', 'arg2', 'arg1'] VALUES: {'arg1': '3'}") + >>> res = plpy_exec(GD, "select {arg1}, {arg2:int4}, {arg1}", {'arg1': '3'}, False) + DBG: plpy.execute(('PLAN', 'select $1, $2, $3', ['text', 'int4', 'text']), ['3', None, '3']) + """ + + if gd is None: + return PLPyQueryBuilder(sql, args).execute() + + try: + sq = gd['plq_cache'][sql] + except KeyError: + if 'plq_cache' not in gd: + gd['plq_cache'] = {} + sq = PLPyQuery(sql) + gd['plq_cache'][sql] = sq + return sq.execute(args, all_keys_required) + +# fake plpy for testing +class fake_plpy: + def prepare(self, sql, types): + print "DBG: plpy.prepare(%s, %s)" % (repr(sql), repr(types)) + return ('PLAN', sql, types) + def execute(self, plan, args = []): + print "DBG: plpy.execute(%s, %s)" % (repr(plan), repr(args)) + def error(self, msg): + print "DBG: plpy.error(%s)" % repr(msg) + +# launch doctest +if __name__ == '__main__': + import doctest + plpy = fake_plpy() + GD = {} + doctest.testmod() + + diff --git a/python/skytools/sqltools.py b/python/skytools/sqltools.py index c3fde2b4..b36bb58c 100644 --- a/python/skytools/sqltools.py +++ b/python/skytools/sqltools.py @@ -18,13 +18,8 @@ __all__ = [ "CopyPipe", "full_copy", "DBObject", "DBSchema", "DBTable", "DBFunction", "DBLanguage", "db_install", "installer_find_file", "installer_apply_file", "dbdict", "mk_insert_sql", "mk_update_sql", "mk_delete_sql", - 'QueryBuilder', 'PLPyQueryBuilder', ] -PARAM_INLINE = 0 # quote_literal() -PARAM_DBAPI = 1 # %()s -PARAM_PLPY = 2 # $n - class dbdict(dict): """Wrapper on actual dict that allows accessing dict keys as attributes.""" @@ -582,246 +577,6 @@ def mk_delete_sql(row, tbl, pkey_list, field_map = None): whe_str = " and ".join(whe_list) return "delete from only %s where %s;" % (quote_fqident(tbl), whe_str) -class QArgConf: - """Per-query arg-type config object.""" - param_type = None - -class QArg: - """Place-holder for a query parameter.""" - def __init__(self, name, value, pos, conf): - self.name = name - self.value = value - self.pos = pos - self.conf = conf - def __str__(self): - if self.conf.param_type == PARAM_INLINE: - return skytools.quote_literal(self.value) - elif self.conf.param_type == PARAM_DBAPI: - return "%s" - elif self.conf.param_type == PARAM_PLPY: - return "$%d" % self.pos - else: - raise Exception("bad QArgConf.param_type") - - -# need an structure with fast remove-from-middle -# and append operations. -class DList: - """Simple double-linked list.""" - def __init__(self): - self.next = self - self.prev = self - - def append(self, obj): - obj.next = self - obj.prev = self.prev - self.prev.next = obj - self.prev = obj - - def remove(self, obj): - obj.next.prev = obj.prev - obj.prev.next = obj.next - obj.next = obj.prev = None - - def empty(self): - return self.next == self - - def pop(self): - """Remove and return first element.""" - obj = None - if not self.empty(): - obj = self.next - self.remove(obj) - return obj - - -class CachedPlan: - """Wrapper around prepared plan.""" - def __init__(self, key, plan): - self.key = key # (sql, (types)) - self.plan = plan - - -class PlanCache: - """Cache for limited amount of plans.""" - - def __init__(self, maxplans = 100): - self.maxplans = maxplans - self.plan_map = {} - self.plan_list = DList() - - def get_plan(self, sql, types): - """Prepare the plan and cache it.""" - - t = (sql, tuple(types)) - if t in self.plan_map: - pc = self.plan_map[t] - # put to the end - self.plan_list.remove(pc) - self.plan_list.append(pc) - return pc.plan - - # prepare new plan - plan = plpy.prepare(sql, types) - - # add to cache - pc = CachedPlan(t, plan) - self.plan_list.append(pc) - self.plan_map[t] = plan - - # remove plans if too much - while len(self.plan_map) > self.maxplans: - pc = self.plan_list.pop() - del self.plan_map[pc.key] - - return plan - - -class QueryBuilder: - """Helper for query building.""" - - def __init__(self, sqlexpr, params): - """Init the object. - - @param sqlexpr: Partial sql fragment. - @param params: Dict of parameter values. - """ - self._params = params - self._arg_type_list = [] - self._arg_value_list = [] - self._sql_parts = [] - self._arg_conf = QArgConf() - self._nargs = 0 - - if sqlexpr: - self.add(sqlexpr, required = True) - - def add(self, expr, type = "text", required = False): - """Add SQL fragment to query. - """ - self._add_expr('', expr, self._params, type, required) - - def get_sql(self, param_type = PARAM_INLINE): - """Return generated SQL (thus far) as string. - - Possible values for param_type: - - 0: Insert values quoted with quote_literal() - - 1: Insert %()s in place of parameters. - - 2: Insert $n in place of parameters. - """ - self._arg_conf.param_type = param_type - tmp = map(str, self._sql_parts) - return "".join(tmp) - - def _add_expr(self, pfx, expr, params, type, required): - parts = [] - types = [] - values = [] - nargs = self._nargs - if pfx: - parts.append(pfx) - pos = 0 - while 1: - # find start of next argument - a1 = expr.find('{', pos) - if a1 < 0: - parts.append(expr[pos:]) - break - - # find end end of argument name - a2 = expr.find('}', a1) - if a2 < 0: - raise Exception("missing argument terminator: "+expr) - - # add plain sql - if a1 > pos: - parts.append(expr[pos:a1]) - pos = a2 + 1 - - # get arg name, check if exists - k = expr[a1 + 1 : a2] - if k not in params: - if required: - raise Exception("required parameter missing: "+k) - return - - # got arg - nargs += 1 - val = params[k] - values.append(val) - types.append(type) - arg = QArg(k, val, nargs, self._arg_conf) - parts.append(arg) - - # add to the main sql only if all args exist - self._sql_parts.extend(parts) - if types: - self._arg_type_list.extend(types) - if values: - self._arg_value_list.extend(values) - self._nargs = nargs - - def execute(self, curs): - """Client-side query execution on DB-API 2.0 cursor. - - Calls C{curs.execute()} with proper arguments. - - Returns result of curs.execute(), although that does not - return anything interesting. Later curs.fetch* methods - must be called to get result. - """ - q = self.get_sql(PARAM_DBAPI) - args = self._params - return curs.execute(q, args) - -class PLPyQueryBuilder(QueryBuilder): - - def __init__(self, sqlexpr, params, plan_cache = None, sqls = None): - """Init the object. - - @param sqlexpr: Partial sql fragment. - @param params: Dict of parameter values. - @param plan_cache: (PL/Python) A dict object where to store the plan cache, under the key C{"plan_cache"}. - If not given, plan will not be cached and values will be inserted directly - to query. Usually either C{GD} or C{SD} should be given here. - @param sqls: list object where to append executed sqls (used for debugging) - """ - QueryBuilder.__init__(self, sqlexpr, params) - self._sqls = sqls - - if plan_cache: - if 'plan_cache' not in plan_cache: - plan_cache['plan_cache'] = PlanCache() - self._plan_cache = plan_cache['plan_cache'] - else: - self._plan_cache = None - - def execute(self): - """Server-size query execution via plpy. - - Query can be run either cached or uncached, depending - on C{plan_cache} setting given to L{__init__}. - - Returns result of plpy.execute(). - """ - - args = self._arg_value_list - types = self._arg_type_list - - if self._sqls is not None: - self._sqls.append( { "sql": self.get_sql(PARAM_INLINE) } ) - - if self._plan_cache: - sql = self.get_sql(PARAM_PLPY) - plan = self._plan_cache.get_plan(sql, types) - res = plpy.execute(plan, args) - else: - sql = self.get_sql(PARAM_INLINE) - res = plpy.execute(sql) - if res: - res = [dbdict(r) for r in res] - return res - if __name__ == '__main__': import doctest doctest.testmod() |