diff options
author | martinko | 2011-12-14 14:32:55 +0000 |
---|---|---|
committer | martinko | 2011-12-14 14:32:55 +0000 |
commit | e50aad140becbf56de9ea2c08683a245d187245b (patch) | |
tree | 9ec4516f85f04456fb25d25605eb9ed80dc28708 | |
parent | 8227d443103bf209d7a5d60b8dd9cabe8452ac55 (diff) |
added skytools.hashtext module
imported from skytools-2.0 (written by marko)
-rw-r--r-- | python/modules/hashtext.c | 428 | ||||
-rwxr-xr-x | setup_skytools.py | 8 |
2 files changed, 433 insertions, 3 deletions
diff --git a/python/modules/hashtext.c b/python/modules/hashtext.c new file mode 100644 index 00000000..f1de053c --- /dev/null +++ b/python/modules/hashtext.c @@ -0,0 +1,428 @@ +/* + * Postgres hashes for Python. + */ + +#define PY_SSIZE_T_CLEAN +#include <Python.h> + +#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +typedef int Py_ssize_t; +#define PY_SSIZE_T_MAX INT_MAX +#define PY_SSIZE_T_MIN INT_MIN +#endif + +#include <stdint.h> +#include <string.h> + + +typedef uint32_t (*hash_fn_t)(const void *src, unsigned src_len); + +typedef uint8_t uint8; +typedef uint16_t uint16; +typedef uint32_t uint32; + +#define rot(x, k) (((x)<<(k)) | ((x)>>(32-(k)))) + +/* + * Old Postgres hashtext() + */ + +#define mix_old(a,b,c) \ +{ \ + a -= b; a -= c; a ^= ((c)>>13); \ + b -= c; b -= a; b ^= ((a)<<8); \ + c -= a; c -= b; c ^= ((b)>>13); \ + a -= b; a -= c; a ^= ((c)>>12); \ + b -= c; b -= a; b ^= ((a)<<16); \ + c -= a; c -= b; c ^= ((b)>>5); \ + a -= b; a -= c; a ^= ((c)>>3); \ + b -= c; b -= a; b ^= ((a)<<10); \ + c -= a; c -= b; c ^= ((b)>>15); \ +} + +static uint32_t hash_old_hashtext(const void *_k, unsigned keylen) +{ + const unsigned char *k = _k; + register uint32 a, b, c, len; + + /* Set up the internal state */ + len = keylen; + a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */ + c = 3923095; /* initialize with an arbitrary value */ + + /* handle most of the key */ + while (len >= 12) + { + a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24)); + b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24)); + c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24)); + mix_old(a, b, c); + k += 12; + len -= 12; + } + + /* handle the last 11 bytes */ + c += keylen; + switch (len) /* all the case statements fall through */ + { + case 11: + c += ((uint32) k[10] << 24); + case 10: + c += ((uint32) k[9] << 16); + case 9: + c += ((uint32) k[8] << 8); + /* the first byte of c is reserved for the length */ + case 8: + b += ((uint32) k[7] << 24); + case 7: + b += ((uint32) k[6] << 16); + case 6: + b += ((uint32) k[5] << 8); + case 5: + b += k[4]; + case 4: + a += ((uint32) k[3] << 24); + case 3: + a += ((uint32) k[2] << 16); + case 2: + a += ((uint32) k[1] << 8); + case 1: + a += k[0]; + /* case 0: nothing left to add */ + } + mix_old(a, b, c); + + /* report the result */ + return c; +} + + +/* + * New Postgres hashtext() + */ + +#define UINT32_ALIGN_MASK 3 + +#define mix_new(a,b,c) \ +{ \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c,16); c += b; \ + b -= a; b ^= rot(a,19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} + +#define final_new(a,b,c) \ +{ \ + c ^= b; c -= rot(b,14); \ + a ^= c; a -= rot(c,11); \ + b ^= a; b -= rot(a,25); \ + c ^= b; c -= rot(b,16); \ + a ^= c; a -= rot(c, 4); \ + b ^= a; b -= rot(a,14); \ + c ^= b; c -= rot(b,24); \ +} + +static uint32_t hash_new_hashtext(const void *_k, unsigned keylen) +{ + const unsigned char *k = _k; + uint32_t a, b, c, len; + + /* Set up the internal state */ + len = keylen; + a = b = c = 0x9e3779b9 + len + 3923095; + + /* If the source pointer is word-aligned, we use word-wide fetches */ + if (((long) k & UINT32_ALIGN_MASK) == 0) + { + /* Code path for aligned source data */ + register const uint32_t *ka = (const uint32_t *) k; + + /* handle most of the key */ + while (len >= 12) + { + a += ka[0]; + b += ka[1]; + c += ka[2]; + mix_new(a, b, c); + ka += 3; + len -= 12; + } + + /* handle the last 11 bytes */ + k = (const unsigned char *) ka; +#ifdef WORDS_BIGENDIAN + switch (len) + { + case 11: + c += ((uint32) k[10] << 8); + /* fall through */ + case 10: + c += ((uint32) k[9] << 16); + /* fall through */ + case 9: + c += ((uint32) k[8] << 24); + /* the lowest byte of c is reserved for the length */ + /* fall through */ + case 8: + b += ka[1]; + a += ka[0]; + break; + case 7: + b += ((uint32) k[6] << 8); + /* fall through */ + case 6: + b += ((uint32) k[5] << 16); + /* fall through */ + case 5: + b += ((uint32) k[4] << 24); + /* fall through */ + case 4: + a += ka[0]; + break; + case 3: + a += ((uint32) k[2] << 8); + /* fall through */ + case 2: + a += ((uint32) k[1] << 16); + /* fall through */ + case 1: + a += ((uint32) k[0] << 24); + /* case 0: nothing left to add */ + } +#else /* !WORDS_BIGENDIAN */ + switch (len) + { + case 11: + c += ((uint32) k[10] << 24); + /* fall through */ + case 10: + c += ((uint32) k[9] << 16); + /* fall through */ + case 9: + c += ((uint32) k[8] << 8); + /* the lowest byte of c is reserved for the length */ + /* fall through */ + case 8: + b += ka[1]; + a += ka[0]; + break; + case 7: + b += ((uint32) k[6] << 16); + /* fall through */ + case 6: + b += ((uint32) k[5] << 8); + /* fall through */ + case 5: + b += k[4]; + /* fall through */ + case 4: + a += ka[0]; + break; + case 3: + a += ((uint32) k[2] << 16); + /* fall through */ + case 2: + a += ((uint32) k[1] << 8); + /* fall through */ + case 1: + a += k[0]; + /* case 0: nothing left to add */ + } +#endif /* WORDS_BIGENDIAN */ + } + else + { + /* Code path for non-aligned source data */ + + /* handle most of the key */ + while (len >= 12) + { +#ifdef WORDS_BIGENDIAN + a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24)); + b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24)); + c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24)); +#else /* !WORDS_BIGENDIAN */ + a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24)); + b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24)); + c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24)); +#endif /* WORDS_BIGENDIAN */ + mix_new(a, b, c); + k += 12; + len -= 12; + } + + /* handle the last 11 bytes */ +#ifdef WORDS_BIGENDIAN + switch (len) /* all the case statements fall through */ + { + case 11: + c += ((uint32) k[10] << 8); + case 10: + c += ((uint32) k[9] << 16); + case 9: + c += ((uint32) k[8] << 24); + /* the lowest byte of c is reserved for the length */ + case 8: + b += k[7]; + case 7: + b += ((uint32) k[6] << 8); + case 6: + b += ((uint32) k[5] << 16); + case 5: + b += ((uint32) k[4] << 24); + case 4: + a += k[3]; + case 3: + a += ((uint32) k[2] << 8); + case 2: + a += ((uint32) k[1] << 16); + case 1: + a += ((uint32) k[0] << 24); + /* case 0: nothing left to add */ + } +#else /* !WORDS_BIGENDIAN */ + switch (len) /* all the case statements fall through */ + { + case 11: + c += ((uint32) k[10] << 24); + case 10: + c += ((uint32) k[9] << 16); + case 9: + c += ((uint32) k[8] << 8); + /* the lowest byte of c is reserved for the length */ + case 8: + b += ((uint32) k[7] << 24); + case 7: + b += ((uint32) k[6] << 16); + case 6: + b += ((uint32) k[5] << 8); + case 5: + b += k[4]; + case 4: + a += ((uint32) k[3] << 24); + case 3: + a += ((uint32) k[2] << 16); + case 2: + a += ((uint32) k[1] << 8); + case 1: + a += k[0]; + /* case 0: nothing left to add */ + } +#endif /* WORDS_BIGENDIAN */ + } + + final_new(a, b, c); + + /* report the result */ + return c; +} + +/* + * Get string data from Python object. + */ + +static Py_ssize_t get_buffer(PyObject *obj, unsigned char **buf_p, PyObject **tmp_obj_p) +{ + PyBufferProcs *bfp; + PyObject *str = NULL; + Py_ssize_t res; + + /* check for None */ + if (obj == Py_None) { + PyErr_Format(PyExc_TypeError, "None is not allowed"); + return -1; + } + + /* is string or unicode ? */ + if (PyString_Check(obj) || PyUnicode_Check(obj)) { + if (PyString_AsStringAndSize(obj, (char**)buf_p, &res) < 0) + return -1; + return res; + } + + /* try to get buffer */ + bfp = obj->ob_type->tp_as_buffer; + if (bfp && bfp->bf_getsegcount && bfp->bf_getreadbuffer) { + if (bfp->bf_getsegcount(obj, NULL) == 1) + return bfp->bf_getreadbuffer(obj, 0, (void**)buf_p); + } + + /* + * Not a string-like object, run str() or it. + */ + + /* are we in recursion? */ + if (tmp_obj_p == NULL) { + PyErr_Format(PyExc_TypeError, "Cannot convert to string - get_buffer() recusively failed"); + return -1; + } + + /* do str() then */ + str = PyObject_Str(obj); + res = -1; + if (str != NULL) { + res = get_buffer(str, buf_p, NULL); + if (res >= 0) { + *tmp_obj_p = str; + } else { + Py_CLEAR(str); + } + } + return res; +} + +/* + * Common argument parsing. + */ + +static PyObject *run_hash(PyObject *args, hash_fn_t real_hash) +{ + unsigned char *src = NULL; + Py_ssize_t src_len; + PyObject *arg, *strtmp = NULL; + int32_t hash; + + if (!PyArg_ParseTuple(args, "O", &arg)) + return NULL; + src_len = get_buffer(arg, &src, &strtmp); + if (src_len < 0) + return NULL; + hash = real_hash(src, src_len); + Py_CLEAR(strtmp); + return PyInt_FromLong(hash); +} + +/* + * Python wrappers around actual hash functions. + */ + +static PyObject *hashtext_old(PyObject *self, PyObject *args) +{ + return run_hash(args, hash_old_hashtext); +} + +static PyObject *hashtext_new(PyObject *self, PyObject *args) +{ + return run_hash(args, hash_new_hashtext); +} + +/* + * Module initialization + */ + +static PyMethodDef methods[] = { + { "hashtext_old", hashtext_old, METH_VARARGS, "Old Postgres hashtext().\n" }, + { "hashtext_new", hashtext_new, METH_VARARGS, "New Postgres hashtext().\n" }, + { NULL } +}; + +PyMODINIT_FUNC +inithashtext(void) +{ + PyObject *module; + module = Py_InitModule("hashtext", methods); + PyModule_AddStringConstant(module, "__doc__", "String hash functions"); +} + diff --git a/setup_skytools.py b/setup_skytools.py index e5dd97e5..648b9aac 100755 --- a/setup_skytools.py +++ b/setup_skytools.py @@ -178,8 +178,11 @@ class sk3_install(install): # check if building C is allowed c_modules = [] if BUILD_C_MOD: - ext = Extension("skytools._cquoting", ['python/modules/cquoting.c']) - c_modules.append(ext) + ext = [ + Extension("skytools._cquoting", ['python/modules/cquoting.c']), + Extension("skytools.hashtext", ['python/modules/hashtext.c']), + ] + c_modules.extend(ext) # run actual setup setup( @@ -206,4 +209,3 @@ setup( 'install': sk3_install, }, ) - |