summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPetr Jelinek2012-12-19 23:15:02 +0000
committerPetr Jelinek2012-12-19 23:15:02 +0000
commitecfac4054fcf969e7ef2f30bffe25032b3c1b7cc (patch)
treec91c304b28eccc35af9694455a031ac3b2ce0fa2
parent65e634d235cd27253babe8cc40d57d097f9fe250 (diff)
Pure Python version of the hashtext functions; also make the hashtext functions available directly as part of the top-level API.
-rw-r--r--python/skytools/__init__.py3
-rw-r--r--python/skytools/pyhashtext.py145
2 files changed, 148 insertions, 0 deletions
diff --git a/python/skytools/__init__.py b/python/skytools/__init__.py
index 67362996..8b00d59d 100644
--- a/python/skytools/__init__.py
+++ b/python/skytools/__init__.py
@@ -133,6 +133,9 @@ _symbols = {
'parse_iso_timestamp': 'skytools.timeutil:parse_iso_timestamp',
# skytools.utf8
'safe_utf8_decode': 'skytools.utf8:safe_utf8_decode',
+ # hashing
+ 'hashtext_old': 'skytools.pyhashtext:hashtext_old',
+ 'hashtext_new': 'skytools.pyhashtext:hashtext_new',
}
__all__ = _symbols.keys()
diff --git a/python/skytools/pyhashtext.py b/python/skytools/pyhashtext.py
new file mode 100644
index 00000000..7274de98
--- /dev/null
+++ b/python/skytools/pyhashtext.py
@@ -0,0 +1,145 @@
+"""
+Pure python implementation of Postgres hashes
+
+>>> import skytools.hashtext
+>>> data = 'HypficUjFitraxlumCitcemkiOkIkthi'
+>>> p = [hashtext_old_py(data[:l]) for l in range(1, len(data)+1)]
+>>> c = [hashtext_old(data[:l]) for l in range(1, len(data)+1)]
+>>> assert p == c, '%s <> %s' % (p, c)
+
+>>> p = [hashtext_new_py(data[:l]) for l in range(1, len(data)+1)]
+>>> c = [hashtext_new(data[:l]) for l in range(1, len(data)+1)]
+>>> assert p == c, '%s <> %s' % (p, c)
+
+"""
+
+import struct
+
+__all__ = [
+ "hashtext_old_py", "hashtext_new_py",
+ "hashtext_old", "hashtext_new"
+]
+
+
# Zero bytes appended to the tail of a key so that a complete 12-byte group
# can always be unpacked, whatever the key length.  This must be a byte
# string: it is concatenated with the (byte) key before unpacking.
padding = b'\0' * 12


def uint32(x):
    """Return the low 32 bits of integer *x*.

    Python integers are unbounded, so the hash arithmetic below is done on
    plain ints and wrapped with this mask to emulate C ``uint32`` overflow.
    """
    return x & 0xffffffff


def _key_bytes(k):
    """Return hash key *k* as a byte string.

    Byte strings are returned unchanged, bytearrays are copied to bytes and
    anything else is treated as text and encoded as UTF-8.
    """
    if isinstance(k, bytes):
        return k
    if isinstance(k, bytearray):
        return bytes(k)
    return k.encode('utf-8')


def _int32(x):
    """Reinterpret the unsigned 32-bit value *x* as a signed 32-bit int."""
    if x & 0x80000000:
        return x - 0x100000000
    return x


#
# Old Postgres hashtext()
#

# three unsigned 32-bit words, always read as little-endian
fmt_old = struct.Struct("<LLL")


def mix_old(a, b, c):
    """Mixing step of the old hash.

    *a* and *b* may be wider than 32 bits on entry (they are sums of two
    32-bit values); every word is reduced to 32 bits before it is returned.

    Returns the mixed state as a tuple ``(a, b, c)``.
    """
    # c is right-shifted in the very first step, so it has to be in
    # 32-bit range before anything else happens.
    c = uint32(c)

    a -= b; a -= c; a = uint32(a ^ (c >> 13))
    b -= c; b -= a; b = uint32(b ^ (a << 8))
    c -= a; c -= b; c = uint32(c ^ (b >> 13))
    a -= b; a -= c; a = uint32(a ^ (c >> 12))
    b -= c; b -= a; b = uint32(b ^ (a << 16))
    c -= a; c -= b; c = uint32(c ^ (b >> 5))
    a -= b; a -= c; a = uint32(a ^ (c >> 3))
    b -= c; b -= a; b = uint32(b ^ (a << 10))
    c -= a; c -= b; c = uint32(c ^ (b >> 15))

    return a, b, c


def hashtext_old_py(k):
    """Pure-Python implementation of the old Postgres hashtext().

    *k* is the key: a byte string, a bytearray, or text (text is encoded
    as UTF-8 before hashing).

    Returns the hash as a signed 32-bit integer.
    """
    k = _key_bytes(k)
    keylen = len(k)
    a = b = 0x9e3779b9
    c = 3923095

    # handle most of the key: every complete 12-byte group
    pos = 0
    while keylen - pos >= 12:
        a2, b2, c2 = fmt_old.unpack_from(k, pos)
        a, b, c = mix_old(a + a2, b + b2, c + c2)
        pos += 12

    # handle the last 11 bytes (zero-padded to a full group)
    a2, b2, c2 = fmt_old.unpack_from(k[pos:] + padding, 0)
    # the lowest byte of c is reserved for the length
    c += keylen
    c2 <<= 8

    a, b, c = mix_old(a + a2, b + b2, c + c2)

    # convert to signed int
    return _int32(c)


#
# New Postgres hashtext()
#

# three unsigned 32-bit words in the byte order of the host
fmt_new = struct.Struct("=LLL")


def rol32(x, k):
    """Rotate the low 32 bits of *x* left by *k* bits (0 < k < 32).

    The result is always a 32-bit value.
    """
    return uint32((x << k) | (uint32(x) >> (32 - k)))


def mix_new(a, b, c):
    """Mixing step of the new hash, applied after each full 12-byte group.

    Intermediate values may leave the 32-bit range (or go negative); only
    their low 32 bits matter and the results are reduced on return.

    Returns the mixed state as a tuple ``(a, b, c)``.
    """
    a -= c; a ^= rol32(c, 4); c += b
    b -= a; b ^= rol32(a, 6); a += c
    c -= b; c ^= rol32(b, 8); b += a
    a -= c; a ^= rol32(c, 16); c += b
    b -= a; b ^= rol32(a, 19); a += c
    c -= b; c ^= rol32(b, 4); b += a

    return uint32(a), uint32(b), uint32(c)


def final_new(a, b, c):
    """Final mixing step of the new hash, applied once to the key tail.

    Returns the mixed state as a tuple ``(a, b, c)`` of 32-bit values.
    """
    c ^= b; c -= rol32(b, 14)
    a ^= c; a -= rol32(c, 11)
    b ^= a; b -= rol32(a, 25)
    c ^= b; c -= rol32(b, 16)
    a ^= c; a -= rol32(c, 4)
    b ^= a; b -= rol32(a, 14)
    c ^= b; c -= rol32(b, 24)

    return uint32(a), uint32(b), uint32(c)


def hashtext_new_py(k):
    """Pure-Python implementation of the new Postgres hashtext().

    *k* is the key: a byte string, a bytearray, or text (text is encoded
    as UTF-8 before hashing).

    Returns the hash as a signed 32-bit integer.
    """
    k = _key_bytes(k)
    keylen = len(k)
    # the key length is folded into the initial state
    a = b = c = 0x9e3779b9 + keylen + 3923095

    # handle most of the key: every complete 12-byte group
    pos = 0
    while keylen - pos >= 12:
        a2, b2, c2 = fmt_new.unpack_from(k, pos)
        a, b, c = mix_new(a + a2, b + b2, c + c2)
        pos += 12

    # handle the last 11 bytes (zero-padded to a full group)
    a2, b2, c2 = fmt_new.unpack_from(k[pos:] + padding, 0)
    # the lowest byte of c is reserved
    # NOTE(review): fmt_new reads words in host byte order, so this shift
    # looks right for little-endian hosts only -- confirm against the
    # big-endian branch of PostgreSQL's hash_any() before relying on it there.
    c2 <<= 8

    a, b, c = final_new(a + a2, b + b2, c + c2)

    # convert to signed int
    return _int32(c)
+
+
# Use the implementations from skytools.hashtext when that module can be
# imported (presumably a compiled extension -- confirm); otherwise publish
# the pure-Python functions under the same public names.
try:
    from skytools.hashtext import hashtext_old, hashtext_new
except ImportError:
    hashtext_old, hashtext_new = hashtext_old_py, hashtext_new_py


# run the module doctests when executed as a script
if __name__ == '__main__':
    from doctest import testmod
    testmod()