summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Marko Kreen, 2012-12-20 08:53:06 +0000
committer: Marko Kreen, 2012-12-20 09:23:43 +0000
commit: 7908cf38738c8d918e3d23734b6dac854ff703fa (patch)
tree: 76465a8e8d67114347557da73f83afb9e9e0ac92
parent: 2d408dd8ee981172bce444164402cf2ce05cd5ec (diff)
pyhashtext: minor cleanups
- uppercase constants
- rename loop constants
- add literal value test
- add equality test, just in case 'assert' is disabled
- convert result to non-long int
- no shift when big-endian (in hashtext_new)
-rw-r--r-- python/skytools/pyhashtext.py 88
1 files changed, 53 insertions, 35 deletions
diff --git a/python/skytools/pyhashtext.py b/python/skytools/pyhashtext.py
index 7274de98..17795f3b 100644
--- a/python/skytools/pyhashtext.py
+++ b/python/skytools/pyhashtext.py
@@ -2,36 +2,48 @@
Pure python implementation of Postgres hashes
>>> import skytools.hashtext
+>>> for i in range(3):
+... print [hashtext_new_py('x' * (i*5 + j)) for j in range(5)]
+[-1477818771, 1074944137, -1086392228, -1992236649, -1379736791]
+[-370454118, 1489915569, -66683019, -2126973000, 1651296771]
+[755764456, -1494243903, 631527812, 28686851, -9498641]
+>>> for i in range(3):
+... print [hashtext_old_py('x' * (i*5 + j)) for j in range(5)]
+[-863449762, 37835117, 294739542, -320432768, 1007638138]
+[1422906842, -261065348, 59863994, -162804943, 1736144510]
+[-682756517, 317827663, -495599455, -1411793989, 1739997714]
>>> data = 'HypficUjFitraxlumCitcemkiOkIkthi'
->>> p = [hashtext_old_py(data[:l]) for l in range(1, len(data)+1)]
->>> c = [hashtext_old(data[:l]) for l in range(1, len(data)+1)]
+>>> p = [hashtext_old_py(data[:l]) for l in range(len(data)+1)]
+>>> c = [hashtext_old(data[:l]) for l in range(len(data)+1)]
>>> assert p == c, '%s <> %s' % (p, c)
-
->>> p = [hashtext_new_py(data[:l]) for l in range(1, len(data)+1)]
->>> c = [hashtext_new(data[:l]) for l in range(1, len(data)+1)]
+>>> p == c
+True
+>>> p = [hashtext_new_py(data[:l]) for l in range(len(data)+1)]
+>>> c = [hashtext_new(data[:l]) for l in range(len(data)+1)]
>>> assert p == c, '%s <> %s' % (p, c)
-
+>>> p == c
+True
"""
-import struct
+import sys, struct
__all__ = [
"hashtext_old_py", "hashtext_new_py",
"hashtext_old", "hashtext_new"
]
-
-padding = '\0' * 12
+# pad for last partial block
+PADDING = '\0' * 12
def uint32(x):
"""python does not have 32 bit integer so we need this hack to produce uint32 after bit operations"""
return x & 0xffffffff
#
-# Old Postgres hashtext()
+# Old Postgres hashtext() - lookup2 with custom initval
#
-fmt_old = struct.Struct("<LLL")
+FMT_OLD = struct.Struct("<LLL")
def mix_old(a,b,c):
c = uint32(c)
@@ -49,23 +61,25 @@ def mix_old(a,b,c):
return a, b, c
def hashtext_old_py(k):
- keylen = lenpos = len(k)
- p = 0
+ """Old Postgres hashtext()"""
+
+ remain = len(k)
+ pos = 0
a = b = 0x9e3779b9
c = 3923095
# handle most of the key
- while lenpos >= 12:
- a2, b2, c2 = fmt_old.unpack_from(k, p)
+ while remain >= 12:
+ a2, b2, c2 = FMT_OLD.unpack_from(k, pos)
a, b, c = mix_old(a + a2, b + b2, c + c2)
- p += 12;
- lenpos -= 12;
+ pos += 12;
+ remain -= 12;
# handle the last 11 bytes
- a2, b2, c2 = fmt_old.unpack_from(k[p:] + padding, 0)
+ a2, b2, c2 = FMT_OLD.unpack_from(k[pos:] + PADDING, 0)
+
# the lowest byte of c is reserved for the length
- c += keylen;
- c2 = c2 << 8
+ c2 = (c2 << 8) + len(k)
a, b, c = mix_old(a + a2, b + b2, c + c2)
@@ -73,14 +87,17 @@ def hashtext_old_py(k):
if (c & 0x80000000):
c = -0x100000000 + c
- return c
+ return int(c)
#
-# New Postgres hashtext()
+# New Postgres hashtext() - hacked lookup3:
+# - custom initval
+# - calls mix() when len=12
+# - shifted c in last block on little-endian
#
-fmt_new = struct.Struct("=LLL")
+FMT_NEW = struct.Struct("=LLL")
def rol32(x,k):
return (((x)<<(k)) | (uint32(x)>>(32-(k))))
@@ -107,29 +124,29 @@ def final_new(a,b,c):
return uint32(a), uint32(b), uint32(c)
def hashtext_new_py(k):
- keylen = lenpos = len(k)
- p = 0
- a = b = c = 0x9e3779b9 + keylen + 3923095
+ """New Postgres hashtext()"""
+ remain = len(k)
+ pos = 0
+ a = b = c = 0x9e3779b9 + len(k) + 3923095
# handle most of the key
- while lenpos >= 12:
- a2, b2, c2 = fmt_new.unpack_from(k, p)
+ while remain >= 12:
+ a2, b2, c2 = FMT_NEW.unpack_from(k, pos)
a, b, c = mix_new(a + a2, b + b2, c + c2)
- p += 12;
- lenpos -= 12;
+ pos += 12;
+ remain -= 12;
# handle the last 11 bytes
- a2, b2, c2 = fmt_new.unpack_from(k[p:] + padding, 0)
- # the lowest byte of c is reserved
- c2 = c2 << 8
-
+ a2, b2, c2 = FMT_NEW.unpack_from(k[pos:] + PADDING, 0)
+ if sys.byteorder == 'little':
+ c2 = c2 << 8
a, b, c = final_new(a + a2, b + b2, c + c2)
# convert to signed int
if (c & 0x80000000):
c = -0x100000000 + c
- return c
+ return int(c)
try:
@@ -143,3 +160,4 @@ except ImportError:
if __name__ == '__main__':
import doctest
doctest.testmod()
+