diff options
author | Marko Kreen | 2012-12-20 08:53:06 +0000 |
---|---|---|
committer | Marko Kreen | 2012-12-20 09:23:43 +0000 |
commit | 7908cf38738c8d918e3d23734b6dac854ff703fa (patch) | |
tree | 76465a8e8d67114347557da73f83afb9e9e0ac92 | |
parent | 2d408dd8ee981172bce444164402cf2ce05cd5ec (diff) |
pyhashtext: minor cleanups
- uppercase constants
- rename loop variables (lenpos -> remain, p -> pos)
- add literal value test
- add equality test, just in case 'assert' is disabled
- convert result to non-long int
- no shift when big-endian (in hashtext_new)
-rw-r--r-- | python/skytools/pyhashtext.py | 88 |
1 files changed, 53 insertions, 35 deletions
diff --git a/python/skytools/pyhashtext.py b/python/skytools/pyhashtext.py index 7274de98..17795f3b 100644 --- a/python/skytools/pyhashtext.py +++ b/python/skytools/pyhashtext.py @@ -2,36 +2,48 @@ Pure python implementation of Postgres hashes >>> import skytools.hashtext +>>> for i in range(3): +... print [hashtext_new_py('x' * (i*5 + j)) for j in range(5)] +[-1477818771, 1074944137, -1086392228, -1992236649, -1379736791] +[-370454118, 1489915569, -66683019, -2126973000, 1651296771] +[755764456, -1494243903, 631527812, 28686851, -9498641] +>>> for i in range(3): +... print [hashtext_old_py('x' * (i*5 + j)) for j in range(5)] +[-863449762, 37835117, 294739542, -320432768, 1007638138] +[1422906842, -261065348, 59863994, -162804943, 1736144510] +[-682756517, 317827663, -495599455, -1411793989, 1739997714] >>> data = 'HypficUjFitraxlumCitcemkiOkIkthi' ->>> p = [hashtext_old_py(data[:l]) for l in range(1, len(data)+1)] ->>> c = [hashtext_old(data[:l]) for l in range(1, len(data)+1)] +>>> p = [hashtext_old_py(data[:l]) for l in range(len(data)+1)] +>>> c = [hashtext_old(data[:l]) for l in range(len(data)+1)] >>> assert p == c, '%s <> %s' % (p, c) - ->>> p = [hashtext_new_py(data[:l]) for l in range(1, len(data)+1)] ->>> c = [hashtext_new(data[:l]) for l in range(1, len(data)+1)] +>>> p == c +True +>>> p = [hashtext_new_py(data[:l]) for l in range(len(data)+1)] +>>> c = [hashtext_new(data[:l]) for l in range(len(data)+1)] >>> assert p == c, '%s <> %s' % (p, c) - +>>> p == c +True """ -import struct +import sys, struct __all__ = [ "hashtext_old_py", "hashtext_new_py", "hashtext_old", "hashtext_new" ] - -padding = '\0' * 12 +# pad for last partial block +PADDING = '\0' * 12 def uint32(x): """python does not have 32 bit integer so we need this hack to produce uint32 after bit operations""" return x & 0xffffffff # -# Old Postgres hashtext() +# Old Postgres hashtext() - lookup2 with custom initval # -fmt_old = struct.Struct("<LLL") +FMT_OLD = struct.Struct("<LLL") def 
mix_old(a,b,c): c = uint32(c) @@ -49,23 +61,25 @@ def mix_old(a,b,c): return a, b, c def hashtext_old_py(k): - keylen = lenpos = len(k) - p = 0 + """Old Postgres hashtext()""" + + remain = len(k) + pos = 0 a = b = 0x9e3779b9 c = 3923095 # handle most of the key - while lenpos >= 12: - a2, b2, c2 = fmt_old.unpack_from(k, p) + while remain >= 12: + a2, b2, c2 = FMT_OLD.unpack_from(k, pos) a, b, c = mix_old(a + a2, b + b2, c + c2) - p += 12; - lenpos -= 12; + pos += 12; + remain -= 12; # handle the last 11 bytes - a2, b2, c2 = fmt_old.unpack_from(k[p:] + padding, 0) + a2, b2, c2 = FMT_OLD.unpack_from(k[pos:] + PADDING, 0) + # the lowest byte of c is reserved for the length - c += keylen; - c2 = c2 << 8 + c2 = (c2 << 8) + len(k) a, b, c = mix_old(a + a2, b + b2, c + c2) @@ -73,14 +87,17 @@ def hashtext_old_py(k): if (c & 0x80000000): c = -0x100000000 + c - return c + return int(c) # -# New Postgres hashtext() +# New Postgres hashtext() - hacked lookup3: +# - custom initval +# - calls mix() when len=12 +# - shifted c in last block on little-endian # -fmt_new = struct.Struct("=LLL") +FMT_NEW = struct.Struct("=LLL") def rol32(x,k): return (((x)<<(k)) | (uint32(x)>>(32-(k)))) @@ -107,29 +124,29 @@ def final_new(a,b,c): return uint32(a), uint32(b), uint32(c) def hashtext_new_py(k): - keylen = lenpos = len(k) - p = 0 - a = b = c = 0x9e3779b9 + keylen + 3923095 + """New Postgres hashtext()""" + remain = len(k) + pos = 0 + a = b = c = 0x9e3779b9 + len(k) + 3923095 # handle most of the key - while lenpos >= 12: - a2, b2, c2 = fmt_new.unpack_from(k, p) + while remain >= 12: + a2, b2, c2 = FMT_NEW.unpack_from(k, pos) a, b, c = mix_new(a + a2, b + b2, c + c2) - p += 12; - lenpos -= 12; + pos += 12; + remain -= 12; # handle the last 11 bytes - a2, b2, c2 = fmt_new.unpack_from(k[p:] + padding, 0) - # the lowest byte of c is reserved - c2 = c2 << 8 - + a2, b2, c2 = FMT_NEW.unpack_from(k[pos:] + PADDING, 0) + if sys.byteorder == 'little': + c2 = c2 << 8 a, b, c = final_new(a + a2, b + 
b2, c + c2) # convert to signed int if (c & 0x80000000): c = -0x100000000 + c - return c + return int(c) try: @@ -143,3 +160,4 @@ except ImportError: if __name__ == '__main__': import doctest doctest.testmod() + |