diff options
Diffstat (limited to 'python/skytools/pyhashtext.py')
-rw-r--r-- | python/skytools/pyhashtext.py | 145 |
1 files changed, 145 insertions, 0 deletions
"""
Pure python implementation of Postgres hashes

Keys may be given either as byte strings or as text; text is encoded
as UTF-8 before hashing.  Results are signed 32-bit integers.

>>> hashtext_old_py('')
-863449762
>>> hashtext_new_py('')
-1477818771

Comparison against the compiled implementation (needs skytools.hashtext):

>>> import skytools.hashtext
>>> data = 'HypficUjFitraxlumCitcemkiOkIkthi'
>>> p = [hashtext_old_py(data[:l]) for l in range(1, len(data)+1)]
>>> c = [hashtext_old(data[:l]) for l in range(1, len(data)+1)]
>>> assert p == c, '%s <> %s' % (p, c)

>>> p = [hashtext_new_py(data[:l]) for l in range(1, len(data)+1)]
>>> c = [hashtext_new(data[:l]) for l in range(1, len(data)+1)]
>>> assert p == c, '%s <> %s' % (p, c)

"""

import struct
import sys

__all__ = [
    "hashtext_old_py", "hashtext_new_py",
    "hashtext_old", "hashtext_new",
]


# Zero bytes appended to the final partial block so that a full
# 12-byte block can always be unpacked.  Must be a byte string:
# struct cannot unpack from text.
padding = b'\0' * 12


def uint32(x):
    """python does not have 32 bit integer so we need this hack to produce uint32 after bit operations"""
    return x & 0xffffffff


def _as_bytes(k):
    """Return key `k` as a byte string; text is encoded as UTF-8."""
    if isinstance(k, bytes):
        return k
    if isinstance(k, bytearray):
        return bytes(k)
    return k.encode('utf-8')


def _to_int32(x):
    """Reinterpret the unsigned 32-bit value `x` as a signed 32-bit int."""
    if x & 0x80000000:
        x -= 0x100000000
    return int(x)


#
# Old Postgres hashtext()
#

# three little-endian unsigned 32-bit words per 12-byte block
fmt_old = struct.Struct("<LLL")


def mix_old(a, b, c):
    """Mix three 32-bit words (old hash); returns the new (a, b, c).

    The inputs may be wider than 32 bits (callers pass unreduced sums);
    every returned value is reduced to 32 bits.
    """
    # c must be reduced up front because it is right-shifted first;
    # a and b are reduced by the uint32() calls before they are shifted.
    c = uint32(c)

    a = uint32((a - b - c) ^ (c >> 13))
    b = uint32((b - c - a) ^ (a << 8))
    c = uint32((c - a - b) ^ (b >> 13))
    a = uint32((a - b - c) ^ (c >> 12))
    b = uint32((b - c - a) ^ (a << 16))
    c = uint32((c - a - b) ^ (b >> 5))
    a = uint32((a - b - c) ^ (c >> 3))
    b = uint32((b - c - a) ^ (a << 10))
    c = uint32((c - a - b) ^ (b >> 15))

    return a, b, c


def hashtext_old_py(k):
    """Old Postgres hashtext(), pure python.

    `k` is the key as bytes or text (text is UTF-8 encoded).
    Returns the hash as a signed 32-bit integer.
    """
    k = _as_bytes(k)
    keylen = remain = len(k)
    pos = 0
    a = b = 0x9e3779b9
    c = 3923095

    # handle most of the key
    while remain >= 12:
        a2, b2, c2 = fmt_old.unpack_from(k, pos)
        a, b, c = mix_old(a + a2, b + b2, c + c2)
        pos += 12
        remain -= 12

    # handle the last 11 bytes
    a2, b2, c2 = fmt_old.unpack_from(k[pos:] + padding, 0)
    # the lowest byte of c is reserved for the length
    c += keylen
    c2 = c2 << 8

    a, b, c = mix_old(a + a2, b + b2, c + c2)

    return _to_int32(c)


#
# New Postgres hashtext()
#

# three unsigned 32-bit words per 12-byte block, in native byte order
fmt_new = struct.Struct("=LLL")


def rol32(x, k):
    """Rotate the low 32 bits of `x` left by `k` bits (0 < k < 32)."""
    x = uint32(x)
    return uint32(x << k) | (x >> (32 - k))


def mix_new(a, b, c):
    """Mix three 32-bit words (new hash); returns the new (a, b, c).

    Intermediate values are allowed to grow past 32 bits (or go
    negative); only the low 32 bits matter and they are extracted at
    the end.
    """
    a = (a - c) ^ rol32(c, 4)
    c += b
    b = (b - a) ^ rol32(a, 6)
    a += c
    c = (c - b) ^ rol32(b, 8)
    b += a
    a = (a - c) ^ rol32(c, 16)
    c += b
    b = (b - a) ^ rol32(a, 19)
    a += c
    c = (c - b) ^ rol32(b, 4)
    b += a

    return uint32(a), uint32(b), uint32(c)


def final_new(a, b, c):
    """Final mixing of three 32-bit words (new hash); returns (a, b, c)."""
    c = (c ^ b) - rol32(b, 14)
    a = (a ^ c) - rol32(c, 11)
    b = (b ^ a) - rol32(a, 25)
    c = (c ^ b) - rol32(b, 16)
    a = (a ^ c) - rol32(c, 4)
    b = (b ^ a) - rol32(a, 14)
    c = (c ^ b) - rol32(b, 24)

    return uint32(a), uint32(b), uint32(c)


def hashtext_new_py(k):
    """New Postgres hashtext(), pure python.

    `k` is the key as bytes or text (text is UTF-8 encoded).
    Returns the hash as a signed 32-bit integer.  Words are read in
    native byte order, so the result depends on the host byte order.
    """
    k = _as_bytes(k)
    keylen = remain = len(k)
    pos = 0
    a = b = c = 0x9e3779b9 + keylen + 3923095

    # handle most of the key
    while remain >= 12:
        a2, b2, c2 = fmt_new.unpack_from(k, pos)
        a, b, c = mix_new(a + a2, b + b2, c + c2)
        pos += 12
        remain -= 12

    # handle the last 11 bytes
    a2, b2, c2 = fmt_new.unpack_from(k[pos:] + padding, 0)
    # the lowest byte of c is reserved.  On little-endian hosts the
    # tail bytes land in the low end of the word and must be moved up;
    # on big-endian hosts the zero padding byte already occupies the
    # lowest byte, so no shift is needed.
    if sys.byteorder == 'little':
        c2 = c2 << 8

    a, b, c = final_new(a + a2, b + b2, c + c2)

    return _to_int32(c)


# Prefer the compiled implementation when available, otherwise fall
# back to the pure python one.
try:
    from skytools.hashtext import hashtext_old, hashtext_new
except ImportError:
    hashtext_old = hashtext_old_py
    hashtext_new = hashtext_new_py


# run doctest
if __name__ == '__main__':
    import doctest
    doctest.testmod()