#!/usr/bin/python
# Produce hash saturation tables a la "Turbid: A High-Entropy Randomness
# Generator" section 3 / table 1
#
# For more information, see the paper:
# http://www.av8n.com/turbid/paper/turbid.htm#htoc3
#
# Copyright (C) 2013 Jeff Epler
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
from __future__ import division
import itertools
from math import log1p, exp, log
def binomial_pmfgen(n, p):
"""Generate binomial distribution PMF f(k; n, p) for k=1...n
Instead of directly computing the product, compute the log of the product
as a partial sum and then take the exponent of the sum; this avoids trouble
with the very extreme numbers involved. Earlier iterations used careful
summing, but that turned out not to be important to matching the paper's
values.
"""
log_p = log(p)
log_1mp = log1p(-p)
partial_sum = 0
for k in itertools.count(1):
if k > n: return
partial_sum += log(n-k+1) - log(k) # n-k+1, k nonzero
yield k, exp(partial_sum + k * log_p + log_1mp * (n-k))
def ent(N, C):
"""Compute the entropy of a 'perfectly well-behaved hash'
This still misbehaves for N >>> C (e.g., N=2**33, C=2**16), as well as
getting very slow, but otherwise matches the values given in the paper"""
p = 1/C
s = t = 0
partial_sum = 0
for m, pmf in binomial_pmfgen(N, p):
si = (m/N) * log(m/N, 2) * C * pmf
if si == 0 and partial_sum: break # Past values that can contribute
partial_sum += si
return -partial_sum
def entropy_table(log_c, sin_range):
print "Entropy for perfectly well-behaved hash of %d bits" % log_c
print "%11s %8s %7s" % ("Entropy In", "Out", "%")
for sin in sin_range:
sout = ent(2**sin, 2**log_c)
pct = sout*100/log_c
print "%11g %8.4f %7.4f" % (sin, sout, pct)
print
if __name__ == '__main__':
# A testsuite of sorts...
assert "%.2f" % ent(2**1, 2**160) == "1.00"
assert "%.2f" % ent(2**3, 2**160) == "3.00"
assert "%.2f" % ent(2**155, 2**160) == "154.97"
assert "%.2f" % ent(2**160, 2**160) == "159.17"
assert "%.2f" % ent(2**165, 2**160) == "159.98"
assert "%.4f" % ent(2**170, 2**160) == "159.9993"
entropy_table(160, (1, 2, 3, 150, 155, 160, 165, 170))
entropy_table(16, range(1, 29))