#!/usr/bin/python # Produce hash saturation tables a la "Turbid: A High-Entropy Randomness # Generator" section 3 / table 1 # # For more information, see the paper: # http://www.av8n.com/turbid/paper/turbid.htm#htoc3 # # Copyright (C) 2013 Jeff Epler # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA from __future__ import division import itertools from math import log1p, exp, log def binomial_pmfgen(n, p): """Generate binomial distribution PMF f(k; n, p) for k=1...n Instead of directly computing the product, compute the log of the product as a partial sum and then take the exponent of the sum; this avoids trouble with the very extreme numbers involved. Earlier iterations used careful summing, but that turned out not to be important to matching the paper's values. """ log_p = log(p) log_1mp = log1p(-p) partial_sum = 0 for k in itertools.count(1): if k > n: return partial_sum += log(n-k+1) - log(k) # n-k+1, k nonzero yield k, exp(partial_sum + k * log_p + log_1mp * (n-k)) def ent(N, C): """Compute the entropy of a 'perfectly well-behaved hash' This still misbehaves for N >>> C (e.g., N=2**33, C=2**16), as well as getting very slow, but otherwise matches the values given in the paper""" p = 1/C s = t = 0 partial_sum = 0 for m, pmf in binomial_pmfgen(N, p): si = (m/N) * log(m/N, 2) * C * pmf if si == 0 and partial_sum: break # Past values that can contribute partial_sum += si return -partial_sum def entropy_table(log_c, sin_range): print "Entropy for perfectly well-behaved hash of %d bits" % log_c print "%11s %8s %7s" % ("Entropy In", "Out", "%") for sin in sin_range: sout = ent(2**sin, 2**log_c) pct = sout*100/log_c print "%11g %8.4f %7.4f" % (sin, sout, pct) print if __name__ == '__main__': # A testsuite of sorts... assert "%.2f" % ent(2**1, 2**160) == "1.00" assert "%.2f" % ent(2**3, 2**160) == "3.00" assert "%.2f" % ent(2**155, 2**160) == "154.97" assert "%.2f" % ent(2**160, 2**160) == "159.17" assert "%.2f" % ent(2**165, 2**160) == "159.98" assert "%.4f" % ent(2**170, 2**160) == "159.9993" entropy_table(160, (1, 2, 3, 150, 155, 160, 165, 170)) entropy_table(16, range(1, 29))