summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorElazar Leibovich <elazarl@gmail.com>2015-07-03 15:27:09 +0300
committerElazar Leibovich <elazarl@gmail.com>2015-07-03 15:27:09 +0300
commitc9bfb896d4a6322b0dead10640a865dd20f35ed3 (patch)
tree2454f6ed4f18c99590665b8ec151df75a82c4822
parent9078660a5fc27f9c15dc9e5e3956b203a0967db1 (diff)
downloaddata_hacks-c9bfb896d4a6322b0dead10640a865dd20f35ed3.zip
data_hacks-c9bfb896d4a6322b0dead10640a865dd20f35ed3.tar.gz
data_hacks-c9bfb896d4a6322b0dead10640a865dd20f35ed3.tar.bz2
explain logarithmic buckets calculation
-rwxr-xr-xdata_hacks/histogram.py13
1 files changed, 11 insertions, 2 deletions
diff --git a/data_hacks/histogram.py b/data_hacks/histogram.py
index 605f54e..04dcf68 100755
--- a/data_hacks/histogram.py
+++ b/data_hacks/histogram.py
@@ -179,12 +179,21 @@ def histogram(stream, options):
if buckets <= 0:
raise ValueError('# of buckets must be > 0')
- def fx(k, n):
+ def first_bucket_size(k, n):
+ """Logarithmic buckets means, the size of bucket i+1 is twice
+ the size of bucket i.
+ For k+1 buckets whose sum is n, we have
+ (note, k+1 buckets, since 0 is counted as well):
+ \sum_{i=0}^{k} x*2^i = n
+ x * \sum_{i=0}^{k} 2^i = n
+ x * (2^{k+1} - 1) = n
+ x = n/(2^{k+1} - 1)
+ """
return n/(2**(k+1)-1)
def log_steps(k, n):
"k logarithmic steps whose sum is n"
- x = fx(k-1, n)
+ x = first_bucket_size(k-1, n)
sum = 0
for i in range(k):
sum += 2**i * x