diff options
author | Elazar Leibovich <elazarl@gmail.com> | 2015-07-03 15:27:09 +0300 |
---|---|---|
committer | Elazar Leibovich <elazarl@gmail.com> | 2015-07-03 15:27:09 +0300 |
commit | c9bfb896d4a6322b0dead10640a865dd20f35ed3 (patch) | |
tree | 2454f6ed4f18c99590665b8ec151df75a82c4822 | |
parent | 9078660a5fc27f9c15dc9e5e3956b203a0967db1 (diff) | |
download | data_hacks-c9bfb896d4a6322b0dead10640a865dd20f35ed3.zip data_hacks-c9bfb896d4a6322b0dead10640a865dd20f35ed3.tar.gz data_hacks-c9bfb896d4a6322b0dead10640a865dd20f35ed3.tar.bz2 |
explain logarithmic buckets calculation
-rwxr-xr-x | data_hacks/histogram.py | 13 |
1 files changed, 11 insertions, 2 deletions
diff --git a/data_hacks/histogram.py b/data_hacks/histogram.py index 605f54e..04dcf68 100755 --- a/data_hacks/histogram.py +++ b/data_hacks/histogram.py @@ -179,12 +179,21 @@ def histogram(stream, options): if buckets <= 0: raise ValueError('# of buckets must be > 0') - def fx(k, n): + def first_bucket_size(k, n): + """Logarithmic buckets means, the size of bucket i+1 is twice + the size of bucket i. + For k+1 buckets whose sum is n, we have + (note, k+1 buckets, since 0 is counted as well): + \sum_{i=0}^{k} x*2^i = n + x * \sum_{i=0}^{k} 2^i = n + x * (2^{k+1} - 1) = n + x = n/(2^{k+1} - 1) + """ return n/(2**(k+1)-1) def log_steps(k, n): "k logarithmic steps whose sum is n" - x = fx(k-1, n) + x = first_bucket_size(k-1, n) sum = 0 for i in range(k): sum += 2**i * x |