diff options
author | Jilles Oldenbeuving <ojilles@gmail.com> | 2011-03-27 14:12:24 +0200 |
---|---|---|
committer | Jehiah Czebotar <jehiah@gmail.com> | 2014-07-18 20:50:08 -0400 |
commit | 14d47799da58b5a03fe325a40ea0ac54117b7978 (patch) | |
tree | f7a74a21748c213d6000333f958aab14f4dfed23 | |
parent | d7435155c56a2b07bc6c26d0d65f915db1cc5cc8 (diff) | |
download | data_hacks-14d47799da58b5a03fe325a40ea0ac54117b7978.zip data_hacks-14d47799da58b5a03fe325a40ea0ac54117b7978.tar.gz data_hacks-14d47799da58b5a03fe325a40ea0ac54117b7978.tar.bz2 |
Added the ability to take in aggregated data in bar_chart, and updated
the documentation along with it.
-rw-r--r-- | README.markdown | 6 | ||||
-rw-r--r-- | data_hacks/bar_chart.py | 8 |
2 files changed, 13 insertions, 1 deletions
diff --git a/README.markdown b/README.markdown index 369fc5b..3bfbf8c 100644 --- a/README.markdown +++ b/README.markdown @@ -83,3 +83,9 @@ Generate an ascii bar chart for input data (this is like a visualization of `uni 20:5 [ 79] *************************************** 21:0 [ 64] ******************************** 21:1 [ 8] **** + +bar_chart.py also supports ingesting aggregated values. Simply provide a two column input of key<space>value: + + $ cat data | uniq -c | bar_chart.py --sort-keys --agg-values + +This is very convenient if you pull already-aggregated data out of, say, Hadoop or MySQL. diff --git a/data_hacks/bar_chart.py b/data_hacks/bar_chart.py index f06c3ef..1823e25 100644 --- a/data_hacks/bar_chart.py +++ b/data_hacks/bar_chart.py @@ -38,7 +38,11 @@ def load_stream(input_stream): def run(input_stream, options): data = defaultdict(lambda:0) for row in input_stream: - data[row]+=1 + if options.agg_values: + kv = row.split(' ',2); + data[kv[0]]+= int(kv[1]) + if not options.agg_values: + data[row]+=1 if not data: print "Error: no data" @@ -71,6 +75,8 @@ def run(input_stream, options): if __name__ == "__main__": parser = OptionParser() parser.usage = "cat data | %prog [options]" + parser.add_option("-a", "--agg-values", dest="agg_values", default=False, action="store_true", + help="Two column input format, space seperated with key<space>value") parser.add_option("-k", "--sort-keys", dest="sort_keys", default=True, action="store_true", help="sort by the key [default]") parser.add_option("-v", "--sort-values", dest="sort_values", default=False, action="store_true", |