diff options
-rw-r--r-- | README.markdown | 6 | ||||
-rw-r--r-- | data_hacks/bar_chart.py | 8 |
2 files changed, 13 insertions, 1 deletion
diff --git a/README.markdown b/README.markdown index 369fc5b..3bfbf8c 100644 --- a/README.markdown +++ b/README.markdown @@ -83,3 +83,9 @@ Generate an ascii bar chart for input data (this is like a visualization of `uni 20:5 [ 79] *************************************** 21:0 [ 64] ******************************** 21:1 [ 8] **** + +bar_chart.py also supports ingesting aggregated values. Simply provide a two-column input of key<space>value: + + $ cat data | uniq -c | bar_chart.py --sort-keys --agg-values + +This is very convenient if you pull already-aggregated data out of, say, Hadoop or MySQL. diff --git a/data_hacks/bar_chart.py b/data_hacks/bar_chart.py index f06c3ef..1823e25 100644 --- a/data_hacks/bar_chart.py +++ b/data_hacks/bar_chart.py @@ -38,7 +38,11 @@ def load_stream(input_stream): def run(input_stream, options): data = defaultdict(lambda:0) for row in input_stream: - data[row]+=1 + if options.agg_values: + kv = row.split(' ',2); + data[kv[0]]+= int(kv[1]) + if not options.agg_values: + data[row]+=1 if not data: print "Error: no data" @@ -71,6 +75,8 @@ def run(input_stream, options): if __name__ == "__main__": parser = OptionParser() parser.usage = "cat data | %prog [options]" + parser.add_option("-a", "--agg-values", dest="agg_values", default=False, action="store_true", + help="Two column input format, space seperated with key<space>value") parser.add_option("-k", "--sort-keys", dest="sort_keys", default=True, action="store_true", help="sort by the key [default]") parser.add_option("-v", "--sort-values", dest="sort_values", default=False, action="store_true", |