summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJilles Oldenbeuving <ojilles@gmail.com>2011-03-27 14:12:24 +0200
committerJehiah Czebotar <jehiah@gmail.com>2014-07-18 20:50:08 -0400
commit14d47799da58b5a03fe325a40ea0ac54117b7978 (patch)
treef7a74a21748c213d6000333f958aab14f4dfed23
parentd7435155c56a2b07bc6c26d0d65f915db1cc5cc8 (diff)
downloaddata_hacks-14d47799da58b5a03fe325a40ea0ac54117b7978.zip
data_hacks-14d47799da58b5a03fe325a40ea0ac54117b7978.tar.gz
data_hacks-14d47799da58b5a03fe325a40ea0ac54117b7978.tar.bz2
Added the ability to take in aggregated data in bar_chart, and updated
the documentation along with it.
-rw-r--r--README.markdown6
-rw-r--r--data_hacks/bar_chart.py8
2 files changed, 13 insertions, 1 deletions
diff --git a/README.markdown b/README.markdown
index 369fc5b..3bfbf8c 100644
--- a/README.markdown
+++ b/README.markdown
@@ -83,3 +83,9 @@ Generate an ascii bar chart for input data (this is like a visualization of `uni
20:5 [ 79] ***************************************
21:0 [ 64] ********************************
21:1 [ 8] ****
+
+bar_chart.py also supports ingesting aggregated values. Simply provide two-column input in the form key<space>value:
+
+ $ cat data | uniq -c | bar_chart.py --sort-keys --agg-values
+
+This is very convenient if you pull data that is already aggregated out of, say, Hadoop or MySQL.
diff --git a/data_hacks/bar_chart.py b/data_hacks/bar_chart.py
index f06c3ef..1823e25 100644
--- a/data_hacks/bar_chart.py
+++ b/data_hacks/bar_chart.py
@@ -38,7 +38,11 @@ def load_stream(input_stream):
def run(input_stream, options):
data = defaultdict(lambda:0)
for row in input_stream:
- data[row]+=1
+ if options.agg_values:
+ kv = row.split(' ',2);
+ data[kv[0]]+= int(kv[1])
+ if not options.agg_values:
+ data[row]+=1
if not data:
print "Error: no data"
@@ -71,6 +75,8 @@ def run(input_stream, options):
if __name__ == "__main__":
parser = OptionParser()
parser.usage = "cat data | %prog [options]"
+ parser.add_option("-a", "--agg-values", dest="agg_values", default=False, action="store_true",
+ help="Two column input format, space seperated with key<space>value")
parser.add_option("-k", "--sort-keys", dest="sort_keys", default=True, action="store_true",
help="sort by the key [default]")
parser.add_option("-v", "--sort-values", dest="sort_values", default=False, action="store_true",