summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rwxr-xr-x data_hacks/bar_chart.py 13
-rwxr-xr-x data_hacks/histogram.py 21
2 files changed, 23 insertions, 11 deletions
diff --git a/data_hacks/bar_chart.py b/data_hacks/bar_chart.py
index 3551860..13f76a8 100755
--- a/data_hacks/bar_chart.py
+++ b/data_hacks/bar_chart.py
@@ -41,11 +41,16 @@ def run(input_stream, options):
data = defaultdict(int)
total = 0
for row in input_stream:
- if options.agg_values:
- kv = row.replace('\t', ' ').split(' ',2);
+ if options.agg_key_value:
+ kv = row.rstrip().rsplit(None, 1)
value = int(kv[1])
data[kv[0]] += value
total += value
+ elif options.agg_value_key:
+ kv = row.lstrip().split(None, 1)
+ value = int(kv[0])
+ data[kv[1]] += value
+ total += value
else:
data[row] += 1
total += 1
@@ -85,7 +90,9 @@ def run(input_stream, options):
if __name__ == "__main__":
parser = OptionParser()
parser.usage = "cat data | %prog [options]"
- parser.add_option("-a", "--agg-values", dest="agg_values", default=False, action="store_true",
+ parser.add_option("-a", "--agg", dest="agg_value_key", default=False, action="store_true",
+ help="Two column input format, space seperated with value<space>key")
+ parser.add_option("-A", "--agg-key-value", dest="agg_key_value", default=False, action="store_true",
help="Two column input format, space seperated with key<space>value")
parser.add_option("-k", "--sort-keys", dest="sort_keys", default=True, action="store_true",
help="sort by the key [default]")
diff --git a/data_hacks/histogram.py b/data_hacks/histogram.py
index 72b3806..042a02f 100755
--- a/data_hacks/histogram.py
+++ b/data_hacks/histogram.py
@@ -76,7 +76,7 @@ def test_mvsd():
assert '%.2f' % mvsd.var() == "8.25"
assert '%.14f' % mvsd.sd() == "2.87228132326901"
-def load_stream(input_stream, agg):
+def load_stream(input_stream, agg_value_key, agg_key_value):
for line in input_stream:
clean_line = line.strip()
if not clean_line:
@@ -85,11 +85,14 @@ def load_stream(input_stream, agg):
if clean_line[0] in ['"', "'"]:
clean_line = clean_line.strip("\"'")
try:
- if agg:
- value, count = line.replace("\t", ' ').split(' ', 2)
- yield DataPoint(Decimal(value), int(count))
- continue
- yield DataPoint(Decimal(clean_line), 1)
+ if agg_key_value:
+ key, value = clean_line.rstrip().rsplit(None, 1)
+ yield DataPoint(Decimal(key), int(value))
+ elif agg_value_key:
+ value, key = clean_line.lstrip().split(None, 1)
+ yield DataPoint(Decimal(key), int(value))
+ else:
+ yield DataPoint(Decimal(clean_line), 1)
except:
logging.exception('failed %r', line)
print >>sys.stderr, "invalid line %r" % line
@@ -219,7 +222,9 @@ def histogram(stream, options):
if __name__ == "__main__":
parser = OptionParser()
parser.usage = "cat data | %prog [options]"
- parser.add_option("-a", "--agg", dest="agg", default=False, action="store_true",
+ parser.add_option("-a", "--agg", dest="agg_value_key", default=False, action="store_true",
+ help="Two column input format, space seperated with value<space>key")
+ parser.add_option("-A", "--agg-key-value", dest="agg_key_value", default=False, action="store_true",
help="Two column input format, space seperated with key<space>value")
parser.add_option("-m", "--min", dest="min",
help="minimum value for graph")
@@ -242,5 +247,5 @@ if __name__ == "__main__":
parser.print_usage()
print "for more help use --help"
sys.exit(1)
- histogram(load_stream(sys.stdin, options.agg), options)
+ histogram(load_stream(sys.stdin, options.agg_value_key, options.agg_key_value), options)