diff options
author | Jehiah Czebotar <jehiah@gmail.com> | 2014-12-20 01:03:53 -0500 |
---|---|---|
committer | Jehiah Czebotar <jehiah@gmail.com> | 2014-12-20 01:03:53 -0500 |
commit | 45101596a07f0b59d219ec95aef1ded767dba64d (patch) | |
tree | 0684fc4cba50731162b12c082de33e005e74d5fc | |
parent | 5b020924cc796341fdf4b89e06f867e566bee4c1 (diff) | |
download | data_hacks-45101596a07f0b59d219ec95aef1ded767dba64d.zip data_hacks-45101596a07f0b59d219ec95aef1ded767dba64d.tar.gz data_hacks-45101596a07f0b59d219ec95aef1ded767dba64d.tar.bz2 |
separate agg code
-rwxr-xr-x | data_hacks/bar_chart.py | 13 | ||||
-rwxr-xr-x | data_hacks/histogram.py | 21 |
2 files changed, 23 insertions, 11 deletions
diff --git a/data_hacks/bar_chart.py b/data_hacks/bar_chart.py index 3551860..13f76a8 100755 --- a/data_hacks/bar_chart.py +++ b/data_hacks/bar_chart.py @@ -41,11 +41,16 @@ def run(input_stream, options): data = defaultdict(int) total = 0 for row in input_stream: - if options.agg_values: - kv = row.replace('\t', ' ').split(' ',2); + if options.agg_key_value: + kv = row.rstrip().rsplit(None, 1) value = int(kv[1]) data[kv[0]] += value total += value + elif options.agg_value_key: + kv = row.lstrip().split(None, 1) + value = int(kv[0]) + data[kv[1]] += value + total += value else: data[row] += 1 total += 1 @@ -85,7 +90,9 @@ def run(input_stream, options): if __name__ == "__main__": parser = OptionParser() parser.usage = "cat data | %prog [options]" - parser.add_option("-a", "--agg-values", dest="agg_values", default=False, action="store_true", + parser.add_option("-a", "--agg", dest="agg_value_key", default=False, action="store_true", + help="Two column input format, space seperated with value<space>key") + parser.add_option("-A", "--agg-key-value", dest="agg_key_value", default=False, action="store_true", help="Two column input format, space seperated with key<space>value") parser.add_option("-k", "--sort-keys", dest="sort_keys", default=True, action="store_true", help="sort by the key [default]") diff --git a/data_hacks/histogram.py b/data_hacks/histogram.py index 72b3806..042a02f 100755 --- a/data_hacks/histogram.py +++ b/data_hacks/histogram.py @@ -76,7 +76,7 @@ def test_mvsd(): assert '%.2f' % mvsd.var() == "8.25" assert '%.14f' % mvsd.sd() == "2.87228132326901" -def load_stream(input_stream, agg): +def load_stream(input_stream, agg_value_key, agg_key_value): for line in input_stream: clean_line = line.strip() if not clean_line: @@ -85,11 +85,14 @@ def load_stream(input_stream, agg): if clean_line[0] in ['"', "'"]: clean_line = clean_line.strip("\"'") try: - if agg: - value, count = line.replace("\t", ' ').split(' ', 2) - yield DataPoint(Decimal(value), 
int(count)) - continue - yield DataPoint(Decimal(clean_line), 1) + if agg_key_value: + key, value = clean_line.rstrip().rsplit(None, 1) + yield DataPoint(Decimal(key), int(value)) + elif agg_value_key: + value, key = clean_line.lstrip().split(None, 1) + yield DataPoint(Decimal(key), int(value)) + else: + yield DataPoint(Decimal(clean_line), 1) except: logging.exception('failed %r', line) print >>sys.stderr, "invalid line %r" % line @@ -219,7 +222,9 @@ def histogram(stream, options): if __name__ == "__main__": parser = OptionParser() parser.usage = "cat data | %prog [options]" - parser.add_option("-a", "--agg", dest="agg", default=False, action="store_true", + parser.add_option("-a", "--agg", dest="agg_value_key", default=False, action="store_true", + help="Two column input format, space seperated with value<space>key") + parser.add_option("-A", "--agg-key-value", dest="agg_key_value", default=False, action="store_true", help="Two column input format, space seperated with key<space>value") parser.add_option("-m", "--min", dest="min", help="minimum value for graph") @@ -242,5 +247,5 @@ if __name__ == "__main__": parser.print_usage() print "for more help use --help" sys.exit(1) - histogram(load_stream(sys.stdin, options.agg), options) + histogram(load_stream(sys.stdin, options.agg_value_key, options.agg_key_value), options) |