diff options
author | Ben Burry <bburry@etsy.com> | 2015-09-08 13:18:52 -0700 |
---|---|---|
committer | Ben Burry <bburry@etsy.com> | 2015-11-13 17:34:42 +0000 |
commit | 49341f2325e66601b0431ce29510b5086e334bee (patch) | |
tree | 0c1e83dd5ed6c6be103ed1896695673471261635 | |
parent | 1e32bd8711a3a07235937948f0d12da6c5a12aec (diff) | |
download | logster-49341f2325e66601b0431ce29510b5086e334bee.zip logster-49341f2325e66601b0431ce29510b5086e334bee.tar.gz logster-49341f2325e66601b0431ce29510b5086e334bee.tar.bz2 |
Documentation for output class and parser class
-rw-r--r-- | README.md | 57 | ||||
-rwxr-xr-x | bin/logster | 2 | ||||
-rw-r--r-- | docs/output_interface.md | 38 | ||||
-rw-r--r-- | docs/outputs.md | 45 | ||||
-rw-r--r-- | docs/parser_interface.md | 26 | ||||
-rw-r--r-- | docs/parsers.md | 53 | ||||
-rw-r--r-- | logster/logster_helper.py | 6 |
7 files changed, 184 insertions, 43 deletions
@@ -1,8 +1,7 @@ # Logster - generate metrics from logfiles [](http://travis-ci.org/etsy/logster) Logster is a utility for reading log files and generating metrics to -configurable outputs. Graphite, Ganglia, Amazon CloudWatch, Nagios, StatsD and -stdout are currently supported. It is ideal for visualizing trends of events that +configurable outputs. It is ideal for visualizing trends of events that are occurring in your application/system/error logs. For example, you might use logster to graph the number of occurrences of HTTP response code that appears in your web server logs. @@ -12,13 +11,19 @@ each successive execution only inspects new log entries. In other words, a 1 minute crontab entry for logster would allow you to generate near real-time trends in the configured output for anything you want to measure from your logs. -This tool is made up of a framework script, logster, and parsing scripts that +This tool is made up of a framework script, logster, and parsing classes that are written to accommodate your specific log format. Sample parsers are -included in this distribution. The parser scripts essentially read a log file +included in this distribution. The parser classes essentially read a log file line by line, apply a regular expression to extract useful data from the lines you are interested in, and then aggregate that data into metrics that will be -submitted to the configured output. Take a look through the sample parsers, which -should give you some idea of how to get started writing your own. +submitted to the configured output. The sample parsers should give you some idea +of how to get started writing your own. A list of available parsers can be +found on the [Parsers](./docs/parsers.md) page. + +Graphite, Ganglia, Amazon CloudWatch, Nagios, StatsD and stdout outputs are +provided, and Logster also supports the use of third-party output classes. +A list of available output classes can be found on the [Outputs](./docs/outputs.md) +page. 
## History @@ -74,9 +79,7 @@ Once you have logtail or Pygtail installed, install Logster using the `setup.py` ## Usage -You can test logster from the command line. There are two sample parsers: -SampleLogster, which generates stats from an Apache access log; and -Log4jLogster, which generates stats from a log4j log. The --dry-run option will +You can test logster from the command line. The --dry-run option will allow you to see the metrics being generated on stdout rather than sending them to your configured output. @@ -120,45 +123,17 @@ Additional usage details can be found with the -h option: Options to pass to the logster parser such as "-o VALUE --option2 VALUE". These are parser-specific and passed directly to the parser. - --gmetric-options=GMETRIC_OPTIONS - Options to pass to gmetric such as "-d 180 -c - /etc/ganglia/gmond.conf" (default). These are passed - directly to gmetric. - --graphite-host=GRAPHITE_HOST - Hostname and port for Graphite collector, e.g. - graphite.example.com:2003 - --graphite-protocol=GRAPHITE_PROTOCOL - Specify graphite socket protocol. Options are tcp and - udp. Defaults to tcp. - --statsd-host=STATSD_HOST - Hostname and port for statsd collector, e.g. - statsd.example.com:8125 - --aws-key=AWS_KEY Amazon credential key - --aws-secret-key=AWS_SECRET_KEY - Amazon credential secret key - --nsca-host=NSCA_HOST - Hostname and port for NSCA daemon, e.g. - nsca.example.com:5667 - --nsca-service-hostname=NSCA_SERVICE_HOSTNAME - <host_name> value to use in nsca passive service - check. Default is "localhost" -s STATE_DIR, --state-dir=STATE_DIR Where to store the tailer state file. Default location /var/run -l LOG_DIR, --log-dir=LOG_DIR Where to store the logster logfile. Default location /var/log/logster - --log-conf=LOG_CONFIGURATION_FILE - Location of the logging configuration file. - Default is None and will use the default logging - configuration (rotating file) + --log-conf=LOG_CONF Logging configuration file. 
None by default -o OUTPUT, --output=OUTPUT Where to send metrics (can specify multiple times). - Choices are 'graphite', 'ganglia', 'cloudwatch', - 'nsca' , 'statsd', or 'stdout'. - --stdout-separator=STDOUT_SEPARATOR - Seperator between prefix/suffix and name for stdout. - Default is "_". + Choices are statsd, stdout, cloudwatch, graphite, + ganglia, nsca or a fully qualified Python class name -d, --dry-run Parse the log file but send stats to standard output. -D, --debug Provide more verbose logging for debugging. @@ -174,5 +149,3 @@ Additional usage details can be found with the -h option: If you have questions, you can find us on IRC in the `#codeascraft` channel on Freenode. - - diff --git a/bin/logster b/bin/logster index 85aa1e3..8fc90e1 100755 --- a/bin/logster +++ b/bin/logster @@ -288,7 +288,7 @@ def main(): # Check for lock file so we don't run multiple copies of the same parser # simultaneuosly. This will happen if the log parsing takes more time than - # the cron period, which is likely on first run if the logfile is huge. + # the cron period. try: lockfile = start_locking(lock_file) except LockingError as e: diff --git a/docs/output_interface.md b/docs/output_interface.md new file mode 100644 index 0000000..a199c35 --- /dev/null +++ b/docs/output_interface.md @@ -0,0 +1,38 @@ +Output Interface +----------------- + +Output classes are responsible for submitting metrics gathered in a +Logster run to a specific output. Each output class should implement +at least the following method: + +* `submit(self, metrics)`: the submit method is invoked at the end of + each run, and is responsible for sending the collected metrics values + to the output backend. + The `metrics` parameter is an iterable of [MetricObject][metric_object] + objects. + +There's a base class provided at [logster.logster_helper.LogsterOutput][logster_output]. 
+If your output class extends LogsterOutput, it needs to provide an implementation +for the `submit` method but will get access to the [get_metric_name][logster_output] +function for generating metric names using supplied prefix/suffix options. + + +Optionally, an output class can override the constructor: + +* `__init__(self, parser, options, logger)`: The output is instantiated + before invoking the parser on new log lines. + Parameters: `parser` is the optparse.OptionParser instance, `options` + are the parsed optparse options, `logger` is the python logging instance + +If your output needs to take custom options on the commandline, it can implement +the `add_options` classmethod to add its own options to an optparse.OptionParser +instance: + +* @classmethod + `add_options(cls, parser)`: Called during optparse option parsing to add custom + commandline options for this output. + Parameters: `parser` is the optparse.OptionParser instance. + + +[metric_object]: ../logster/logster_helper.py +[logster_output]: ../logster/logster_helper.py diff --git a/docs/outputs.md b/docs/outputs.md new file mode 100644 index 0000000..9bff8e8 --- /dev/null +++ b/docs/outputs.md @@ -0,0 +1,45 @@ +Supported Outputs +------------------ + +Logster supports output classes that can send metrics to a backend +service or data store. Backend services, for instance, can retain metrics in a +time series data store, visualize metrics in graphs or tables, or generate alerts +based on defined thresholds. + +Logster includes the following built-in outputs: + +* [Graphite][graphite] (`graphite`): An open-source + time-series data store that provides visualization through a web-browser. +* [Ganglia][ganglia] (`ganglia`): A scalable distributed monitoring + system for high-performance computing systems. +* [Amazon CloudWatch][cloudwatch] (`cloudwatch`): A monitoring service for AWS + cloud resources and the applications you run on AWS. 
+* [Nagios][nagios] (`nsca`): An open-source monitoring system for systems, + networks and infrastructure +* [StatsD][statsd] (`statsd`): A simple daemon for easy stats aggregation +* Standard Output (`stdout`): Outputs the metrics to stdout. + +Outputs are just Python classes that implement the interface defined in [Output +Interface](./output_interface.md). Multiple outputs can be used at once: use the +`--output` (`-o`) commandline option repeatedly to specify the output classes +you'd like to use. + +There are a set of third-party outputs available. You'll need to make sure the +class is available on the [search path][search_path] in order to use it. + +## Available Third-party outputs +- + +If you have an output you'd like to be included here, please open a pull +request with a link to its source/GitHub repo + +Built-in outputs can be referenced using their short name (`graphite`, `ganglia` +etc), but to use third-party outputs you must specify a fully-qualified module +and class name available on the [search path][search_path]. + +[graphite]: http://graphite.wikidot.com +[ganglia]: http://ganglia.info/ +[cloudwatch]: https://aws.amazon.com/cloudwatch/ +[nagios]: https://www.nagios.org/ +[statsd]: https://github.com/etsy/statsd +[search_path]: https://docs.python.org/2/tutorial/modules.html#the-module-search-path diff --git a/docs/parser_interface.md b/docs/parser_interface.md new file mode 100644 index 0000000..ca52030 --- /dev/null +++ b/docs/parser_interface.md @@ -0,0 +1,26 @@ +Parser Interface +----------------- + +Parser classes are responsible for processing incoming logs line-by-line, and +providing an aggregated list of metrics for submission to the configured +outputs. + +A parser is required to implement at least the following methods: + +* `parse_line(self, line)`: called once for each line found in the log since + the last Logster run. Responsible for parsing the log line and extracting + values that are aggregated later. 
+ Parameters: `line` is the log line string + Raises: `logster.logster_helper.LogsterParsingException` when the current + line cannot be parsed. Logster will continue with the next log line. + +* `get_state(self, duration)`: called once for each Logster run. Returns an + iterable of MetricObject instances with aggregated metrics values from the + parsed log lines. + Parameters: `duration` is the number of seconds since the last Logster run. + Metrics values returned are usually expected to be averaged across the + Logster run duration. + +A base class is provided at [logster.logster_helper.LogsterParser][logster_parser]. + +[logster_parser]: ../logster/logster_helper.py diff --git a/docs/parsers.md b/docs/parsers.md new file mode 100644 index 0000000..e9d50e5 --- /dev/null +++ b/docs/parsers.md @@ -0,0 +1,53 @@ +Supported Parsers +------------------ + +Logster uses parsing classes that are written to accommodate your specific log +format. Sample parsers are included in this distribution. + +Parser classes essentially read a log file line by line, apply a regular +expression to extract useful data from the lines you are interested in, and then +aggregate that data into metrics that will be submitted to the configured outputs. + +The sample parsers should give you some idea of how to get started writing your own. 
+ +Logster includes the following built-in parsers: + +* [ErrorLogLogster][errorloglogster]: count the number of different messages in an Apache error_log +* [JsonLogster][jsonlogster]: parses a file of JsonObjects, each on their own line +* [Log4jLogster][log4jlogster]: count the number of events for each log level in a log4j log +* [MetricLogster][metriclogster]: Collects arbitrary metric lines and spits out aggregated + metric values +* [PostfixLogster][postfixlogster]: count the number of sent/deferred/bounced emails from a + Postfix log +* [SquidLogster][squidlogster]: count the number of responses and object size in the squid + access.log +* [SampleLogster][samplelogster]: count the number of response codes found in an Apache access + log + +You can use the provided parsers, or you can use your own parsers by passing +the complete module and parser name. In this case, the name of the parser does +not have to match the name of the module (you can have a logster.py file with a +MyCustomParser parser). Just make sure the module is in your [Python path][search_path] +- via a virtualenv, for example. + + $ /env/my_org/bin/logster --dry-run --output=stdout my_org_package.logster.MyCustomParser /var/log/my_custom_log + +Parsers are just Python classes that implement the interface defined in [Parser +Interface](./parser_interface.md). 
+ +## Available Third-party parsers +- + +If you have a parser you'd like to be included here, please open a pull +request with a link to its source/GitHub repo + + +[search_path]: https://docs.python.org/2/tutorial/modules.html#the-module-search-path +[errorloglogster]: ../logster/parsers/ErrorLogLogster.py +[jsonlogster]: ../logster/parsers/JsonLogster.py +[log4jlogster]: ../logster/parsers/Log4jLogster.py +[metriclogster]: ../logster/parsers/MetricLogster.py +[postfixlogster]: ../logster/parsers/PostfixLogster.py +[squidlogster]: ../logster/parsers/SquidLogster.py +[samplelogster]: ../logster/parsers/SampleLogster.py + diff --git a/logster/logster_helper.py b/logster/logster_helper.py index 372af98..027fa96 100644 --- a/logster/logster_helper.py +++ b/logster/logster_helper.py @@ -55,12 +55,15 @@ class LockingError(Exception): class LogsterOutput(object): + """ Base class for logster outputs""" def __init__(self, parser, options, logger): self.options = options self.logger = logger self.dry_run = options.dry_run def get_metric_name(self, metric, separator="."): + """ Convenience method for constructing metric names + Takes into account any supplied prefix/suffix options""" metric_name = metric.name if self.options.metric_prefix: metric_name = self.options.metric_prefix + separator + metric_name @@ -68,3 +71,6 @@ class LogsterOutput(object): metric_name = metric_name + separator + self.options.metric_suffix return metric_name + def submit(self, metrics): + """Send metrics to the specific output""" + raise RuntimeError("Implement me!") |