diff options
author | Ben Burry <bburry@etsy.com> | 2015-01-11 16:45:27 +0000 |
---|---|---|
committer | Ben Burry <bburry@etsy.com> | 2015-01-11 16:45:27 +0000 |
commit | 086825cd1e3d6c65a36881918b45c1f811f9a9e3 (patch) | |
tree | 8211ccadaf76ad3aaf2c4501f732debcfc2d4aaa | |
parent | 8ff2a1ea673cc669cdf276652560f6c20aeb79b2 (diff) | |
download | logster-086825cd1e3d6c65a36881918b45c1f811f9a9e3.zip logster-086825cd1e3d6c65a36881918b45c1f811f9a9e3.tar.gz logster-086825cd1e3d6c65a36881918b45c1f811f9a9e3.tar.bz2 |
Provide alternative to logtail
Allows optional use of Pygtail as an alternative to logtail, for tailing the
log file.
Resolves #11
-rw-r--r-- | README.md | 75 | ||||
-rwxr-xr-x | bin/logster | 71 | ||||
-rw-r--r-- | logster/tailers/__init__.py | 20 | ||||
-rw-r--r-- | logster/tailers/logtailtailer.py | 24 | ||||
-rw-r--r-- | logster/tailers/pygtailtailer.py | 11 | ||||
-rwxr-xr-x | setup.py | 6 |
6 files changed, 135 insertions, 72 deletions
@@ -1,24 +1,24 @@ # Logster - generate metrics from logfiles [](http://travis-ci.org/etsy/logster) -Logster is a utility for reading log files and generating metrics in Graphite -or Ganglia or Amazon CloudWatch. It is ideal for visualizing trends of events that are occurring in -your application/system/error logs. For example, you might use logster to graph -the number of occurrences of HTTP response code that appears in your web server -logs. - -Logster maintains a cursor, via logtail, on each log file that it reads so that +Logster is a utility for reading log files and generating metrics to +configurable outputs. Graphite, Ganglia, Amazon CloudWatch, Nagios, StatsD and +stdout are currently supported. It is ideal for visualizing trends of events that +are occurring in your application/system/error logs. For example, you might use +logster to graph the number of occurrences of HTTP response code that appears in +your web server logs. + +Logster maintains a cursor, via a tailer, on each log file that it reads so that each successive execution only inspects new log entries. In other words, a 1 minute crontab entry for logster would allow you to generate near real-time -trends in Graphite or Ganglia or Amazon CloudWatch for anything you want to measure from your logs. +trends in the configured output for anything you want to measure from your logs. This tool is made up of a framework script, logster, and parsing scripts that -are written to accommodate your specific log format. Two sample parsers are +are written to accommodate your specific log format. Sample parsers are included in this distribution. The parser scripts essentially read a log file line by line, apply a regular expression to extract useful data from the lines you are interested in, and then aggregate that data into metrics that will be -submitted to Ganglia or Graphite or Amazon CloudWatch. Take a look through the sample -parsers, which should give you some idea of how to get started writing your -own. +submitted to the configured output. Take a look through the sample parsers, which +should give you some idea of how to get started writing your own. ## History @@ -34,19 +34,29 @@ our engineers to write log parsers quickly. ## Installation -Logster depends on the "logtail" utility that can be obtained from the logcheck -package, either from a Debian package manager or from source: +Logster supports two methods for gathering data from a logfile: + +1. By default, Logster uses the "logtail" utility that can be obtained from the + logcheck package, either from a Debian package manager or from source: + + http://packages.debian.org/source/sid/logcheck + + RPMs for logcheck can be found here: + + http://rpmfind.net/linux/rpm2html/search.php?query=logcheck + +2. Optionally, Logster can use the "Pygtail" Python module instead of logtail. + You can install Pygtail using pip - http://packages.debian.org/source/sid/logcheck + ``` + $ pip install pygtail + ``` -RPMs for logcheck can be found here: + To use Pygtail, supply the ```--tailer=pygtail``` option on the Logster + commandline. - http://rpmfind.net/linux/rpm2html/search.php?query=logcheck -Once you have logtail installed via the logcheck package, you make want to look -over the actual logster script itself to adjust any paths necessary. Then the -only other thing you need to do is run the installation commands from the -`setup.py` file: +Once you have logtail or Pygtail installed, install Logster using the `setup.py` file: $ sudo python setup.py install @@ -57,7 +67,7 @@ You can test logster from the command line. There are two sample parsers: SampleLogster, which generates stats from an Apache access log; and Log4jLogster, which generates stats from a log4j log. The --dry-run option will allow you to see the metrics being generated on stdout rather than sending them -to Ganglia or Graphite or Amazon CloudWatch. +to your configured output. $ sudo /usr/bin/logster --dry-run --output=ganglia SampleLogster /var/log/httpd/access_log @@ -73,7 +83,7 @@ a virtualenv, for example. Additional usage details can be found with the -h option: - $ ./logster -h + $ logster -h Usage: logster [options] parser logfile Tail a log file and filter each line to generate metrics that can be sent to @@ -81,8 +91,11 @@ Additional usage details can be found with the -h option: Options: -h, --help show this help message and exit - --logtail=LOGTAIL Specify location of logtail. Default - /usr/sbin/logtail2 + -t TAILER, --tailer=TAILER + Specify which tailer to use. Options are logtail and + pygtail. Default is "logtail". + --logtail=LOGTAIL Specify location of logtail. Default + "/usr/sbin/logtail2" -p METRIC_PREFIX, --metric-prefix=METRIC_PREFIX Add prefix to all published metrics. This is for people that may multiple instances of same service on @@ -104,8 +117,8 @@ Additional usage details can be found with the -h option: Hostname and port for Graphite collector, e.g. graphite.example.com:2003 --graphite-protocol=GRAPHITE_PROTOCOL - Specify graphite socket protocol. Options are tcp and udp. - Defaults to tcp. + Specify graphite socket protocol. Options are tcp and + udp. Defaults to tcp. --statsd-host=STATSD_HOST Hostname and port for statsd collector, e.g. statsd.example.com:8125 @@ -117,13 +130,13 @@ Additional usage details can be found with the -h option: nsca.example.com:5667 --nsca-service-hostname=NSCA_SERVICE_HOSTNAME <host_name> value to use in nsca passive service - check. Default is "sandbox.bbc.co.uk" + check. Default is "localhost" -s STATE_DIR, --state-dir=STATE_DIR - Where to store the logtail state file. Default + Where to store the tailer state file. Default location /var/run -l LOG_DIR, --log-dir=LOG_DIR - Where to store the logster logfile. Default - location /var/log/logster + Where to store the logster logfile. Default location + /var/log/logster -o OUTPUT, --output=OUTPUT Where to send metrics (can specify multiple times). Choices are 'graphite', 'ganglia', 'cloudwatch', diff --git a/bin/logster b/bin/logster index b035170..43ec09b 100755 --- a/bin/logster +++ b/bin/logster @@ -57,10 +57,10 @@ from math import floor # Local dependencies from logster.logster_helper import LogsterParsingException, LockingError, CloudWatch, CloudWatchException +from logster.tailers.logtailtailer import LogtailTailer # Globals gmetric = "/usr/bin/gmetric" -logtail = "/usr/sbin/logtail2" log_dir = "/var/log/logster" state_dir = "/var/run" send_nsca = "/usr/sbin/send_nsca" @@ -70,8 +70,10 @@ script_start_time = time() # Command-line options and parsing. cmdline = optparse.OptionParser(usage="usage: %prog [options] parser logfile", description="Tail a log file and filter each line to generate metrics that can be sent to common monitoring packages.") -cmdline.add_option('--logtail', action='store', default=logtail, - help='Specify location of logtail. Default %s' % logtail) +cmdline.add_option('--tailer', '-t', action='store', default='logtail', + choices=('logtail', 'pygtail'), help='Specify which tailer to use. Options are logtail and pygtail. Default is \"%default\".') +cmdline.add_option('--logtail', action='store', default=LogtailTailer.default_logtail_path, + help='Specify location of logtail. Default \"%default\"') cmdline.add_option('--metric-prefix', '-p', action='store', help='Add prefix to all published metrics. This is for people that may multiple instances of same service on same host.', default='') @@ -102,7 +104,7 @@ cmdline.add_option('--nsca-service-hostname', action='store', help='<host_name> value to use in nsca passive service check. Default is \"%default\"', default=socket.gethostname()) cmdline.add_option('--state-dir', '-s', action='store', default=state_dir, - help='Where to store the logtail state file. Default location %s' % state_dir) + help='Where to store the tailer state file. Default location %s' % state_dir) cmdline.add_option('--log-dir', '-l', action='store', default=log_dir, help='Where to store the logster logfile. Default location %s' % log_dir) cmdline.add_option('--output', '-o', action='append', @@ -122,6 +124,13 @@ if options.parser_help: if (len(arguments) != 2): cmdline.print_help() cmdline.error("Supply at least two arguments: parser and logfile.") + +if options.tailer == 'pygtail': + from logster.tailers.pygtailtailer import PygtailTailer + tailer_klass = PygtailTailer +else: + tailer_klass = LogtailTailer + if not options.output: cmdline.print_help() cmdline.error("Supply where the data should be sent with -o (or --output).") @@ -142,7 +151,6 @@ if class_name.find('.') == -1: log_file = arguments[1] state_dir = options.state_dir log_dir = options.log_dir -logtail = options.logtail # Logging infrastructure for use throughout the script. @@ -359,14 +367,13 @@ def end_locking(lockfile_fd, lockfile_name): def main(): - dirsafe_logfile = log_file.replace('/','-') - logtail_state_file = '%s/logtail-%s%s.state' % (state_dir, class_name, dirsafe_logfile) - logtail_lock_file = '%s/logtail-%s%s.lock' % (state_dir, class_name, dirsafe_logfile) - shell_tail = "%s -f %s -o %s" % (logtail, log_file, logtail_state_file) + state_file = '%s/%s-%s%s.state' % (state_dir, tailer_klass.short_name, class_name, dirsafe_logfile) + lock_file = '%s/%s-%s%s.lock' % (state_dir, tailer_klass.short_name, class_name, dirsafe_logfile) + tailer = tailer_klass(log_file, state_file, options, logger) logger.info("Executing parser %s on logfile %s" % (class_name, log_file)) - logger.debug("Using state file %s" % logtail_state_file) + logger.debug("Using state file %s" % state_file) # Import and instantiate the class from the module passed in. module_name, parser_name = class_name.rsplit('.', 1) @@ -377,7 +384,7 @@ def main(): # simultaneuosly. This will happen if the log parsing takes more time than # the cron period, which is likely on first run if the logfile is huge. try: - lockfile = start_locking(logtail_lock_file) + lockfile = start_locking(lock_file) except LockingError as e: logger.warning("Failed to get lock. Is another instance of logster running?") sys.exit(1) @@ -386,11 +393,11 @@ def main(): try: # Read the age of the state file to see how long it's been since we last - # ran. Replace the state file if it has gone missing. While we are her, - # touch the state file to reset the time in case logtail doesn't + # ran. Replace the state file if it has gone missing. While we are here, + # touch the state file to reset the time in case the tailer doesn't # find any new lines (and thus won't update the statefile). try: - state_file_age = os.stat(logtail_state_file)[stat.ST_MTIME] + state_file_age = os.stat(state_file)[stat.ST_MTIME] # Calculate now() - state file age to determine check duration. duration = floor(time()) - floor(state_file_age) @@ -398,30 +405,12 @@ def main(): except OSError as e: logger.info('Writing new state file and exiting. (Was either first run, or state file went missing.)') - input = os.popen(shell_tail) - retval = input.close() - if not retval is None: - logger.warning('%s returned bad exit code %s' % (shell_tail, retval)) - end_locking(lockfile, logtail_lock_file) + tailer.create_statefile() + end_locking(lockfile, lock_file) sys.exit(0) - # Open a pipe to read input from logtail. - input = os.popen(shell_tail) - - except SystemExit as e: - raise - - except Exception as e: - # note - there is no exception when logtail doesn't exist. - # I don't know when this exception will ever actually be triggered. - print("Failed to run %s to get log data (line %s): %s" % - (shell_tail, lineno(), e)) - end_locking(lockfile, logtail_lock_file) - sys.exit(1) - - # Parse each line from input, then send all stats to their collectors. - try: - for line in input: + # Parse each line from input, then send all stats to their collectors. + for line in tailer.ireadlines(): try: parser.parse_line(line) except LogsterParsingException as e: @@ -431,10 +420,12 @@ def main(): submit_stats(parser, duration, options) + except SystemExit as e: + raise except Exception as e: print("Exception caught at %s: %s" % (lineno(), e)) traceback.print_exc() - end_locking(lockfile, logtail_lock_file) + end_locking(lockfile, lock_file) sys.exit(1) # Log the execution time @@ -444,13 +435,13 @@ def main(): # Set mtime and atime for the state file to the startup time of the script # so that the cron interval is not thrown off by parsing a large number of # log entries. - os.utime(logtail_state_file, (floor(script_start_time), floor(script_start_time))) + os.utime(state_file, (floor(script_start_time), floor(script_start_time))) - end_locking(lockfile, logtail_lock_file) + end_locking(lockfile, lock_file) # try and remove the lockfile one last time, but it's a valid state that it's already been removed. try: - end_locking(lockfile, logtail_lock_file) + end_locking(lockfile, lock_file) except Exception as e: pass diff --git a/logster/tailers/__init__.py b/logster/tailers/__init__.py new file mode 100644 index 0000000..a3306de --- /dev/null +++ b/logster/tailers/__init__.py @@ -0,0 +1,20 @@ +class Tailer(object): + """ Base class for tailer implementations """ + def __init__(self, logfile, statefile, options, logger): + self.logfile = logfile + self.statefile = statefile + self.options = options + self.logger = logger + + def create_statefile(self): + """ Create a statefile, with the offset of the end of the log file. + Override if your tailer implementation can do this more efficiently + """ + for _ in self.ireadlines(): + pass + + def ireadlines(self): + """ Return a generator over lines in the logfile, updating the + statefile when the generator is exhausted + """ + raise NotImplementedError() diff --git a/logster/tailers/logtailtailer.py b/logster/tailers/logtailtailer.py new file mode 100644 index 0000000..7afeded --- /dev/null +++ b/logster/tailers/logtailtailer.py @@ -0,0 +1,24 @@ +from logster.tailers import Tailer +import os + + +class LogtailTailer(Tailer): + short_name = 'logtail' + default_logtail_path = '/usr/sbin/logtail2' + + def __init__(self, *args): + super(LogtailTailer, self).__init__(*args) + self.shell_tail = "%s -f %s -o %s" % (self.options.logtail, self.logfile, self.statefile) + + def create_statefile(self): + input = os.popen(self.shell_tail) + retval = input.close() + if not retval is None: + self.logger.warning('%s returned bad exit code %s' % (self.shell_tail, retval)) + + def ireadlines(self): + input = os.popen(self.shell_tail) + for line in input: + yield line + input.close() + diff --git a/logster/tailers/pygtailtailer.py b/logster/tailers/pygtailtailer.py new file mode 100644 index 0000000..f3e7aae --- /dev/null +++ b/logster/tailers/pygtailtailer.py @@ -0,0 +1,11 @@ +from logster.tailers import Tailer +import pygtail + + +class PygtailTailer(Tailer): + short_name = 'pygtail' + + def ireadlines(self): + tailer = pygtail.Pygtail(self.logfile, offset_file=self.statefile) + for line in tailer: + yield line @@ -14,7 +14,11 @@ setup( url='https://github.com/etsy/logster', packages=[ 'logster', - 'logster/parsers' + 'logster/parsers', + 'logster/tailers' + ], + install_requires = [ + 'pygtail>=0.5.1' ], zip_safe=False, scripts=[ |