1 files changed, 730 insertions, 0 deletions
diff --git a/gitautodeploy/gitautodeploy.py b/gitautodeploy/gitautodeploy.py
new file mode 100644
index 0000000..07677fe
--- /dev/null
+++ b/gitautodeploy/gitautodeploy.py
@@ -0,0 +1,730 @@
+from BaseHTTPServer import BaseHTTPRequestHandler
+
+class LogInterface(object):
+    """Interface that functions as a stdout and stderr handler and directs the
+    output to the logging module, which in turn will output to either console,
+    file or both."""
+
+    def __init__(self, level=None):
+        import logging
+        self.level = (level if level else logging.getLogger().info)
+
+    def write(self, msg):
+        for line in msg.strip().split("\n"):
+            self.level(line)
+
+class WebhookRequestHandler(BaseHTTPRequestHandler):
+    """Extends the BaseHTTPRequestHandler class and handles the incoming
+    HTTP requests."""
+
+    def do_POST(self):
+        """Invoked on incoming POST requests"""
+        from threading import Timer
+        import logging
+
+        logger = logging.getLogger()
+
+        content_type = self.headers.getheader('content-type')
+        content_length = int(self.headers.getheader('content-length'))
+        request_body = self.rfile.read(content_length)
+        
+        # Extract request headers and make all keys to lowercase (makes them easier to compare)
+        request_headers = dict(self.headers)
+        request_headers = dict((k.lower(), v) for k, v in request_headers.iteritems())
+
+        ServiceRequestParser = self.figure_out_service_from_request(request_headers, request_body)
+        
+        self.send_response(200)
+        self.send_header('Content-type', 'text/plain')
+        self.end_headers()
+
+        # Unable to identify the source of the request
+        if not ServiceRequestParser:
+            logger.error('Unable to find appropriate handler for request. The source service is not supported.')
+            return
+
+        # Could be GitHubParser, GitLabParser or other
+        repo_configs, ref, action = ServiceRequestParser(self._config).get_repo_params_from_request(request_headers, request_body)
+
+        #if success:
+        #    print "Successfullt handled request using %s" % ServiceHandler.__name__
+        #else:
+        #    print "Unable to handle request using %s" % ServiceHandler.__name__
+
+        if len(repo_configs) == 0:
+            logger.warning('Unable to find any of the repository URLs in the config: %s' % ', '.join(repo_urls))
+            return
+
+        # Wait one second before we do git pull (why?)
+        Timer(1.0, self.process_repositories, (repo_configs,
+                                               ref,
+                                               action)).start()
+
+    def log_message(self, format, *args):
+        """Overloads the default message logging method to allow messages to
+        go through our custom logger instead."""
+        import logging
+        logger = logging.getLogger()
+        logger.info("%s - - [%s] %s\n" % (self.client_address[0],
+                                          self.log_date_time_string(),
+                                          format%args))
+
+    def figure_out_service_from_request(self, request_headers, request_body):
+        """Parses the incoming request and attempts to determine whether
+        it originates from GitHub, GitLab or any other known service."""
+        import json
+        import logging
+        import parsers
+
+        logger = logging.getLogger()
+        data = json.loads(request_body)
+
+        user_agent = 'user-agent' in request_headers and request_headers['user-agent']
+        content_type = 'content-type' in request_headers and request_headers['content-type']
+
+        # Assume GitLab if the X-Gitlab-Event HTTP header is set
+        if 'x-gitlab-event' in request_headers:
+
+            logger.info("Received event from GitLab")
+            return parsers.GitLabRequestParser
+
+        # Assume GitHub if the X-GitHub-Event HTTP header is set
+        elif 'x-github-event' in request_headers:
+
+            logger.info("Received event from GitHub")
+            return parsers.GitHubRequestParser
+
+        # Assume BitBucket if the User-Agent HTTP header is set to
+        # 'Bitbucket-Webhooks/2.0' (or something similar)
+        elif user_agent and user_agent.lower().find('bitbucket') != -1:
+
+            logger.info("Received event from BitBucket")
+            return parsers.BitBucketRequestParser
+
+        # Special Case for Gitlab CI
+        elif content_type == "application/json" and "build_status" in data:
+
+            logger.info('Received event from Gitlab CI')
+            return parsers.GitLabCIRequestParser
+
+        # This handles old GitLab requests and Gogs requests for example.
+        elif content_type == "application/json":
+
+            logger.info("Received event from unknown origin.")
+            return parsers.GenericRequestParser
+
+        logger.error("Unable to recognize request origin. Don't know how to handle the request.")
+        return
+
+
+    def process_repositories(self, repo_configs, ref, action):
+        import os
+        import time
+        import logging
+        from wrappers import GitWrapper
+        from lock import Lock
+        from exceptions import FilterMatchError
+        
+        logger = logging.getLogger()
+
+        # Process each matching repository
+        for repo_config in repo_configs:
+
+            try:
+                # Verify that all filters matches the request if specified
+                if 'filters' in repo_config:
+                    for filter in repo_config['filters']:
+                        if filter['type'] == 'pull-request-filter':
+                            if filter['ref'] == ref and filter['action'] == action:
+                                continue
+                            raise FilterMatchError()
+                        else:
+                            logger.error('Unrecognized filter: ' % filter)
+                            raise FilterMatchError()
+                            
+            except FilterMatchError as e:
+                continue
+            
+            
+            # In case there is no path configured for the repository, no pull will
+            # be made.
+            if not 'path' in repo_config:
+                GitWrapper.deploy(repo_config)
+                continue
+            
+            running_lock = Lock(os.path.join(repo_config['path'], 'status_running'))
+            waiting_lock = Lock(os.path.join(repo_config['path'], 'status_waiting'))
+            try:
+
+                # Attempt to obtain the status_running lock
+                while not running_lock.obtain():
+
+                    # If we're unable, try once to obtain the status_waiting lock
+                    if not waiting_lock.has_lock() and not waiting_lock.obtain():
+                        logger.error("Unable to obtain the status_running lock nor the status_waiting lock. Another process is " +
+                                        "already waiting, so we'll ignore the request.")
+
+                        # If we're unable to obtain the waiting lock, ignore the request
+                        break
+
+                    # Keep on attempting to obtain the status_running lock until we succeed
+                    time.sleep(5)
+
+                n = 4
+                while 0 < n and 0 != GitWrapper.pull(repo_config):
+                    n -= 1
+
+                if 0 < n:
+                    GitWrapper.deploy(repo_config)
+
+            except Exception as e:
+                logger.error('Error during \'pull\' or \'deploy\' operation on path: %s' % repo_config['path'])
+                logger.error(e)
+
+            finally:
+
+                # Release the lock if it's ours
+                if running_lock.has_lock():
+                    running_lock.release()
+
+                # Release the lock if it's ours
+                if waiting_lock.has_lock():
+                    waiting_lock.release()
+
+
+class GitAutoDeploy(object):
+    _instance = None
+    _server = None
+    _config = None
+
+    def __new__(cls, *args, **kwargs):
+        """Overload constructor to enable Singleton access"""
+        if not cls._instance:
+            cls._instance = super(GitAutoDeploy, cls).__new__(
+                cls, *args, **kwargs)
+        return cls._instance
+
+    @staticmethod
+    def debug_diagnosis(port):
+        import logging
+        logger = logging.getLogger()
+
+        pid = GitAutoDeploy.get_pid_on_port(port)
+        if pid is False:
+            logger.warning('I don\'t know the number of pid that is using my configured port')
+            return
+
+        logger.info('Process with pid number %s is using port %s' % (pid, port))
+        with open("/proc/%s/cmdline" % pid) as f:
+            cmdline = f.readlines()
+            logger.info('cmdline ->', cmdline[0].replace('\x00', ' '))
+
+    @staticmethod
+    def get_pid_on_port(port):
+        import os
+
+        with open("/proc/net/tcp", 'r') as f:
+            file_content = f.readlines()[1:]
+
+        pids = [int(x) for x in os.listdir('/proc') if x.isdigit()]
+        conf_port = str(port)
+        mpid = False
+
+        for line in file_content:
+            if mpid is not False:
+                break
+
+            _, laddr, _, _, _, _, _, _, _, inode = line.split()[:10]
+            decport = str(int(laddr.split(':')[1], 16))
+
+            if decport != conf_port:
+                continue
+
+            for pid in pids:
+                try:
+                    path = "/proc/%s/fd" % pid
+                    if os.access(path, os.R_OK) is False:
+                        continue
+
+                    for fd in os.listdir(path):
+                        cinode = os.readlink("/proc/%s/fd/%s" % (pid, fd))
+                        minode = cinode.split(":")
+
+                        if len(minode) == 2 and minode[1][1:-1] == inode:
+                            mpid = pid
+
+                except Exception as e:
+                    pass
+
+        return mpid
+
+    def find_config_file_path(self):
+        """Attempt to find a config file in cwd and script path."""
+
+        import os
+        import re
+        import logging
+        logger = logging.getLogger()
+
+        # Look for a custom config file if no path is provided as argument
+        target_directories = [
+            os.path.dirname(os.path.realpath(__file__)),  # Script path
+        ]
+
+        # Add current CWD if not identical to script path
+        if not os.getcwd() in target_directories:
+            target_directories.append(os.getcwd())
+
+        target_directories.reverse()
+
+        # Look for a *conf.json or *config.json
+        for dir in target_directories:
+            for item in os.listdir(dir):
+                if re.match(r"conf(ig)?\.json$", item):
+                    path = os.path.realpath(os.path.join(dir, item))
+                    logger.info("Using '%s' as config" % path)
+                    return path
+
+        return './GitAutoDeploy.conf.json'
+
+    def read_json_file(self, file_path):
+        import json
+        import logging
+        logger = logging.getLogger()
+        
+        try:
+            json_string = open(file_path).read()
+
+        except Exception as e:
+            logger.critical("Could not load %s file\n" % file_path)
+            raise e
+
+        try:
+            data = json.loads(json_string)
+
+        except Exception as e:
+            logger.critical("%s file is not valid JSON\n" % file_path)
+            raise e
+
+        return data
+
+    def read_repo_config_from_environment(self, config_data):
+        """Look for repository config in any defined environment variables. If
+        found, import to main config."""
+        import logging
+        import os
+
+        if 'GAD_REPO_URL' not in os.environ:
+            return config_data
+
+        logger = logging.getLogger()
+
+        repo_config = {
+            'url': os.environ['GAD_REPO_URL']
+        }
+        
+        logger.info("Added configuration for '%s' found environment variables" % os.environ['GAD_REPO_URL'])
+
+        if 'GAD_REPO_BRANCH' in os.environ:
+            repo_config['branch'] = os.environ['GAD_REPO_BRANCH']
+
+        if 'GAD_REPO_REMOTE' in os.environ:
+            repo_config['remote'] = os.environ['GAD_REPO_REMOTE']
+
+        if 'GAD_REPO_PATH' in os.environ:
+            repo_config['path'] = os.environ['GAD_REPO_PATH']
+
+        if 'GAD_REPO_DEPLOY' in os.environ:
+            repo_config['deploy'] = os.environ['GAD_REPO_DEPLOY']
+
+        if not 'repositories' in config_data:
+            config_data['repositories'] = []
+
+        config_data['repositories'].append(repo_config)
+
+        return config_data
+
+    def init_config(self, config_data):
+        import os
+        import re
+        import logging
+        logger = logging.getLogger()
+        
+        self._config = config_data
+
+        # Translate any ~ in the path into /home/<user>
+        if 'pidfilepath' in self._config:
+            self._config['pidfilepath'] = os.path.expanduser(self._config['pidfilepath'])
+
+        for repo_config in self._config['repositories']:
+
+            # Setup branch if missing
+            if 'branch' not in repo_config:
+                repo_config['branch'] = "master"
+
+            # Setup remote if missing
+            if 'remote' not in repo_config:
+                repo_config['remote'] = "origin"
+
+            # Setup deploy commands list if not present
+            if 'deploy_commands' not in repo_config:
+                repo_config['deploy_commands'] = []
+
+            # Check if any global pre deploy commands is specified
+            if len(self._config['global_deploy'][0]) is not 0:
+                repo_config['deploy_commands'].insert(0, self._config['global_deploy'][0])
+
+            # Check if any repo specific deploy command is specified
+            if 'deploy' in repo_config:
+                repo_config['deploy_commands'].append(repo_config['deploy'])
+
+            # Check if any global post deploy command is specified
+            if len(self._config['global_deploy'][1]) is not 0:
+                repo_config['deploy_commands'].append(self._config['global_deploy'][1])
+
+            # If a Bitbucket repository is configured using the https:// URL, a username is usually
+            # specified in the beginning of the URL. To be able to compare configured Bitbucket
+            # repositories with incoming web hook events, this username needs to be stripped away in a
+            # copy of the URL.
+            if 'url' in repo_config and 'bitbucket_username' not in repo_config:
+                regexp = re.search(r"^(https?://)([^@]+)@(bitbucket\.org/)(.+)$", repo_config['url'])
+                if regexp:
+                    repo_config['url_without_usernme'] = regexp.group(1) + regexp.group(3) + regexp.group(4)
+
+            # Translate any ~ in the path into /home/<user>
+            if 'path' in repo_config:
+                repo_config['path'] = os.path.expanduser(repo_config['path'])
+
+        return self._config
+
+    def clone_all_repos(self):
+        """Iterates over all configured repositories and clones them to their
+        configured paths."""
+        import os
+        import re
+        import logging
+        from wrappers import GitWrapper
+        logger = logging.getLogger()
+
+        # Iterate over all configured repositories
+        for repo_config in self._config['repositories']:
+            
+            # Only clone repositories with a configured path
+            if 'path' not in repo_config:
+                logger.info("Repository %s will not be cloned (no path configured)" % repo_config['url'])
+                continue
+            
+            if os.path.isdir(repo_config['path']) and os.path.isdir(repo_config['path']+'/.git'):
+                logger.info("Repository %s already present" % repo_config['url'])
+                continue
+
+            # Clone repository
+            GitWrapper.clone(url=repo_config['url'], branch=repo_config['branch'], path=repo_config['path'])
+            
+            if os.path.isdir(repo_config['path']):
+                logger.info("Repository %s successfully cloned" % repo_config['url'])
+            else:
+                logger.error("Unable to clone %s branch of repository %s" % (repo_config['branch'], repo_config['url']))
+
+    def ssh_key_scan(self):
+        import re
+        import logging
+        from wrappers import ProcessWrapper
+        logger = logging.getLogger()
+
+        for repository in self._config['repositories']:
+
+            url = repository['url']
+            logger.info("Scanning repository: %s" % url)
+            m = re.match('.*@(.*?):', url)
+
+            if m is not None:
+                port = repository['port']
+                port = '' if port is None else ('-p' + port)
+                ProcessWrapper().call(['ssh-keyscan -t ecdsa,rsa ' +
+                                       port + ' ' +
+                                       m.group(1) +
+                                       ' >> ' +
+                                       '$HOME/.ssh/known_hosts'], shell=True)
+
+            else:
+                logger.error('Could not find regexp match in path: %s' % url)
+
+    def kill_conflicting_processes(self):
+        import os
+        import logging
+        logger = logging.getLogger()
+
+        pid = GitAutoDeploy.get_pid_on_port(self._config['port'])
+
+        if pid is False:
+            logger.error('[KILLER MODE] I don\'t know the number of pid ' +
+                         'that is using my configured port\n[KILLER MODE] ' +
+                         'Maybe no one? Please, use --force option carefully')
+            return False
+
+        os.kill(pid, signal.SIGKILL)
+        return True
+
+    def create_pid_file(self):
+        import os
+
+        with open(self._config['pidfilepath'], 'w') as f:
+            f.write(str(os.getpid()))
+
+    def read_pid_file(self):
+        with open(self._config['pidfilepath'], 'r') as f:
+            return f.readlines()
+
+    def remove_pid_file(self):
+        import os
+        os.remove(self._config['pidfilepath'])
+
+    def exit(self):
+        import sys
+        import logging
+        logger = logging.getLogger()
+        logger.info('\nGoodbye')
+        self.remove_pid_file()
+        sys.exit(0)
+
+    @staticmethod
+    def create_daemon():
+        import os
+
+        try:
+            # Spawn first child. Returns 0 in the child and pid in the parent.
+            pid = os.fork()
+        except OSError, e:
+            raise Exception("%s [%d]" % (e.strerror, e.errno))
+
+        # First child
+        if pid == 0:
+            os.setsid()
+
+            try:
+                # Spawn second child
+                pid = os.fork()
+                
+            except OSError, e:
+                raise Exception("%s [%d]" % (e.strerror, e.errno))
+
+            if pid == 0:
+                os.chdir('/')
+                os.umask(0)
+            else:
+                # Kill first child
+                os._exit(0)
+        else:
+            # Kill parent of first child
+            os._exit(0)
+
+        return 0
+
+    def run(self):
+        import sys
+        from BaseHTTPServer import HTTPServer
+        import socket
+        import os
+        import logging
+        import argparse
+        from lock import Lock
+
+        # Attempt to retrieve default config values from environment variables
+        default_quiet_value = 'GAD_QUIET' in os.environ
+        default_daemon_mode_value = 'GAD_DAEMON_MODE' in os.environ
+        default_config_value = 'GAD_CONFIG' in os.environ and os.environ['GAD_CONFIG']
+        default_ssh_keygen_value = 'GAD_SSH_KEYGEN' in os.environ
+        default_force_value = 'GAD_FORCE' in os.environ
+        default_pid_file_value = 'GAD_PID_FILE' in os.environ and os.environ['GAD_PID_FILE']
+        default_log_file_value = 'GAD_LOG_FILE' in os.environ and os.environ['GAD_LOG_FILE']
+        default_host_value = 'GAD_HOST' in os.environ and os.environ['GAD_HOST']
+        default_port_value = 'GAD_PORT' in os.environ and int(os.environ['GAD_PORT'])
+
+        parser = argparse.ArgumentParser()
+
+        parser.add_argument("-d", "--daemon-mode",
+                            help="run in background (daemon mode)",
+                            default=default_daemon_mode_value,
+                            action="store_true")
+
+        parser.add_argument("-q", "--quiet",
+                            help="supress console output",
+                            default=default_quiet_value,
+                            action="store_true")
+
+        parser.add_argument("-c", "--config",
+                            help="custom configuration file",
+                            default=default_config_value,
+                            type=str)
+
+        parser.add_argument("--ssh-keygen",
+                            help="scan repository hosts for ssh keys",
+                            default=default_ssh_keygen_value,
+                            action="store_true")
+
+        parser.add_argument("--force",
+                            help="kill any process using the configured port",
+                            default=default_force_value,
+                            action="store_true")
+
+        parser.add_argument("--pid-file",
+                            help="specify a custom pid file",
+                            default=default_pid_file_value,
+                            type=str)
+
+        parser.add_argument("--log-file",
+                            help="specify a log file",
+                            default=default_log_file_value,
+                            type=str)
+
+        parser.add_argument("--host",
+                            help="address to bind to",
+                            default=default_host_value,
+                            type=str)
+
+        parser.add_argument("--port",
+                            help="port to bind to",
+                            default=default_port_value,
+                            type=int)
+
+        args = parser.parse_args()
+
+        # Set up logging
+        logger = logging.getLogger()
+        logFormatter = logging.Formatter("%(asctime)s [%(levelname)-5.5s]  %(message)s")
+
+        # Enable console output?
+        if args.quiet:
+            logger.addHandler(logging.NullHandler())
+        else:
+            consoleHandler = logging.StreamHandler()
+            consoleHandler.setFormatter(logFormatter)
+            logger.addHandler(consoleHandler)
+
+        # All logs are recording
+        logger.setLevel(logging.NOTSET)
+
+        # Look for log file path provided in argument
+        config_file_path = None
+        if args.config:
+            config_file_path = os.path.realpath(args.config)
+            logger.info('Using custom configuration file \'%s\'' % config_file_path)
+
+        # Try to find a config file on the file system
+        if not config_file_path:
+            config_file_path = self.find_config_file_path()
+
+        # Read config data from json file
+        config_data = self.read_json_file(config_file_path)
+
+        # Configuration options coming from environment or command line will
+        # override those coming from config file
+        if args.pid_file:
+            config_data['pidfilepath'] = args.pid_file
+
+        if args.log_file:
+            config_data['logfilepath'] = args.log_file
+
+        if args.host:
+            config_data['host'] = args.host
+
+        if args.port:
+            config_data['port'] = args.port
+
+        # Extend config data with any repository defined by environment variables
+        config_data = self.read_repo_config_from_environment(config_data)
+
+        # Initialize config using config file data
+        self.init_config(config_data)
+
+        # Translate any ~ in the path into /home/<user>
+        if 'logfilepath' in self._config:
+            log_file_path = os.path.expanduser(self._config['logfilepath'])
+            fileHandler = logging.FileHandler(log_file_path)
+            fileHandler.setFormatter(logFormatter)
+            logger.addHandler(fileHandler)
+
+        if args.ssh_keygen:
+            logger.info('Scanning repository hosts for ssh keys...')
+            self.ssh_key_scan()
+
+        if args.force:
+            logger.info('Attempting to kill any other process currently occupying port %s' % self._config['port'])
+            self.kill_conflicting_processes()
+
+        # Clone all repos once initially
+        self.clone_all_repos()
+
+        # Set default stdout and stderr to our logging interface (that writes
+        # to file and console depending on user preference)
+        sys.stdout = LogInterface(logger.info)
+        sys.stderr = LogInterface(logger.error)
+        
+        if args.daemon_mode:
+            logger.info('Starting Git Auto Deploy in daemon mode')
+            GitAutoDeploy.create_daemon()
+        else:
+            logger.info('Git Auto Deploy started')
+
+        self.create_pid_file()
+
+        # Clear any existing lock files, with no regard to possible ongoing processes
+        for repo_config in self._config['repositories']:
+
+            # Do we have a physical repository?
+            if 'path' in repo_config:
+                Lock(os.path.join(repo_config['path'], 'status_running')).clear()
+                Lock(os.path.join(repo_config['path'], 'status_waiting')).clear()
+
+        try:
+            WebhookRequestHandler._config = self._config
+            self._server = HTTPServer((self._config['host'],
+                                       self._config['port']),
+                                      WebhookRequestHandler)
+            sa = self._server.socket.getsockname()
+            logger.info("Listening on %s port %s", sa[0], sa[1])
+            self._server.serve_forever()
+
+        except socket.error, e:
+
+            if not args.daemon_mode:
+                logger.critical("Error on socket: %s" % e)
+                GitAutoDeploy.debug_diagnosis(self._config['port'])
+
+            sys.exit(1)
+
+    def stop(self):
+        if self._server is not None:
+            self._server.socket.close()
+
+    def signal_handler(self, signum, frame):
+        import logging
+        logger = logging.getLogger()
+        self.stop()
+
+        if signum == 1:
+            self.run()
+            return
+
+        elif signum == 2:
+            logger.info('\nRequested close by keyboard interrupt signal')
+
+        elif signum == 6:
+            logger.info('Requested close by SIGABRT (process abort signal). Code 6.')
+
+        self.exit()
+
+
+def main():
+    import signal
+    from gitautodeploy import GitAutoDeploy
+
+    app = GitAutoDeploy()
+
+    signal.signal(signal.SIGHUP, app.signal_handler)
+    signal.signal(signal.SIGINT, app.signal_handler)
+    signal.signal(signal.SIGABRT, app.signal_handler)
+    signal.signal(signal.SIGPIPE, signal.SIG_IGN)
+
+    app.run()
+\ No newline at end of file