#! /usr/libexec/platform-python -s
# -*- mode:python -*-
# vim: ts=4 sw=4 smarttab expandtab

import argparse
import grp
import logging
import os
import pwd
import signal
import socket
import subprocess
import sys
import time

# Default to INFO; main() raises the 'ceph-crash' logger to DEBUG on request.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger('ceph-crash')

# Entity names tried, in order, when posting a crash report.  main()
# replaces the whole list with a single entry when --name is given.
auth_names = [
    'client.crash.%s' % socket.gethostname(),
    'client.crash',
    'client.admin',
]


def parse_args(argv=None):
    """Parse the command line of the crash scraper.

    :param argv: list of argument strings to parse.  ``None`` (the default)
                 lets argparse fall back to ``sys.argv[1:]``, which is what
                 the daemon itself uses.
    :returns: an ``argparse.Namespace`` with the attributes ``path``,
              ``delay`` (float, minutes), ``name`` and ``log_level``
              (the latter two are ``None`` when not given).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-p', '--path', default='/var/lib/ceph/crash',
        help='base path to monitor for crash dumps')
    parser.add_argument(
        '-d', '--delay', default=10.0, type=float,
        help='minutes to delay between scans (0 to exit after one)',
    )
    # The help text lists every name in the module-level default list,
    # including the per-host one that is tried first.
    parser.add_argument(
        '--name', '-n',
        help='ceph name to authenticate as '
             '(default: try client.crash.<hostname>, client.crash, '
             'client.admin)')
    parser.add_argument(
        '--log-level', '-l',
        help='log level output (default: INFO), support INFO or DEBUG')

    return parser.parse_args(argv)


def post_crash(path):
    """Post the crash report found in directory *path* to the cluster.

    The contents of ``<path>/meta`` are piped to
    ``timeout 30 ceph -n <name> crash post -i -`` once for each entry of
    the module-level ``auth_names`` list, stopping at the first attempt
    that exits with status 0.

    :param path: crash directory containing the ``meta`` file.
    :returns: exit status of the last attempt (0 means the report was
              accepted; also 0 if ``auth_names`` is empty).
    :raises OSError: if ``meta`` cannot be read or the command cannot be
                     started.
    """
    # Read the report before spawning anything: a missing or unreadable
    # meta file then fails fast instead of leaving a started child
    # process behind, and the file is read once rather than per attempt.
    with open(os.path.join(path, 'meta'), 'rb') as f:
        meta = f.read()

    rc = 0
    for n in auth_names:
        pr = subprocess.Popen(
            args=['timeout', '30', 'ceph',
                  '-n', n,
                  'crash', 'post', '-i', '-'],
            stdin=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        (_, stderr) = pr.communicate(input=meta)
        # communicate() has already waited for the child to exit.
        rc = pr.returncode
        # stderr is only used for logging; never let odd bytes abort the post
        stderr = stderr.decode(errors='replace')
        if rc != 0 or stderr != "":
            log.warning('post %s as %s failed: %s' % (path, n, stderr))
        if rc == 0:
            break
    return rc


def scrape_path(path):
    """Post every completed crash dump found directly under *path*.

    An entry is processed when it contains both a ``meta`` and a ``done``
    file.  It is handed to ``post_crash()`` and, if that returns 0, the
    entry is moved into ``<path>/posted/``.  Unreadable entries, entries
    without ``meta`` and dumps that are still incomplete are skipped
    individually; the remaining entries are still examined.

    :param path: base directory holding one sub-directory per crash.
    """
    for p in os.listdir(path):
        crashpath = os.path.join(path, p)
        if not os.access(crashpath, os.R_OK):
            log.warning('unable to read crash path %s' % (crashpath))
            continue
        metapath = os.path.join(crashpath, 'meta')
        donepath = os.path.join(crashpath, 'done')
        if not os.path.isfile(metapath):
            # not a crash dump (this also covers the 'posted' directory
            # unless it happens to contain a file named 'meta')
            continue
        if not os.path.isfile(donepath):
            # hang out just for a bit; either we interrupted the dump
            # or the daemon crashed before finishing it
            time.sleep(1)
            if not os.path.isfile(donepath):
                # Skip only this dump.  Aborting the whole scan here would
                # let one never-finished dump block every entry listed
                # after it, on every scan.
                continue
        # ok, we can process this one
        rc = post_crash(crashpath)
        if rc == 0:
            os.rename(crashpath, os.path.join(path, 'posted/', p))
            log.debug(
                "posted %s and renamed %s -> %s " %
                (metapath, p, os.path.join('posted/', p))
            )


def handler(signum, frame):
    """Signal handler: report the signal number on stdout and exit with 0.

    :param signum: number of the signal that was delivered.
    :param frame: interrupted stack frame (unused).
    """
    notice = '*** Interrupted with signal %d ***' % signum
    print(notice)
    # same effect as sys.exit(0): unwinds via SystemExit with code 0
    raise SystemExit(0)


def drop_privs():
    """Switch from root to the ``ceph`` user and group.

    Does nothing when the process is not running with uid 0.  When it is,
    the supplementary group list is cleared and the gid and uid of the
    ``ceph`` account are applied; any failure along the way is logged and
    the process exits with status 1.
    """
    if os.getuid() != 0:
        return

    try:
        uid = pwd.getpwnam("ceph").pw_uid
        gid = grp.getgrnam("ceph").gr_gid
        # groups and gid are changed first, while we are still uid 0
        os.setgroups([])
        os.setgid(gid)
        os.setuid(uid)
    except Exception as e:
        log.error(f"Unable to drop privileges: {e}")
        sys.exit(1)


def main():
    """Run the crash scraper.

    Drops root privileges, installs the SIGINT/SIGTERM handlers, parses
    the command line, waits (retrying every 30 seconds) until the
    ``posted`` sub-directory exists, runs ``ceph -s`` once, and then calls
    ``scrape_path()`` every ``--delay`` minutes.  With a delay of 0 the
    process exits with status 0 after the first scan.
    """
    global auth_names

    # run as unprivileged ceph user
    drop_privs()

    # exit code 0 on SIGINT, SIGTERM
    for signo in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signo, handler)

    args = parse_args()
    if args.log_level == 'DEBUG':
        log.setLevel(logging.DEBUG)

    # an explicit --name replaces the list of default entity names
    if args.name:
        auth_names = [args.name]

    postdir = os.path.join(args.path, 'posted')
    while not os.path.isdir(postdir):
        log.error("directory %s does not exist; please create" % postdir)
        time.sleep(30)

    log.info("pinging cluster to exercise our key")
    ping = subprocess.Popen(args=['timeout', '30', 'ceph', '-s'])
    ping.wait()

    log.info("monitoring path %s, delay %ds" % (args.path, args.delay * 60.0))
    single_pass = args.delay == 0
    pause = args.delay * 60
    while True:
        try:
            scrape_path(args.path)
        except Exception as e:
            # a failed scan must not take the daemon down; retry next round
            log.error(f"Error scraping {args.path}: {e}")
        if single_pass:
            sys.exit(0)
        time.sleep(pause)


# Only start the scraper when executed as a script, not when imported.
if __name__ == "__main__":
    main()
