#!/usr/bin/env python3
# Copyright 2018 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A tool for running diffing tools and measuring patch sizes."""

import argparse
import logging
import os
import subprocess
import sys
import tempfile


class Error(Exception):
    """Raised when this tool cannot complete its processing.

    Used for both invalid command line input and failures of the external
    diffing commands.
    """


def ParseArguments(argv):
    """Parses and validates command line arguments.

    Args:
        argv: The command line arguments to parse. Every element is treated
            as an argument, so the program name must not be included.

    Returns:
        The argparse.Namespace with the parsed values: src_corpus,
        tgt_corpus and debug.

    Raises:
        Error: If either corpus directory was not given or is not an
            existing directory.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--src-corpus",
        metavar="DIR",
        help="The source corpus directory with compressed files.",
    )
    parser.add_argument(
        "--tgt-corpus",
        metavar="DIR",
        help="The target corpus directory with compressed files.",
    )
    parser.add_argument(
        "--debug", action="store_true", help="Turns on verbosity."
    )

    # Parse command-line arguments.
    args = parser.parse_args(argv)

    # Neither corpus flag is marked as required, so an omitted flag shows up
    # here as None and is rejected the same way as a path that is not a
    # directory.
    for corpus in (args.src_corpus, args.tgt_corpus):
        if not corpus or not os.path.isdir(corpus):
            raise Error(
                "Corpus directory {} is non-existent or inaccessible".format(
                    corpus
                )
            )
    return args


def _ListRegularFiles(directory):
    """Returns the paths of the regular files located directly in a directory.

    Args:
        directory: The directory to list.

    Returns:
        A list of paths, each one being |directory| joined with an entry name.
    """
    candidates = [
        os.path.join(directory, entry) for entry in os.listdir(directory)
    ]
    return [path for path in candidates if os.path.isfile(path)]


def main(argv):
    """Runs puffdiff and bsdiff on each corpus file pair and logs patch sizes.

    Every regular file in the source corpus is diffed against the file with
    the same name in the target corpus, once with "puffin" and once with
    "bsdiff". The file sizes and both patch sizes are logged at debug level.

    Args:
        argv: The full command line, including the program name at index 0.

    Returns:
        0 when every file pair was processed.

    Raises:
        Error: If a source file has no target file with the same name, or if
            one of the diffing commands exits with a non-zero status.
    """
    args = ParseArguments(argv[1:])

    if args.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    # Collect the regular files of both corpora.
    src_files = _ListRegularFiles(args.src_corpus)
    tgt_files = _ListRegularFiles(args.tgt_corpus)

    # Every source file name must also be present in the target corpus.
    src_names = {os.path.basename(path) for path in src_files}
    tgt_names = {os.path.basename(path) for path in tgt_files}
    files_mismatch = src_names - tgt_names
    if files_mismatch:
        message = "Target files {} do not exist in corpus: {}".format(
            files_mismatch, args.tgt_corpus
        )
        raise Error(message)

    for src in src_files:
        src_name = os.path.basename(src)
        tgt = os.path.join(args.tgt_corpus, src_name)

        # The temporary patch files only need to live long enough to be
        # measured; both are removed when the with-blocks exit.
        with tempfile.NamedTemporaryFile() as puffdiff_patch:
            with tempfile.NamedTemporaryFile() as bsdiff_patch:
                # Produce the puffdiff patch.
                operation = "puffdiff"
                cmd = [
                    "puffin",
                    "--operation={}".format(operation),
                    "--src_file={}".format(src),
                    "--dst_file={}".format(tgt),
                    "--patch_file={}".format(puffdiff_patch.name),
                ]
                status = subprocess.call(cmd)
                if status != 0:
                    raise Error(
                        "Puffin failed to do {} command: {}".format(
                            operation, cmd
                        )
                    )

                # Produce the bsdiff patch for the same file pair.
                operation = "bsdiff"
                cmd = ["bsdiff", "--type", "bz2", src, tgt, bsdiff_patch.name]
                status = subprocess.call(cmd)
                if status != 0:
                    raise Error(
                        "Failed to do {} command: {}".format(operation, cmd)
                    )

                logging.debug(
                    "%s(%d -> %d) : bsdiff(%d), puffdiff(%d)",
                    src_name,
                    os.path.getsize(src),
                    os.path.getsize(tgt),
                    os.path.getsize(bsdiff_patch.name),
                    os.path.getsize(puffdiff_patch.name),
                )

    return 0


# Script entry point: run the tool and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)
