#!/usr/bin/python3
# -*- coding: utf-8 -*-

# clean-notes: sort and clean the notes stored in notes.git
# Copyright © 2015 Chris Lamb <lamby@debian.org>
#             2015 Mattia Rizzolo <mattia@mapreri.org>
# Licensed under WTFPL — http://www.wtfpl.net/txt/copying/
#
# Depends: python3 python3-apt python3-yaml python3-requests python3-psycopg2

from apt_pkg import version_compare

import apt
import sys
import yaml
import argparse

from rblib import logger
from rblib import remote
from rblib import yamlfiles

# "apt" is to avoid adding an ugly "apt_pkg.init()" call to fix a "ValueError:
# _system not initialized" error.
apt = apt

# {package_name: {version: 0.0.0, comments: "blablabla", bugs: [111, 222],
#  issues: [issue1, issue2]}, {...}}

# Text shown at the top of --help; it doubles as the list of view names
# accepted by --enable / --disable.
desc = """Housekeep the packages.yml file from the notes.git repository.

This script is also able to show you other information:
* missing-version: list notes without version
* fixed-magically: list notes which version is marked as reproducible
* now-fixed: list notes of package where a greater version is reproducible
* new-tested-version: there is a new version tested in jenkins, but the
  package is not reproducible anyway...
* missing-usertagged: list usertagged bugs but not listed in the notes
* not-usertagged: list bugs listed in notes but without usertags
* archived-bugs: list bugs that are archived, probably they need to be removed
"""

# RawTextHelpFormatter keeps the bullet list above laid out as written.
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawTextHelpFormatter,
    description=desc,
)

# Repeatable view selectors; each occurrence appends one view name.
parser.add_argument(
    "-e",
    "--enable",
    default=[],
    action="append",
    help="enable a particular view (choose from above)",
)
parser.add_argument(
    "-d",
    "--disable",
    default=[],
    action="append",
    help="disable a particular view (choose from above)",
)

# Plain on/off switches.
parser.add_argument(
    "--ignore-duplicates",
    help="ATTENTION! This option remove any duplicated field present in the file!",
    action="store_true",
)
parser.add_argument(
    "-s",
    "--sort-only",
    help="Only sort the notes, do no clean up",
    action="store_true",
)
parser.add_argument("-v", "--verbose", action="store_true")
parser.add_argument("-n", "--dry-run", action="store_true")

# Placeholder used when this file is imported rather than executed: a bare
# class exposing only the two options that are read at import time.  It is
# replaced by the parsed command line when run as a script.
args = type(
    "DefaultArgs",
    (),
    dict(verbose=False, ignore_duplicates=False),
)


if __name__ == "__main__":
    args = parser.parse_args()

    # Views that stay switched off unless requested with --enable.
    # "missing-version" is intentionally not listed, so it is on by default.
    disabled_features = [
        "fixed-magically",
        "new-tested-version",
        "missing-usertagged",
        "not-usertagged",
        "now-fixed",
        "archived-bugs",
    ]
    args.disable += disabled_features
    for feature in args.enable:
        # Each --enable cancels a single occurrence of the view in the
        # disable list; a view that is not disabled is simply left alone.
        try:
            args.disable.remove(feature)
        except ValueError:
            pass

# Module-level logger used by every function below.  `args` is either the
# parsed command line (script run) or the DefaultArgs placeholder (import).
log = logger.setup_logging(__name__, args.verbose)
# Record the effective options; only visible when debug output is enabled.
log.debug(args)

# pyyaml does not check for duplicates when reading yaml.
#
# This is bad because it means that one either has to check for duplicates
# manually before entering info about a new package or some info will get lost
# when running this script because only one item will be remaining after
# parsing
#
# So instead, let's throw an error if a duplicate key is found. Using the line
# and column number in the error output, it is easy to find the offending key
# and manually merge the duplicated entries.
#
# It seems the only way to do this is to monkey-patch the pyyaml loader.
# To allow ScalarNode objects in a dictionary, their __eq__ and __hash__
# methods have to be patched.
#
# To check for duplicates while parsing, the function compose_mapping_node
# below was taken from the pyyaml sources (Copyright © 2006 Kirill Simonov
# <xi@resolvent.net> under an Expat/MIT license) and modified to keep a set of
# already seen nodes.


def scalar_node_eq(self, other):
    """
    Tell whether two nodes carry the same id, tag and value.

    The fields are compared in that order and the comparison stops at the
    first one that differs.
    """
    return all(
        getattr(self, field) == getattr(other, field)
        for field in ("id", "tag", "value")
    )


# Make every ScalarNode compare by (id, tag, value) instead of by identity.
yaml.nodes.ScalarNode.__eq__ = scalar_node_eq


def scalar_node_hash(self):
    """
    Hash a node from the same three fields used for equality, so that nodes
    comparing equal also land in the same set/dict bucket.
    """
    identity = (self.id, self.tag, self.value)
    return hash(identity)


# Hash ScalarNode objects by (id, tag, value) so they can be stored in a set.
yaml.nodes.ScalarNode.__hash__ = scalar_node_hash


def compose_mapping_node(self, anchor):
    """
    Compose a yaml MappingNode, refusing mappings that contain the same key
    twice.

    Meant to be installed as yaml.composer.Composer.compose_mapping_node, so
    `self` is the composer.  `anchor` is the anchor name of the mapping (or
    None); when given, the new node is registered under it in self.anchors.

    Returns the populated MappingNode.  Raises yaml.composer.ComposerError,
    carrying the start mark of the mapping and of the offending key, as soon
    as a key equal to an already composed one is met.
    """
    # presumably the event opening the mapping: it provides the tag, the
    # start mark and the flow style used below
    start_event = self.get_event()
    tag = start_event.tag
    # no usable explicit tag: ask the resolver for the tag of a mapping
    if tag is None or tag == "!":
        tag = self.resolve(yaml.nodes.MappingNode, None, start_event.implicit)
    # the end mark is not known yet (None); it is filled in once the closing
    # event has been read
    node = yaml.nodes.MappingNode(
        tag, [], start_event.start_mark, None, flow_style=start_event.flow_style
    )
    if anchor is not None:
        self.anchors[anchor] = node
    # keys composed so far for this mapping; membership relies on the
    # equality and hash of the key nodes
    seen = set()
    while not self.check_event(yaml.events.MappingEndEvent):
        # look at the next event without consuming it, only to remember where
        # the key starts for the error message
        key_event = self.peek_event()
        item_key = self.compose_node(node, None)
        if item_key in seen:
            raise yaml.composer.ComposerError(
                "while composing a mapping",
                start_event.start_mark,
                "found duplicate key",
                key_event.start_mark,
            )
        seen.add(item_key)
        item_value = self.compose_node(node, item_key)
        node.value.append((item_key, item_value))
    # the loop stopped on a MappingEndEvent: fetch it to get the end mark
    end_event = self.get_event()
    node.end_mark = end_event.end_mark
    return node


if __name__ == "__main__" and not args.ignore_duplicates:
    # Swap in the duplicate-detecting composer; with --ignore-duplicates the
    # stock pyyaml behaviour is kept.
    yaml.composer.Composer.compose_mapping_node = compose_mapping_node


def check_notes_validity(notes, testedpkgs):
    """
    Check every note against the packages known to the testing system.

    Two problems are reported through log.critical():
    * the package is not in `testedpkgs`, unless the note carries the
      "ftbfs_in_jenkins_setup" issue;
    * the package status is "blacklisted" but the note has none of the issues
      explaining why.

    `notes` maps package names to their note dict, `testedpkgs` maps package
    names to a dict with at least a "status" key.

    Returns True if at least one problem was reported, False otherwise.
    """
    # blacklisted packages really need to have any of these tags
    # applied (and possibly also a comment describing so)
    valid_tags = {
        "ftbfs_in_jenkins_setup",
        "blacklisted_on_jenkins",
        "blacklisted_on_jenkins_armhf_only",
    }
    errors = False
    # key=str, like the other functions walking the notes: a key that yaml
    # did not load as a string must not make the sort fail
    for pkg in sorted(notes, key=str):
        note = notes[pkg] or {}
        # an "issues:" key left empty is loaded as None; treat it as no issue
        issues = note.get("issues") or []
        if pkg not in testedpkgs:
            if "ftbfs_in_jenkins_setup" not in issues:
                log.critical(
                    "the package %s is not known to the testing system. Maybe it's misspelled or removed? Check https://tracker.debian.org/pkg/%s",
                    pkg,
                    pkg,
                )
                errors = True
            continue
        if testedpkgs[pkg]["status"] == "blacklisted" and valid_tags.isdisjoint(
            issues
        ):
            log.critical("the package %s is blacklisted, but no note was found", pkg)
            errors = True
    return errors


def check_bugs(notes):
    """
    Report the bugs listed in the notes that are not usertagged.

    Every such bug is logged and its bugs.debian.org page is opened through
    the webbrowser module.  Nothing is returned and `notes` is left untouched.
    """
    # (package, bug number) pairs, walking the packages in sorted order
    pairs = [
        (str(pkg), int(bug))
        for pkg in sorted(notes, key=str)
        if "bugs" in notes[pkg]
        for bug in notes[pkg]["bugs"]
    ]
    log.debug(
        "looking throught bugs listed in the notes and check whether they are usertagged"
    )
    if not pairs:
        return
    bugs_list = sorted(number for _, number in pairs)
    bugs_package = {number: package for package, number in pairs}
    # builds "id=1 OR id=2 OR ... OR id=N"
    ids = "id=" + " OR id=".join(str(number) for number in bugs_list)
    rows = remote.query_udd(
        conn_udd,
        """SELECT id FROM bugs_usertags WHERE
                     email='reproducible-builds@lists.alioth.debian.org' AND (
                     %s )"""
        % ids,
    )
    import webbrowser

    for bug in bugs_list:
        # the rows are one-element tuples, hence the lookup of a 1-tuple
        if (bug,) in rows:
            continue
        log.info(
            "https://bugs.debian.org/%s in package %s is not usertagged",
            bug,
            bugs_package[bug],
        )
        webbrowser.open("https://bugs.debian.org/{}".format(bug))


def find_old_notes(testedpkgs, notes):
    """
    Compare the notes with the test results and point out stale ones.

    Depending on the views left enabled in args.disable this logs notes
    without version, notes whose version is reproducible, and packages with
    a newer tested version.

    Returns the list of packages that are reproducible in a version greater
    than the noted one (filled only while the "now-fixed" view is enabled).
    """
    log.debug("parsing the reproducible.json and the notes to find weirdness")
    toremove = []
    for pkg in sorted(notes, key=str):
        note = notes[pkg]
        if "version" not in note and "missing-version" not in args.disable:
            log.info("There is no version set for the package " + pkg)
            continue
        if pkg not in testedpkgs:
            # this is due to
            # https://anonscm.debian.org/cgit/qa/jenkins.debian.net.git/commit/?id=275309
            # and later commits to that file, otherwise this would be
            # quite a issue
            log.debug(pkg + " was not tested. Skipping cruft check.")
            continue
        item = testedpkgs[pkg]
        tested_version = item["version"]
        noted_version = note.get("version")
        issues = note.get("issues")

        if item["status"] == "reproducible":
            # notes only about "timestamps_in_png" (or without issues) are
            # not reported by the two checks below
            if (
                tested_version == noted_version
                and issues
                and "timestamps_in_png" not in issues
                and "fixed-magically" not in args.disable
            ):
                log.info(
                    "%s/%s has a note for the version %s but that version is reproducible",
                    remote.RB_SITE,
                    pkg,
                    tested_version,
                )
            if (
                noted_version
                and "now-fixed" not in args.disable
                and issues
                and "timestamps_in_png" not in issues
                and version_compare(str(tested_version), str(noted_version)) > 0
            ):
                log.info(
                    "The package "
                    + pkg
                    + " is now reproducible but still listed in the notes"
                )
                toremove.append(pkg)

        if (
            noted_version
            and version_compare(str(tested_version), str(noted_version)) > 0
            and "new-tested-version" not in args.disable
        ):
            log.info("The package " + pkg + " has a new tested version")
    return toremove


def get_bugs():
    """
    Fetch the usertags of reproducible-builds@lists.alioth.debian.org.

    Returns a dict {bug_number: ["usertag1", "usertag2"]}.
    """
    query = (
        "SELECT * FROM bugs_usertags"
        " WHERE email='reproducible-builds@lists.alioth.debian.org'"
    )
    bugs = {}
    # every row is a tuple (email, tag, id)
    for row in remote.query_udd(conn_udd, query):
        bugs.setdefault(row[2], []).append(row[1])
    return bugs


def detect_archived_bugs(notes):
    """
    Drop from the notes the usertagged bugs that are archived.

    Every dropped bug is reported with log.warning().  The "bugs" lists in
    `notes` are modified in place; nothing is returned.
    """
    query = (
        "SELECT u.id "
        "FROM bugs_usertags AS u JOIN archived_bugs AS a ON u.id=a.id "
        "WHERE u.email='reproducible-builds@lists.alioth.debian.org'"
    )
    # a set, as it is probed once for every bug of every package
    archived = {row[0] for row in remote.query_udd(conn_udd, query)}
    # key=str, like the other functions walking the notes
    for pkg in sorted(notes, key=str):
        bugs = notes[pkg].get("bugs")
        if not bugs:
            continue
        # Collect the survivors in a new list instead of calling remove()
        # during the iteration: removing an element while looping over the
        # same list makes the loop skip the element that follows it.
        kept = []
        for bug in bugs:
            if bug in archived:
                log.warning(
                    "https://bugs.debian.org/%s in %s/%s is archived",
                    bug,
                    remote.RB_SITE,
                    pkg,
                )
            else:
                kept.append(bug)
        # slice assignment keeps the very same list object inside the note
        bugs[:] = kept


def parse_bugs(bugs):
    """
    This function return a dict:
    { "package_name": {
        "bugs": [bug1, bug2],
         "usertags": ["usertag1", "usertag2"]
       }
    }

    The `bugs` argument is {bug_number: ["usertag1", "usertag2"]}

    Bugs carrying one of the ignored usertags are not looked up at all, and
    closed bugs are left out of the result.  An empty dict is returned,
    without querying the database, when no bug is left to look up.
    """
    log.debug("find out if filed bugs are also noted in the notes")
    ignored_tags = {"toolchain", "infrastructure"}
    wanted = [bug for bug in sorted(bugs) if ignored_tags.isdisjoint(bugs[bug])]
    if not wanted:
        # an empty condition would leave a bare "WHERE" in the query
        log.debug("no bug to look up once the ignored usertags are left out")
        return {}
    ids = " OR ".join("b.id=%s" % bug for bug in wanted)
    # joining with the sources table avoids picking virtual packages
    query = (
        "SELECT DISTINCT b.id, b.source, b.done "
        "FROM bugs AS b JOIN sources AS s ON b.source=s.source "
        "WHERE " + ids
    )
    log.debug(query)
    rows = remote.query_udd(conn_udd, query)
    log.info("%d bugs found", len(rows))
    packages = {}
    for bug_id, source, done in rows:
        if done:  # do not consider closed bugs
            continue
        entry = packages.setdefault(source, {})
        entry.setdefault("bugs", []).append(bug_id)
        tags = bugs[bug_id]
        if tags:
            entry.setdefault("usertags", []).extend(tags)
    return packages


def join_notes_bugs(notes, bugs):
    """
    Add to the notes the bugs found in `bugs` that they do not list yet.

    `bugs` is {"package_name": {"bugs": [bug1, bug2], ...}}.  Packages absent
    from the module-level `testedpkgs` are skipped.  Every added bug is
    logged; a package without any note gets a new one with the tested version.

    `notes` is updated in place and also returned.
    """
    for package in sorted(bugs):
        if package not in testedpkgs:
            # maybe the bug is for a removed package?
            continue
        for bug in sorted(bugs[package]["bugs"]):
            if package not in notes:
                log.info(
                    "https://bugs.debian.org/%s in %s is not listed in notes.git.",
                    bug,
                    package,
                )
                entry = {}
                notes[package] = entry
                entry["bugs"] = [bug]
                # just try guessing the version (prefers unstable)
                entry["version"] = testedpkgs[package]["version"]
                continue
            entry = notes[package]
            if "bugs" not in entry:
                log.info(
                    "https://bugs.debian.org/%s in %s is not listed in notes.git.",
                    bug,
                    package,
                )
                entry["bugs"] = [bug]
            elif bug not in entry["bugs"]:
                log.info(
                    "https://bugs.debian.org/%s in package %s is not listed in notes.git.",
                    bug,
                    package,
                )
                entry["bugs"].append(bug)
    return notes


def cleanup_notes(notes, toremove):
    """
    Delete from `notes` every package named in `toremove`.

    The dict is modified in place and returned; a package that is not in the
    notes raises KeyError.
    """
    for stale in toremove:
        notes.pop(stale)
    return notes


if __name__ == "__main__":
    # Only the bug related views need a connection to UDD.
    if (
        "not-usertagged" not in args.disable
        or "missing-usertagged" not in args.disable
        or "archived-bugs" not in args.disable
    ):
        conn_udd = remote.start_udd_connection()
    notes = yamlfiles.load_notes()
    # Values used with --sort-only, where neither the validity check nor the
    # search for old notes runs: without them sys.exit() below would hit an
    # undefined `errors`.
    errors = False
    toremove = []
    testedpkgs = None
    if not args.sort_only:
        testedpkgs = remote.load_reproducible_status()
        errors = check_notes_validity(notes, testedpkgs)
        toremove = find_old_notes(testedpkgs, notes)
    notes = cleanup_notes(notes, toremove)
    if "not-usertagged" not in args.disable:
        check_bugs(notes)
    if "missing-usertagged" not in args.disable:
        if testedpkgs is None:
            # join_notes_bugs() reads the module-level testedpkgs, which was
            # not loaded above because of --sort-only
            testedpkgs = remote.load_reproducible_status()
        bugs = get_bugs()
        bugs = parse_bugs(bugs)
        notes = join_notes_bugs(notes, bugs)
    if "archived-bugs" not in args.disable:
        detect_archived_bugs(notes)
    if not args.dry_run:
        yamlfiles.write_out(notes)
    else:
        log.info(
            "Don't write out the %s file, as requested (dry-run).", yamlfiles.NOTES_YAML
        )
    # exit status 1 when check_notes_validity() reported a problem
    sys.exit(1 if errors else 0)
