#!/usr/bin/env python3
#
# Copyright © 2020-2022 Chris Lamb <lamby@debian.org>
#
# diffoscope is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# diffoscope is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.

import re
import sys
import json
import click
import fileinput


@click.command()
@click.argument("files", nargs=-1)
def main(files):
    """
    Takes the output from `diffoscope --json=X` and attempts to guess which
    issue(s) the package is affected by. For example:

        $ diffoscope --json=output.json a.dsc b.dsc
        $ bin/auto-classify output.json
        build_path_captured_by_octave
        $

    If no arguments are provided, the JSON is read from standard input so
    you can pipe it in. For example:

        $ diffoscope --json=- a.dsc b.dsc | bin/auto-classify
        build_path_captured_by_octave
    """

    # The filenames are declared as a click argument: a command without any
    # declared parameters rejects positional arguments, so the documented
    # "auto-classify output.json" form could never reach fileinput.
    #
    # An empty ``files`` tuple makes fileinput fall back to standard input,
    # and the context manager closes whatever was opened.
    with fileinput.input(files=files) as stream:
        raw = "".join(stream)

    # Print one guessed issue name per line, in the order they are found.
    for issue in walk_json(json.loads(raw)):
        print(issue)


# Issues that can be recognised purely from how a "source1" value ends.
# Entries are checked in order and every matching entry is reported.
_SUFFIX_ISSUES = (
    (".qch", "timestamps_in_qhc"),
    (".vo", "nondeterministic_vo_files_generated_by_coq"),
    ("/pkgjs-lock.json", "pkgjs_lock_json_file_issue"),
    (".reverse.bin", "nondeterminism_in_files_generated_by_rime_deployer"),
    (
        "mastersummary.xml",
        "mono_mastersummary_xml_files_inherit_filesystem_ordering",
    ),
    (".h5ad", "captures_build_path_in_hd5_database_files"),
    (".3d", "timestamps_in_3d_files_created_by_survex"),
    (".tag.xml", "build_dir_in_tags_generated_by_doxygen"),
    (
        "/direct_url.json",
        "build_path_in_direct_url_json_file_generated_by_flit",
    ),
    (".cmt", "randomness_in_ocaml_cmti_files"),
    (".schema.yaml", "timestamps_added_by_librime"),
    (".bc", "randomness_in_postgres_opcodes"),
    ("/searchindex.js", "randomness_in_documentation_generated_by_sphinx"),
    (
        "/navtreeindex2.js",
        "nondeterministic_ordering_in_documentation_generated_by_doxygen",
    ),
    (".repo-id", "randomness_in_perl6_precompiled_libraries"),
)

# Patterns applied to the unified diff text; compiled once at import time.
_RE_DWZ_DIR_OWNER = re.compile(
    r"-drwxr-xr-x   0 pbuilder1.*/usr/lib/debug/.dwz/"
)
_RE_ASSERT_PATH = re.compile(r"-/build/1st/[^\n]+/\w+\.(c|h|cpp)]:\d+\n")
_RE_REMOVED_BUILD_PATH = re.compile(r"-/build/1st/")
_RE_KERNEL_VARIANT = re.compile(r"-[^\n]+x86_64")
_RE_KODI_FILENAME_HASH = re.compile(r"\.cpp\.\w{8}")
_RE_FORTRAN_PATH = re.compile(r"-/build/1st/[^\n]+/\w+\.f\n")


def _classify_source(source):
    """Yield the issues implied by the "source1" string on its own."""
    for suffix, issue in _SUFFIX_ISSUES:
        if source.endswith(suffix):
            yield issue

    if "/var/lib/coq/md5sums/" in source and source.endswith(".checksum"):
        yield "nondeterministic_checksum_generated_by_coq"


def _classify_diff(source, diff):
    """Yield the issues implied by a non-empty diff and its "source1"."""
    if source.endswith(".1") and "Generated Python Manual" in diff:
        yield "timezone_variant_in_argparse_manpage"

    if source == "Sphinx inventory" and "Indeks" in diff:
        yield "sphinxdoc_translations"

    if source.startswith("Pretty-printed") and "webpack:" in diff:
        yield "nondetermistic_js_output_from_webpack"

    if source.startswith("js-beautify") and "__webpack_" in diff:
        yield "nondetermistic_js_output_from_webpack"

    if source.endswith("sng") and "PlantUML" in diff:
        yield "png_generated_by_plantuml_captures_kernel_version_and_builddate"

    if (
        source.endswith("pom.properties")
        and "org.apache.felix.bundleplugin" in diff
    ):
        yield "date_added_to_pom_properties_by_felix_bundleplugin"

    if source.endswith(".ppu") and "202" in diff:
        yield "timestamps_in_ppu_generated_by_fpc"

    if "qt_resource_data" in diff:
        yield "timestamps_in_source_generated_by_rcc"

    if source.startswith("ghc --show-iface") and "ABI hash" in diff:
        yield "haskell_abi_hash_differences"

    if source.endswith(".html"):
        if "/build/1st/" in diff and "Doxygen" in diff:
            yield "absolute_build_dir_in_docs_generated_by_doxygen_ref"

        # Both markers must appear in the diff. The previous expression
        # `"Created: " and "validator.w3.org" in diff` only tested the
        # second one, because a non-empty string literal is always true.
        if "Created: " in diff and "validator.w3.org" in diff:
            yield "timestamps_in_documentation_generated_by_org_mode"

    if source.endswith(".beam") and "/build/1st/" in diff:
        yield "captures_build_path_in_beam_cma_cmt_files"

    if source.endswith("objects.inv"):
        yield "randomness_in_objects_inv"

    if source.endswith("h5dump {}") and "HDF5" in diff:
        yield "captures_build_path_in_hd5_database_files"

    if (
        source.endswith(".py")
        and "Form implementation generated from reading ui file" in diff
    ):
        yield "build_path_captured_by_pyuic5"

    if "hevea" in diff and "/build/1st/" in diff:
        yield "hevea_captures_build_path"

    if source.endswith(".pdf") and "CreationDate" in diff and "DAPS" in diff:
        yield "timestamps_in_pdf_generated_by_daps"

    if source.endswith(".prl") and "/build/1st/" in diff:
        yield "captures_build_dir_in_qmake_prl_files"

    if source.endswith(".devhelp2"):
        yield "nondeterministic_devhelp_documentation_generated_by_gtk_doc"

    if source.endswith("ppudump {}") and "2021/" in diff:
        yield "timestamps_in_ppu_generated_by_fpc"

    # This check used to be written twice verbatim, which reported the same
    # issue twice for a single diff; it is now evaluated once.
    if source.startswith("msgunfmt {}") and "PO-Revision-Date" in diff:
        yield "different_pot_creation_date_in_gettext_mo_files"

    if source.startswith("readelf "):
        if "note.go.buildid" in diff:
            yield "randomness_in_binaries_generated_by_golang"

        if "/build/1st/octave-" in diff:
            yield "build_path_captured_by_octave"

    if "../../../../../usr/" in diff:
        yield "captures_varying_number_of_build_path_directory_components"

    if "__pyx_" in diff:
        yield "random_hashes_in_cython_output"

    if source == "file list" and _RE_DWZ_DIR_OWNER.search(diff):
        yield "usr_lib_debug_dotdwz_dir_inherits_build_user"

    if source.startswith("objdump "):
        # Most specific match wins: only one objdump issue is reported.
        if _RE_ASSERT_PATH.search(diff):
            yield "captures_build_path_via_assert"
        elif _RE_REMOVED_BUILD_PATH.search(diff):
            if "caml" in diff:
                yield "ocaml_captures_build_path"
            elif "octave-" in diff:
                yield "build_path_captured_by_octave"
            else:
                yield "captures_build_path"
        elif _RE_KERNEL_VARIANT.search(diff):
            yield "captures_kernel_variant"

    if source.startswith("strings "):
        if "-ffile-prefix-map=/build/" in diff:
            yield "records_build_flags"

        if _RE_KODI_FILENAME_HASH.search(diff):
            yield "kodi_package_captures_build_path_in_source_filename_hash"

        if _RE_FORTRAN_PATH.search(diff):
            yield "fortran_captures_build_path"


def walk_json(elem):
    """
    Recursively walk a decoded JSON document and yield guessed issue names.

    Lists and dicts are descended into; any other value yields nothing. For
    every dict that has a string "source1" entry, the issues implied by that
    string are yielded, followed by the issues implied by the dict's
    "unified_diff" text when that is a non-empty string.

    Within a dict, the issues found inside a value are yielded before the
    issues derived from that key itself. The same issue name may be yielded
    more than once if several elements match.

    A missing "unified_diff" key or a non-string "source1" value is skipped
    rather than raising.
    """
    if isinstance(elem, list):
        for item in elem:
            yield from walk_json(item)
        return

    if not isinstance(elem, dict):
        return

    for key, value in elem.items():
        yield from walk_json(value)

        # Only string "source1" entries can be classified.
        if key != "source1" or not isinstance(value, str):
            continue

        yield from _classify_source(value)

        # The diff may be absent or null (e.g. when only nested details
        # differ); the diff-based heuristics then do not apply.
        diff = elem.get("unified_diff")
        if diff and isinstance(diff, str):
            yield from _classify_diff(value, diff)


# Run the command-line entry point only when this file is executed as a
# script; whatever main() returns is handed to sys.exit() as the exit status.
if __name__ == "__main__":
    sys.exit(main())
