import argparse
import json
import os
from pathlib import Path


def get_package_files(repo_dir):
    """Collect every file named ``Packages`` found beneath ``repo_dir``.

    The directory tree is searched recursively.

    Args:
        repo_dir: Directory to search (a single repo, or a directory
            holding several repos).

    Returns:
        A list of absolute ``pathlib.Path`` objects, one per Packages file,
        in the order the directory walk yields them.
    """
    return [found.absolute() for found in Path(repo_dir).rglob('Packages')]


def get_distribution(package_file):
    """Return the distribution name for a Packages file.

    The name is read from the ``Archive: `` line of the ``Release`` file
    that sits in the same directory as ``package_file``.

    Args:
        package_file: Path (``str`` or ``pathlib.Path``) of a Packages file.

    Returns:
        The text following ``Archive: `` with surrounding whitespace
        (including the line terminator) removed, or ``None`` when there is
        no Release file next to the Packages file or it has no
        ``Archive: `` line.
    """
    release_path = Path(package_file).parent / 'Release'

    try:
        release_data = open(release_path, "r", encoding="utf-8")
    except FileNotFoundError:
        # No Release file alongside the Packages file: report "unknown"
        # through the existing None result rather than aborting the scan.
        return None

    with release_data:
        for release_line in release_data:
            if release_line.startswith("Archive: "):
                # Strip the trailing newline so callers get the bare name.
                return release_line[9:].strip()

    return None


def get_licence(deb_file):
    """Return the licence for a .deb file.

    Placeholder: licence detection is not implemented, so the result is
    always the empty string and ``deb_file`` is ignored.
    """
    # TODO: Figure out a sane way to get the license
    return ''


def get_packages(package_file):
    """Split a Packages file into one text chunk per package entry.

    A new entry starts at every line beginning with ``Package: ``. Each
    returned chunk holds the raw lines of the entry; when
    ``get_distribution`` finds a distribution for the file, a
    ``Distribution: <name>`` line is appended to every chunk.

    Args:
        package_file: Path of the Packages file to read.

    Returns:
        A list of strings, one per entry, in file order. Chunks containing
        only whitespace are skipped.
    """
    entries = []
    current = []

    with open(package_file, "r", encoding="utf-8") as package_data:
        for line in package_data:
            if line.startswith('Package: ') and current:
                entries.append(''.join(current))
                current = []
            current.append(line)

    # The final entry has no following 'Package: ' line to terminate it, so
    # it must be flushed explicitly or it would be lost.
    if current:
        entries.append(''.join(current))

    entries = [entry for entry in entries if entry.strip()]
    if not entries:
        return []

    # The distribution is the same for every entry in the file, so look it
    # up once instead of re-reading the Release file per entry.
    distribution = get_distribution(package_file)
    if distribution is None:
        return entries

    return [entry + '\nDistribution: ' + distribution for entry in entries]


def get_package(package_data):
    """Decode one Packages entry into a dictionary.

    Recognised ``Field: value`` lines and the keys they produce:

    * ``Package``, ``Architecture``, ``Maintainer`` -> same-named key
    * ``Homepage`` -> ``Url``
    * ``Version`` -> ``Version`` (text before the first ``-``) and
      ``Build`` (everything after it, hyphens preserved). When the version
      part contains ``.pgdg``, the value is split there instead and
      ``Build`` becomes ``pgdg`` plus the remainder.
    * ``Filename`` -> ``Filename``, plus ``Licence`` from ``get_licence``
      when that returns something other than ``None``
    * ``Description`` -> ``Description``; following lines that start with
      a space are appended (stripped, newline-separated) and a ``.`` line
      adds a paragraph break
    * ``Distribution`` -> ``Repo`` (full value) and ``Distribution``
      (text before the first ``-``)

    Args:
        package_data: Text of a single package entry.

    Returns:
        A dict containing only the keys whose fields were present.
    """
    # Fields copied verbatim: control-file field name -> output key
    plain_fields = {
        'Package': 'Package',
        'Architecture': 'Architecture',
        'Homepage': 'Url',
        'Maintainer': 'Maintainer',
    }

    package = {}
    in_description = False

    for line in package_data.splitlines():
        if in_description:
            if line.strip() == ".":
                # A lone "." marks a blank line within the description
                package['Description'] += "\n"
                continue
            if line.startswith(" "):
                package['Description'] += '\n' + line.strip()
                continue
            # Any line that doesn't start with a space ends the
            # description; fall through and treat it as a normal field.
            in_description = False

        field, separator, value = line.partition(': ')
        if not separator:
            continue

        if field in plain_fields:
            package[plain_fields[field]] = value

        elif field == 'Version':
            # The build is normally prefixed with a -, but sometimes
            # just .pgdg. partition() keeps any further hyphens in the
            # build intact instead of dropping them.
            version, _, build = value.partition('-')
            if '.pgdg' in version:
                version, _, remainder = value.partition('.pgdg')
                build = 'pgdg' + remainder
            package['Version'] = version
            package['Build'] = build

        elif field == 'Filename':
            package['Filename'] = value

            licence = get_licence(value)
            if licence is not None:
                package['Licence'] = licence

        elif field == 'Description':
            # First line is stored as-is; continuation lines are handled
            # at the top of the loop on subsequent iterations.
            package['Description'] = value
            in_description = True

        elif field == 'Distribution':
            package['Distribution'] = value.split('-')[0]
            package['Repo'] = value

    return package


def main():
    """Command-line entry point.

    Takes one positional argument, ``repo``, finds every Packages file
    beneath it, decodes each package entry, and writes the resulting list
    to ``apt.json`` in the current working directory as indented JSON with
    sorted keys.
    """
    arg_parser = argparse.ArgumentParser(
        description='Scan a set of APT repos and '
                    'generate a JSON catalog of '
                    'the contents.')
    arg_parser.add_argument(
        "repo",
        help="the repo directory, or directory "
             "containing multiple repos")
    options = arg_parser.parse_args()

    # One decoded dictionary per entry, across every Packages file found
    catalog = [
        get_package(entry)
        for packages_path in get_package_files(options.repo)
        for entry in get_packages(packages_path)
    ]

    with open('apt.json', 'w') as catalog_file:
        json.dump(catalog, catalog_file, indent=2, sort_keys=True)


# Run the catalog generation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
