#!/usr/bin/python3
# vim:ts=2:sts=2:sw=2:et

import glob
import os.path
import re
import subprocess
import sys
import textwrap

class PoFileParser:
  """Reads the comment header of a .po file and extracts licensing data.

  After construction the instance exposes:
    fileName          the path that was parsed
    license           normalized license expression, "unknown" when no
                      license line was recognized
    copyrightHolders  sorted list of unique translator entries

  Only the header is read; parsing stops at the first 'msgid ""' line.
  """

  licensesSeparator = ' or '
  _startOriginalHoldersLine = "# Copyright of the original manpage:"
  _startTranslatorsLine = "# Copyright © of Polish translation:"
  _endTranslatorsLine = 'msgid ""'
  _licenseRe = re.compile(r"^# Copyright © .*(\([^(]+\))$")
  _translatorsRe = re.compile(r"^# (.*<.*@.*>)")
  # Maps license names as spelled in the .po headers to the names used in
  # the generated copyright file; unknown names are passed through as is.
  _licensesMap = {
      '4.4 BSD'         : 'BSD-4-clause',
      'BSD'             : 'BSD-4-clause',
      'GFDL'            : 'GFDL-NIV-1.3+',
      'GFDL-1.3+'       : 'GFDL-NIV-1.3+',
      'GPL'             : 'GPL-2+',
      'GPL2+'           : 'GPL-2+',
      'May be freely distributed' : 'freely-redistributable',
      'OpenSSH license' : 'openssh',
      'OpenSSL'         : 'openssl',
      'Info-ZIP'        : 'infozip',
      'PD'              : 'public-domain',
      'Public Domain'   : 'public-domain',
      'free'            : 'freely-redistributable',
      'public domain'   : 'public-domain',
      'MIT-Open group'  : 'MIT-Open-Group',
      'MIT-X Window System' : 'MIT-X-Window-System',
      'Prior BSD'       : 'Prior-BSD',
    }


  def __init__(self, fileName):
    """Parse the header of fileName and populate license/copyrightHolders."""
    self.fileName = fileName
    self.license = "unknown"
    self.copyrightHolders = []
    self._readFile()
    # Sort in place: a bare sorted(...) call returns a new list and leaves
    # the attribute untouched, so the same translators listed in a
    # different order would not compare equal.
    self.copyrightHolders.sort()

  def _readFile(self):
    """Scan the header and dispatch recognized lines.

    'state' counts the section marker lines seen so far: lines read in
    state 1 are matched against the license pattern, lines read in state 2
    against the translator pattern, and lines before the first marker are
    skipped.
    """
    # The section markers contain '©', so decode explicitly as UTF-8 rather
    # than depending on the locale's preferred encoding.
    with open(self.fileName, encoding="utf-8") as inFile:
      state = 0
      for line in inFile:
        line = line.strip()
        # Stop if the first msgid is found, we only need the header
        if line == self._endTranslatorsLine:
          break
        if line == self._startOriginalHoldersLine or line == self._startTranslatorsLine:
          state = state + 1
          continue
        match = None
        if state == 1:
          match = self._licenseRe.match(line)
        elif state == 2:
          match = self._translatorsRe.match(line)
        else:
          # Any state other than 0 here means a marker line was repeated.
          assert (state == 0)
          continue

        if not match or not match.group(1):
          self._unrecognizedLine(line)
        elif state == 1:
          self._setLicense(match.group(1))
        elif state == 2:
          self._appendTranslator(match.group(1))


  def _unrecognizedLine(self, line):
    """Handle a header line that matched neither pattern.

    Two known lines in specific files are special-cased; everything else
    is reported on stdout.  For one particular file the method additionally
    runs "quilt push -a" and terminates the script with exit status 1.
    """
    if self.fileName == "../po/man1/xinit.1.po" and \
        line == "# Rafał Witowski (PTM), 1999.":
      # Keep only the name part, dropping the "# " prefix and the year.
      self._appendTranslator(line[2:22])
    elif self.fileName == "../po/man5/initscript.5.po" and \
        line == "# pierwotny autor nieznany, ????.":
      pass
    else:
      print("Unrecognized line in %s: %s" %(self.fileName, line))

      if self.fileName == "../po/man1/x86_64-linux-gnu-ranlib.1.po":
        print("Trying to apply patches. Restart the script if done")
        rc = subprocess.run(["quilt", "push", "-a"], check=False).returncode
        # NOTE(review): 2 is presumably quilt's "nothing left to apply"
        # status - confirm against the quilt documentation.
        assert rc == 0 or rc == 2
        sys.exit(1)

  def _setLicense(self, license):
    """Normalize a raw license expression and store it in self.license.

    Surrounding quotes, parentheses and commas are stripped, the text is
    split on licensesSeparator, each name is translated through
    _licensesMap and the sorted names are joined again.
    """
    tmpLicenses = license.strip('"(),').split(self.licensesSeparator)

    for idx, licenseName in enumerate(tmpLicenses):
      licenseName = licenseName.strip()
      tmpLicenses[idx] = self._licensesMap.get(licenseName, licenseName)

    self.license =  self.licensesSeparator.join(sorted(tmpLicenses))

  def _appendTranslator(self, name):
    """Add name to copyrightHolders unless it is already present."""
    if name not in self.copyrightHolders:
      self.copyrightHolders.append(name)


# Running counter of addToHolders() calls; the value at the time a key is
# first inserted becomes that key's sort rank.
sortIdx = 0
def addToHolders(holders, key, shortFileName):
  """Record shortFileName under key in the holders dictionary.

  Each value is a tuple (rank, fileNames).  A new key receives the current
  value of the global call counter as rank; an existing key keeps its rank
  and only gets the file name appended.  The counter advances on every
  call, whether or not the key was new.
  """
  global sortIdx
  if key not in holders:
    holders[key] = (sortIdx, [])
  holders[key][1].append(shortFileName)
  sortIdx = sortIdx + 1

def parsePoFiles(holders):
  """Parse every ../po/man*/*po file, in sorted path order, into holders.

  Files are grouped under the key (license, tuple of copyright holders) and
  recorded with the leading "../" removed from their path.
  """
  poPaths = sorted(glob.iglob("../po/man*/*po"))
  for poPath in poPaths:
    parsed = PoFileParser(poPath)
    groupKey = (parsed.license, tuple(parsed.copyrightHolders))
    # Drop the leading "../" so the stored path has no parent-directory prefix.
    addToHolders(holders, groupKey, poPath[3:])

def parseOldPtmFile(holders):
  """Add entries from ./ptm_authors_by_manpage to holders.

  Each line is read as "fileName;name, name, ...".  Lines whose file does
  not exist relative to the parent directory are skipped; the remaining
  ones are recorded under the fixed license 'GPL-2'.
  """
  ptmLicense = 'GPL-2'
  with open("./ptm_authors_by_manpage") as inFile:
    for record in inFile:
      fields = record.split(';')
      fileName = fields[0]
      if os.path.exists("../" + fileName):
        # The last name carries the line's trailing newline, hence ' \n'.
        names = tuple(rawName.strip(' \n') for rawName in fields[1].split(','))
        addToHolders(holders, (ptmLicense, names), fileName)

""" Returns autogenerated part of new copyright file, based on holders dictionary.
    Also fills licenses dictionary with names of licenses used by the holders"""
def generateListOfFiles(holders, licenses):
  """Build the Files/Copyright/License paragraphs of the copyright file.

  holders maps (license, tuple of holder names) to (rank, fileNames);
  paragraphs are emitted in ascending rank order.  For every file name
  starting with "po/" a matching "generated/..." entry (without the "po/"
  prefix and the 3-character suffix) is listed as well.

  Every individual license name found in the keys is registered in the
  licenses dictionary with value None unless it is already present.

  Returns the generated paragraphs as a single string.  The holders
  dictionary is not modified.
  """
  paragraphs = []
  for key, value in sorted(holders.items(), key=lambda kv: kv[1][0]):
    # Work on a copy so the lists stored in holders are left untouched and
    # a repeated call does not duplicate the generated entries.
    fileNames = list(value[1])
    # Only po/ files have a generated counterpart; other files contribute
    # nothing (no empty placeholder that would leave stray spaces).
    fileNames += ["generated/%s" % f[3:-3] for f in value[1] if f.startswith("po/")]
    wrapped = textwrap.wrap(' '.join(fileNames), width=69,
                            break_long_words=False, break_on_hyphens=False)
    paragraphs.append("Files: " + "\n       ".join(wrapped) + "\n")
    # Add the copyright holders
    names = "\n           © ".join(key[1])
    paragraphs.append("Copyright: © " + names + "\n")
    # Last, add the license
    paragraphs.append("License: " + key[0] + "\n\n")
    for license in key[0].split(PoFileParser.licensesSeparator):
      licenses.setdefault(license, None)
  return "".join(paragraphs)


def _saveLicenseText(licenses, licenseName, licenseFieldContent):
  """Store licenseFieldContent as the text of licenseName.

  Empty or whitespace-only content is ignored silently.  Content for a
  name that is not already a key of licenses is not stored; a notice is
  printed instead.
  """
  hasText = licenseFieldContent != "" and not licenseFieldContent.isspace()
  if not hasText:
    return
  if licenseName not in licenses:
    print("Ignored license from copyright file: " + licenseName)
    return
  licenses[licenseName] = licenseFieldContent


""" Returns header lines up to the first line that defines Files from po/ directory
    and fills licenses dictionary values from trailing lines of the copyright file"""
def parseExistingCopyright(licenses):
  """Read the existing "copyright" file from the current directory.

  Everything before the first line starting with 'Files: po/' is collected
  verbatim and returned.  After that line, each 'License: <name>' line
  opens a license field whose following non-blank lines are gathered and
  handed to _saveLicenseText() when a blank line or the end of the file is
  reached.

  Returns the manually maintained leading part of the file as a string.
  """
  manualParts = []
  finishedManualLines = False
  licenseName = None
  licenseFieldContent = ""
  # The generated paragraphs contain '©', so decode explicitly as UTF-8
  # instead of relying on the locale's preferred encoding.
  with open("copyright", encoding="utf-8") as copyrightFile:
    for line in copyrightFile:
      if line.startswith('Files: po/'):
        finishedManualLines = True
      elif not finishedManualLines:
        manualParts.append(line)
      elif line.startswith('License: '):
        # 9 == len('License: ')
        licenseName = line[9:].strip()
        licenseFieldContent = ""
      elif licenseName is not None: # parsing License field content
        if not line.isspace():
          licenseFieldContent += line
        else: # end of License field content
          _saveLicenseText(licenses, licenseName, licenseFieldContent)
          licenseName = None

  # A license field that runs to the end of the file has no closing blank line.
  if licenseName is not None:
    _saveLicenseText(licenses, licenseName, licenseFieldContent)
  return "".join(manualParts)

""" Generates the Licenses part of copyright file"""
def generateListOfLicenses(licenses):
  """Build the stand-alone License paragraphs of the copyright file.

  Licenses are emitted sorted by name.  Each one gets a 'License: <name>'
  header followed by its text; when the text is None only the header is
  written and a notice is printed.

  Returns the generated paragraphs as a single string.
  """
  chunks = []
  for licenseName in sorted(licenses):
    licenseText = licenses[licenseName]
    chunks.append("\n\nLicense: " + licenseName + "\n")
    if licenseText is not None:
      chunks.append(licenseText)
    else:
      print ("Missing license text for "  + licenseName)
  return "".join(chunks)


""" Writes new copyright file"""
def writeCopyrightFile(manualLines, autoGeneratedLinesForFiles,
                       autogeneratedLinesForLicenses):
  """Replace the "copyright" file in the current directory.

  The existing file is first renamed to "copyright~" as a backup, then a
  new file is written from the three given strings, in argument order.
  """
  os.rename("copyright", "copyright~")
  # The generated text contains '©'; write UTF-8 explicitly so the result
  # does not depend on the locale's preferred encoding.
  with open("copyright", "w", encoding="utf-8") as copyrightFile:
    copyrightFile.write(manualLines)
    copyrightFile.write(autoGeneratedLinesForFiles)
    copyrightFile.write(autogeneratedLinesForLicenses)

# Main function
# Group all files by (license, copyright holders): first the .po files,
# then the entries from the old PTM authors list.
copyrightHolders = dict()
parsePoFiles(copyrightHolders)
parseOldPtmFile(copyrightHolders)

# Order matters: generateListOfFiles() registers every license name as a key
# of `licenses`, and parseExistingCopyright() only keeps license texts whose
# name is already a key.
licenses = dict()
autoGeneratedLinesForFiles = generateListOfFiles(copyrightHolders, licenses)
manualLines = parseExistingCopyright(licenses)
autogeneratedLinesForLicenses = generateListOfLicenses(licenses)

# Back up the current copyright file and write the regenerated one.
writeCopyrightFile(manualLines, autoGeneratedLinesForFiles, 
                   autogeneratedLinesForLicenses)
