From 8c6e8db1838271efa7d1b00cf1a55bc8a73ca553 Mon Sep 17 00:00:00 2001
From: Tobias Markmann <tm@ayena.de>
Date: Sun, 7 Jun 2015 17:24:38 +0200
Subject: Add support for updating copyright on files in Copyrighter.py script

Refactored the code to handle the following cases nicely:
- no copyright for current user present yet
- outdated copyright for current user present (with and without range)
- copyright present and up to date

Test-Information:

Tested with SWIFT_LICENSE_CONFIG set to "Isode Limited|default".

Change-Id: I4df475f7ecd55aebe892411b2323da50fcbca525

diff --git a/BuildTools/Copyrighter.py b/BuildTools/Copyrighter.py
index 56fcf01..a3b6379 100755
--- a/BuildTools/Copyrighter.py
+++ b/BuildTools/Copyrighter.py
@@ -7,17 +7,20 @@ DEFAULT_LICENSE = "gpl3"
 CONTRIBUTOR_LICENSE = "mit"
 LICENSE_DIR = "Documentation/Licenses"
 
+# The following regex extracts license comment blocks, and their individual parts, from a complete source file.
+reParseLicenseCommentBlocks = re.compile(ur'(\/\*\n\s\*\sCopyright \(c\) (?P<startYear>\d\d\d\d)(-(?P<endYear>\d\d\d\d))? (?P<author>[^\n\.]*)\.?\n.\* (?P<license>[^\n]*)\n \* (?P<seeMore>[^\n]+)\n *\*\/)')
+
 class License :
 	def __init__(self, name, file) :
 		self.name = name
 		self.file = file
 
 licenses = {
-		"gpl3" : License("GNU General Public License v3", "GPLv3.txt"),
-		"mit" : License("MIT License", "MIT.txt"),
+		"default": License("All rights reserved.", "See the COPYING file for more information."),
+		"gpl3" : License("Licensed under the GNU General Public License v3.", "See " + LICENSE_DIR + "/" + "GPLv3.txt" + " for more information."),
+		"mit" : License("Licensed under the MIT License.", "See " + LICENSE_DIR + "/" +  "MIT.txt" + " for more information."),
 	}
 
-
 class Copyright :
 	def __init__(self, author, year, license) :
 		self.author = author
@@ -28,64 +31,52 @@ class Copyright :
 		return "\n".join([
 			comment_chars[0],
 			comment_chars[1] + " Copyright (c) %(year)s %(name)s" % {"year" : self.year, "name" : self.author },
-			comment_chars[1] + " Licensed under the " + licenses[self.license].name + ".",
-			comment_chars[1] + " See " + LICENSE_DIR + "/" + licenses[self.license].file + " for more information.",
+			comment_chars[1] + licenses[self.license].name,
+			comment_chars[1] + licenses[self.license].file,
 			comment_chars[2],
 			"\n"])
+	def __str__(self):
+		return """/*
+ * Copyright (c) %s %s.
+ * %s
+ * %s
+ */
+""" % (self.year, self.author, licenses[self.license].name, licenses[self.license].file)
+
+class ContentRef : # A piece of matched text together with its position in the searched string.
+	def __init__(self, begin, end, content):
+		self.begin = begin # start offset of the text (cref_from_group passes match.start())
+		self.end = end # end offset of the text (cref_from_group passes match.end())
+		self.content = content # the matched text itself
+
+class CopyrightBlock : # One parsed license comment block; parse_file_new fills each field with a ContentRef or None.
+	def __init__(self, yearBegin, yearEnd, author, license, seeMore, total):
+		self.yearBegin = yearBegin # first (or only) copyright year
+		self.yearEnd = yearEnd # last year of a "YYYY-YYYY" range; None when a single year is given
+		self.author = author # copyright holder text, without the optional trailing "."
+		self.license = license # text of the line after the copyright line
+		self.seeMore = seeMore # text of the line after the license line
+		self.total = total # the whole comment block, from "/*" to "*/"
+
+def cref_from_group(match, group): # Wrap a regex group in a ContentRef; None only if the group took no part in the match.
+	if match.group(group) is not None : # an empty match (e.g. an empty author) still has a valid position
+		return ContentRef(match.start(group), match.end(group), match.group(group))
+	else :
+		return None
 
-def get_comment_chars_for_filename(filename) :
-	return ("/*", " *", " */")
-
-def get_comment_chars_re_for_filename(filename) :
-	comment_chars = get_comment_chars_for_filename(filename)
-	return "|".join(comment_chars).replace("*", "\\*")
-
-def parse_file(filename) :
-	file = open(filename)
-	copyright_text = []
-	prolog = ""
-	epilog = ""
-	inProlog = True
-	inCopyright = False
-	inEpilog = False
-	for line in file.readlines() :
-		if inProlog :
-			if line.startswith("#!") or len(line.strip()) == 0 :
-				prolog += line
-				continue
-			else :
-				inProlog = False
-				inCopyright = True
-
-		if inCopyright :
-			if re.match(get_comment_chars_re_for_filename(filename), line) != None :
-				copyright_text.append(line.rstrip())
-				continue
-			else :
-				inCopyright = False
-				inEpilog = True
-				if len(line.strip()) == 0 :
-					continue
-
-		if inEpilog :
-			epilog += line
-			continue
-
-	file.close()
-
-	# Parse the copyright
-	copyright = None
-	if len(copyright_text) == 5 :
-		comment_chars = get_comment_chars_for_filename(filename)
-		if copyright_text[0] == comment_chars[0] and copyright_text[4] == comment_chars[2] :
-			matchstring = "(" + get_comment_chars_re_for_filename(filename) + ") Copyright \(c\) (?P<startYear>\d\d\d\d)(-(?P<endYear>\d\d\d\d))? (?P<author>.*)"
-			m = re.match(matchstring, copyright_text[1])
-			if m != None :
-				# FIXME: Do better copyright reconstruction here
-				copyright = True
-	if not copyright :
-		epilog = "\n".join(copyright_text) + epilog
-	return (prolog, copyright, epilog)
+def parse_file_new(filename): # Return a CopyrightBlock for every license comment block found in the file, in file order.
+	copyrightBlocks = []
+	with open(filename, 'r') as file:
+		content = file.read()
+		for match in re.finditer(reParseLicenseCommentBlocks, content):
+			copyrightBlocks.append(CopyrightBlock(
+				cref_from_group(match, "startYear"), 
+				cref_from_group(match, "endYear"), 
+				cref_from_group(match, "author"), 
+				cref_from_group(match, "license"), 
+				cref_from_group(match, "seeMore"), 
+				cref_from_group(match, 0))) # group 0 is the whole matched comment block
+	return copyrightBlocks
 
 def get_userinfo() :
 	p = subprocess.Popen("git config user.name", shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=(os.name != "nt"))
@@ -107,36 +98,91 @@ def get_copyright(username, email) :
 		license = CONTRIBUTOR_LICENSE
 	return Copyright(username, datetime.date.today().strftime("%Y"), license)
 
-def check_copyright(filename) :
-	(prolog, copyright, epilog) = parse_file(filename)
-	if copyright == None :
-		print "No copyright found in: " + filename
-		#print "Please run '" + sys.argv[0] + " set-copyright " + filename + "'"
+def get_copyright_setting(username, email) : # Build the Copyright (holder, current year, license key) to use for this user.
+	config = os.getenv("SWIFT_LICENSE_CONFIG") # optional override in the form "holder|license"
+	if config :
+		copyrightHolder, license = config.split("|") # unpacking raises ValueError unless there is exactly one "|"
+	else :
+		if email.endswith("isode.com") or email in ["git@el-tramo.be", "git@kismith.co.uk", "tm@ayena.de"] :
+			copyrightHolder, license = "Isode Limited", "default"
+		else :
+			copyrightHolder, license = username, "mit"
+	return Copyright(copyrightHolder, datetime.date.today().year, license) # the year is passed as an int here
+
+def check_copyright(filename, hints) : # True if the current author's copyright block ends in the current year; prints the reason for a failure when hints is set.
+	copyrightBlocks = parse_file_new(filename)
+	if copyrightBlocks :
+		# looking for copyright block for current author
+		username, email = get_userinfo()
+		copyrightSetting = get_copyright_setting(username, email)
+		for block in copyrightBlocks :
+			if block.author and block.author.content == copyrightSetting.author: # author is None when the block names nobody
+				year = block.yearBegin.content if not block.yearEnd else block.yearEnd.content
+				if int(year) == copyrightSetting.year:
+					return True
+				else :
+					if hints :
+						print "Copyright block for " + copyrightSetting.author + " does not cover current year in: " + filename
+					return False
+		if hints :
+			print "Missing copyright block for " + copyrightSetting.author + " in: " + filename
+		return False
+	else :
+		if hints :
+			print "No copyright found in: " + filename
 		return False
+
+def replace_data_in_file(filename, begin, end, replaceWith) : # Rewrite the file with content[begin:end] replaced by replaceWith.
+	with open(filename, 'r') as file:
+		content = file.read() # read everything first; the file is reopened with 'w' below
+	with open(filename, 'w') as file:
+		file.write(content[:begin] + replaceWith + content[end:])
+
+def set_or_update_copyright(filename) : # Extend the current author's copyright year in the file, or add a new block for them.
+	if check_copyright(filename, False) :
+		print "No update required for file: " + filename
 	else :
-		return True
-
-def set_copyright(filename, copyright) :
-	(prolog, c, epilog) = parse_file(filename)
-	comment_chars = get_comment_chars_for_filename(filename)
-	copyright_text = copyright.to_string(comment_chars)
-	file = open(filename, "w")
-	if prolog != "":
-		file.write(prolog)
-	file.write(copyright_text)
-	if epilog != "" :
-		file.write(epilog)
-	file.close()
+		copyrightBlocks = parse_file_new(filename)
+		username, email = get_userinfo()
+		copyrightSetting = get_copyright_setting(username, email)
+		insertAt = 0 # offset for a new block: start of file unless an existing block is found
+		for block in copyrightBlocks :
+			if block.author and block.author.content == copyrightSetting.author : # author is None when the block names nobody
+				if not block.yearEnd :
+					# replace year with range
+					replace_data_in_file(filename, block.yearBegin.begin, block.yearBegin.end, "%s-%s" % (block.yearBegin.content, str(copyrightSetting.year)))
+				else :
+					# replace end of range with current year
+					replace_data_in_file(filename, block.yearEnd.begin, block.yearEnd.end, "%s" % str(copyrightSetting.year))
+				return
+			insertAt = block.total.end + 1 # just past the newline that follows this block's closing "*/"
+
+		# No block for this author. Add one after the last existing block, or at the very top of a file without any.
+		replace_data_in_file(filename, insertAt, insertAt, ("\n" if copyrightBlocks else "") + str(copyrightSetting))
+
+def print_help() : # Print the command-line usage and the license configuration format.
+	print """Usage:
+	Copyrighter.py check-copyright $filename
+		Checks for the existence of a copyright comment block.
+
+	Copyrighter.py set-copyright $filename
+		Adds or updates the existing copyright comment block.
+
+	License setting:
+		A user's license configuration can be set via the SWIFT_LICENSE_CONFIG environment variable
+		in the format "$copyright holder|$license", e.g. "Jane Doe|mit". Possible values for
+		$license are default, mit and gpl3.
+	"""
 
 if sys.argv[1] == "check-copyright" :
 	file = sys.argv[2]
 	if (file.endswith(".cpp") or file.endswith(".h")) and not "3rdParty" in file :
-		if not check_copyright(file) :
+		if not check_copyright(file, True) :
 			sys.exit(-1)
 elif sys.argv[1] == "set-copyright" :
-	(username, email) = get_userinfo()
-	copyright = get_copyright(username, email)
-	set_copyright(sys.argv[2], copyright)
+	file = sys.argv[2]
+	set_or_update_copyright(file)
 else :
 	print "Unknown command: " + sys.argv[1]
+	print_help()
 	sys.exit(-1)
-- 
cgit v0.10.2-6-g49f6