-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathcheck_copyright.py
executable file
·182 lines (142 loc) · 5.87 KB
/
check_copyright.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#!/usr/bin/env python3
# Copyright (c) 2022 Arm Limited.
#
# SPDX-License-Identifier: MIT
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
'''
This script will check the files passed as arguments for a valid copyright
header. It will run against all changed files as part of the pre-commit hook
after running `pre-commit install` in the root of the repository. It can also
be run manually e.g. `./check_copyright.py file1 dir1/file2`
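A valid copyright header includes the word 'Copyright' followed by the
relevant copyright years, which must include the current year. If the current
year is missing, the script attempts to update the years in place and reports
the file so that the change can be reviewed and re-committed. A valid header
also requires an SPDX licence id string identifying the licence as MIT, as
follows: 'SPDX-License-Identifier: MIT'. Only the lines near the top of each
file (bounded by MAX_SEARCH_LINES) are searched.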
'''
import datetime
import re
import sys
from typing import List, Optional
# How many lines from the top of each file are scanned for the header
MAX_SEARCH_LINES = 20

# The year (local time) that a valid copyright line must mention
CURRENT_YEAR = datetime.date.today().year

# A line passes the copyright check when it contains the word "copyright"
# (any case) followed, anywhere later on the same line, by the current year
COPYRIGHT_YEAR_REGEX = re.compile(
    r".*\bCOPYRIGHT.*{}.*".format(CURRENT_YEAR), flags=re.IGNORECASE
)

# A line passes the SPDX check when it contains the MIT identifier (any case)
SPDX_REGEX = re.compile(r".*SPDX-License-Identifier: MIT.*", flags=re.IGNORECASE)

# Locates an existing notice so its years can be rewritten,
# e.g. "Copyright (C) 2014-2021".
# NOTE: ".*" is greedy, so the match extends to the LAST digit, ',' or ')'
# found on the same line as the word "Copyright".
PATTERN_STRING = r"\bCopyright\b.*[0-9,)]"
PATTERN_COPYRIGHT = re.compile(PATTERN_STRING, flags=re.IGNORECASE)
def generate_years_string(years: List[int]) -> str:
    """
    Render a list of years as a compact, human-readable string.

    Neighbouring entries that increase by exactly one are collapsed into a
    "first-last" range; every other entry is written on its own. The pieces
    are separated by ", ".
    E.g. [1991, 2001, 2002, 2003, 2006, 2007] becomes "1991, 2001-2003,
    2006-2007". An empty list gives an empty string.

    The list is processed in the order given; it is neither sorted nor
    de-duplicated here.
    """
    if not years:
        return ""

    # Each run is a [first, last] pair; a run grows while the next year is
    # exactly one more than the year before it.
    runs = [[years[0], years[0]]]
    for previous, current in zip(years, years[1:]):
        if current == previous + 1:
            runs[-1][1] = current
        else:
            runs.append([current, current])

    pieces = []
    for first, last in runs:
        if first == last:
            pieces.append("%d" % first)
        else:
            pieces.append("%d-%d" % (first, last))
    return ", ".join(pieces)
def parse_years_string(s: str, current_year: Optional[int] = None) -> List[int]:
    """
    Extract the years mentioned in a copyright string.

    Given the string "1999, 2001-2005" this function returns the list:
    [1999, 2001, 2002, 2003, 2004, 2005]

    Stand-alone numbers are taken as single years and "A-B" (no spaces around
    the hyphen) as an inclusive range. A range whose start is greater than its
    end contributes nothing. Only years after 1900 and not later than
    'current_year' are kept; the result is sorted and free of duplicates.

    'current_year' is the latest year accepted. When omitted (None) the
    module-level CURRENT_YEAR is used.
    """
    if current_year is None:
        current_year = CURRENT_YEAR
    # Years up to and including 1900 are never accepted.
    first_valid_year = 1901

    found = set()

    # Numbers that are not part of an "A-B" range (no digit or hyphen on
    # either side).
    for single in re.findall(r"(?<![-\d])\d+(?![-\d])", s):
        year = int(single)
        if first_valid_year <= year <= current_year:
            found.add(year)

    # Ranges: clamp both ends to the accepted window BEFORE expanding, so a
    # bogus span such as "1-99999999999" cannot materialise billions of
    # integers that would all be discarded afterwards.
    for start, end in re.findall(r"(\d+)-(\d+)", s):
        low = max(int(start), first_valid_year)
        high = min(int(end), current_year)
        found.update(range(low, high + 1))

    return sorted(found)
def update_header(filename: str) -> None:
    """
    Updates the Copyright header in 'filename' to hold the correct years.

    The first text in the file matching PATTERN_COPYRIGHT is replaced with
    "Copyright (c) <years>", where <years> is the set of years already present
    in that text plus CURRENT_YEAR (appended when it is not already the latest
    year). If no such text is found the file is left untouched.

    The file is read and written as UTF-8; errors from opening or decoding
    the file are not handled here and propagate to the caller.
    """
    # newline="" switches off newline translation in both directions, so a
    # file using CRLF (or LF on Windows) keeps its line endings when it is
    # written back; only the copyright text changes.
    with open(filename, "r+", encoding="utf-8", newline="") as file_handle:
        file_data = file_handle.read()
        copyright_match = PATTERN_COPYRIGHT.search(file_data)
        if not copyright_match:
            return

        notice_years = parse_years_string(copyright_match.group(0))
        if not notice_years or notice_years[-1] != CURRENT_YEAR:
            notice_years.append(CURRENT_YEAR)
        years_string = generate_years_string(notice_years)

        # Replace the first notice only. 'count' is passed by keyword because
        # passing it positionally is deprecated since Python 3.13.
        file_data = PATTERN_COPYRIGHT.sub(
            "Copyright (c) %s" % years_string, file_data, count=1
        )

        # Overwrite in place; truncate in case the new text is shorter.
        file_handle.seek(0)
        file_handle.write(file_data)
        file_handle.truncate()
# Script body: check every file named on the command line, try to fix an
# out-of-date copyright year, report the failures on stderr and exit with
# status 1 if any file failed either check.
bad_copyright_files = []
bad_spdx_files = []

for changed_file in sys.argv[1:]:
    copyright_found = False
    spdx_found = False

    with open(changed_file, encoding="utf-8") as f:
        for line_num, line in enumerate(f):
            # line_num is zero-based, so ">=" stops after exactly
            # MAX_SEARCH_LINES lines (">" would scan one line too many).
            if line_num >= MAX_SEARCH_LINES:
                break
            if COPYRIGHT_YEAR_REGEX.match(line):
                copyright_found = True
            if SPDX_REGEX.match(line):
                spdx_found = True
            # Nothing left to look for once both markers have been seen.
            if copyright_found and spdx_found:
                break

    if not copyright_found:
        bad_copyright_files.append(changed_file)
        # Attempt an automatic fix of the years. The file is still reported
        # so that the change is reviewed and re-committed.
        update_header(changed_file)

    if not spdx_found:
        bad_spdx_files.append(changed_file)

if bad_copyright_files:
    print(
        "The following files did not have a valid copyright header: "
        + str(bad_copyright_files)
        + "\nAn attempted fix may have been made please check the files and re-commit",
        file=sys.stderr,
    )

if bad_spdx_files:
    print(
        "The following files do not have a valid SPDX licence identifier: "
        + str(bad_spdx_files)
        + "\nPlease add the identifier as follows 'SPDX-License-Identifier: MIT'",
        file=sys.stderr,
    )

# A non-zero exit status makes the pre-commit hook fail.
if bad_copyright_files or bad_spdx_files:
    sys.exit(1)