13
13
distributed under the License is distributed on an "AS IS" BASIS,
14
14
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
15
See the License for the specific language governing permissions and
16
- limitations
16
+ limitations
17
17
"""
18
18
19
- # Asumptions for this script:
19
+ # Assumptions for this script:
20
20
# 1. directory_name is scanned directory.
21
21
# Files are copied to this directory with full tree. As a result, if we find a
22
22
# license offender, we can have full path (just scrape directory_name). We do this
29
29
import os .path
30
30
import logging
31
31
import re
32
+ import ntpath
32
33
33
34
userlog = logging .getLogger ("scancode-evaluate" )
34
35
userlog .setLevel (logging .INFO )
40
41
MISSING_PERMISIVE_LICENSE_TEXT = "Non-permissive license"
41
42
MISSING_SPDX_TEXT = "Missing SPDX license identifier"
42
43
43
- def license_check (directory_name , file ):
44
- """ Check licenses in the scancode json file for specified directory
44
class FileDecodeError(Exception):
    """Raised when a file being tested cannot be decoded as text."""
46
+
47
def path_leaf(path):
    """Return the last component of a path.

    The path is split with ``ntpath``, so both forward slashes and
    backslashes are accepted as separators.
    """
    parent, name = ntpath.split(path)
    if name:
        return name
    # A trailing separator leaves the tail empty; the leaf is then the
    # last component of what remains.
    return ntpath.basename(parent)
52
+
53
def has_permissive_text_in_scancode_output(scancode_output_data_file_licenses):
    """Return True if at least one license in the scancode output is permissive.

    Each entry must provide a ``category`` key; an entry without it raises
    KeyError, which callers use to detect malformed scancode output.
    """
    for detected_license in scancode_output_data_file_licenses:
        if detected_license['category'] == 'Permissive':
            return True
    return False
59
+
60
def has_spdx_text_in_scancode_output(scancode_output_data_file_licenses):
    """Return True if at least one license in the scancode output has the spdx identifier.

    A license qualifies when the identifier of its matched rule contains
    the text ``spdx``. Entries lacking ``matched_rule`` or ``identifier``
    raise KeyError.
    """
    for detected_license in scancode_output_data_file_licenses:
        rule_identifier = detected_license['matched_rule']['identifier']
        if 'spdx' in rule_identifier:
            return True
    return False
66
+
67
def has_spdx_text_in_analysed_file(file):
    """Return True if the file analysed by ScanCode contains an SPDX identifier.

    Args:
        file: path of the file to search.

    Returns:
        True if the text ``SPDX-License-Identifier`` occurs anywhere in the
        file, False otherwise.

    Raises:
        FileDecodeError: if the file content cannot be decoded as text.
    """
    try:
        with open(file, 'r') as read_file:
            filetext = read_file.read()
    except UnicodeDecodeError as error:
        # Chain the original error so the failing byte offset is not lost.
        raise FileDecodeError(
            "Unable to look for SPDX text in `{}`: {}".format(file, error)
        ) from error

    # The colon in the former pattern "SPDX-License-Identifier:?" was
    # optional, so a plain substring test matches exactly the same files
    # and yields the boolean the docstring promises.
    return "SPDX-License-Identifier" in filetext
78
+
79
def license_check(scancode_output):
    """Check licenses in the scancode json file for specified directory.

    This function does not verify if file exists, should be done prior the call.

    Every problem is recorded separately, so a file that has neither a
    permissive license nor an SPDX identifier counts as two issues.

    Args:
        scancode_output - scancode json output file
            (output from scancode --license --json-pp)

    Returns:
        0 if nothing found
        >0 - count how many license issues found
        -1 if any error in file licenses found
    """
    try:
        with open(scancode_output, 'r') as read_file:
            scancode_output_data = json.load(read_file)
    except json.JSONDecodeError as jex:
        userlog.warning("JSON could not be decoded, Invalid JSON in body: %s", jex)
        return -1

    # Valid JSON that is not an object (a list, a number, ...) cannot carry
    # a `files` attribute; report it the same way instead of crashing.
    if not isinstance(scancode_output_data, dict) or 'files' not in scancode_output_data:
        userlog.warning("Missing `files` attribute in %s", scancode_output)
        return -1

    # Each offender is an independent (path, reason) pair, so one file
    # failing several checks keeps a distinct reason for every failure.
    offenders = []

    for scancode_output_data_file in scancode_output_data['files']:
        try:
            # Directories and other non-file entries are not relevant here.
            if scancode_output_data_file['type'] != 'file':
                continue

            licenses = scancode_output_data_file['licenses']
            if not licenses:
                offenders.append(
                    (scancode_output_data_file['path'], MISSING_LICENSE_TEXT)
                )
                # check the next file in the scancode output
                continue

            if not has_permissive_text_in_scancode_output(licenses):
                offenders.append(
                    (scancode_output_data_file['path'], MISSING_PERMISIVE_LICENSE_TEXT)
                )

            if not has_spdx_text_in_scancode_output(licenses):
                # Scancode does not recognize license notice in Python file headers.
                # Issue: https://github.com/nexB/scancode-toolkit/issues/1913
                # Therefore check if the file tested by ScanCode actually has a licence notice.
                file_path = os.path.abspath(scancode_output_data_file['path'])
                try:
                    if not has_spdx_text_in_analysed_file(file_path):
                        offenders.append(
                            (scancode_output_data_file['path'], MISSING_SPDX_TEXT)
                        )
                except FileDecodeError:
                    # Ignore files that cannot be decoded
                    # check the next file in the scancode output
                    continue
        except KeyError as error:
            # Any attribute missing from a file entry means the scancode
            # output is malformed; stop and report an error.
            userlog.warning(
                "Could not find %s attribute in %s", error, scancode_output
            )
            return -1

    if offenders:
        userlog.warning("Found files with missing license details, please review and fix")
        for offender_path, fail_reason in offenders:
            userlog.warning(
                "File: %s reason: %s", path_leaf(offender_path), fail_reason
            )
    return len(offenders)
105
154
106
-
107
def spdx_check(offenders, license_offender):
    """Report whether the first permissive license of a file has an SPDX rule.

    The licenses in ``license_offender['file']['licenses']`` are examined
    in order and the function returns at the first one whose category is
    'Permissive'. If none is permissive, ``license_offender['reason']`` is
    set to MISSING_PERMISIVE_LICENSE_TEXT and a copy of the dict is
    appended to ``offenders``.

    Args:
        offenders: list collecting offender dicts; extended in place.
        license_offender: dict describing one file, with the scancode file
            entry stored under the 'file' key.

    Returns:
        True if the first permissive license was matched by a rule whose
        identifier contains "spdx", False otherwise (including when no
        permissive license exists).
    """
    for detected_license in license_offender['file']['licenses']:
        if detected_license['category'] == 'Permissive':
            # Only the first permissive license decides the result.
            return 'spdx' in detected_license['matched_rule']['identifier']

    # No permissive license at all: record the file as an offender.
    license_offender['reason'] = MISSING_PERMISIVE_LICENSE_TEXT
    offenders.append(license_offender.copy())
    return False
128
-
129
155
def parse_args ():
130
156
parser = argparse .ArgumentParser (
131
157
description = "License check." )
@@ -135,11 +161,11 @@ def parse_args():
135
161
help = 'Directory name where are files being checked' )
136
162
return parser .parse_args ()
137
163
138
-
139
164
if __name__ == "__main__" :
165
+
140
166
args = parse_args ()
141
167
if args .file and os .path .isfile (args .file ):
142
- count = license_check (args .directory_name , args . file )
168
+ count = license_check (args .file )
143
169
if count == 0 :
144
170
sys .exit (0 )
145
171
else :
0 commit comments