-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathvalidate-iris.py
executable file
·72 lines (58 loc) · 2.11 KB
/
validate-iris.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env python3
#
# Validate that all IRIs are in namespaces defined in the header
import re
import sys
from argparse import ArgumentParser, FileType
# Base IRI shared by all OBO Foundry terms, and the prefix of OBI's own terms.
_OBO_BASE = 'http://purl.obolibrary.org/obo/'
_OBI_PREFIX = _OBO_BASE + 'OBI_'

# This IRI is known to be invalid, but it has already been released,
# so it is deliberately never reported.
_GRANDFATHERED_IRI = 'http://purl.obolibrary.org/obo/OBI_00001975'

# Any quoted attribute value on a header line: ="value" or ='value'.
_ATTR_VALUE = re.compile(r'''=\s*(["'])(.*?)\1''')
# A single-line XML comment holding an IRI: <!-- IRI -->
_IRI_COMMENT = re.compile(r'<!-- (.+) -->')
# The local part of an OBI ID must be exactly 7 ASCII digits.
_OBI_LOCAL_ID = re.compile(r'[0-9]{7}')


def _read_namespaces(lines):
    """Collect the namespaces declared in the header of the file.

    The first line is skipped unconditionally (presumably the XML
    declaration). Every following line, up to the first one that mentions
    ``owl:Ontology``, contributes all of its non-empty quoted attribute
    values. Lines without a quoted value (e.g. blank lines) are ignored.

    *lines* must be an iterator (such as an open file); it is left
    positioned just after the ``owl:Ontology`` line.

    Returns the list of namespace strings in declaration order.
    Raises ValueError if the input ends before ``owl:Ontology`` is seen.
    """
    next(lines, None)
    namespaces = []
    for line in lines:
        if 'owl:Ontology' in line:
            return namespaces
        for _quote, value in _ATTR_VALUE.findall(line):
            # An empty namespace would be a prefix of every IRI.
            if value:
                namespaces.append(value)
    raise ValueError(
        'no owl:Ontology element found; cannot read the namespaces')


def _is_invalid_iri(iri, prefixes):
    """Return True if *iri* breaks any of the IRI rules.

    *prefixes* is a tuple of the declared namespaces.
    """
    if iri == _GRANDFATHERED_IRI:
        return False
    if iri.startswith(_OBI_PREFIX):
        # OBI IDs are exactly 7 digits. Check everything after the prefix,
        # so that trailing junk such as "OBI_0000123_x" is rejected too.
        # A well-formed OBI IRI is accepted without further checks.
        local_id = iri[len(_OBI_PREFIX):]
        return _OBI_LOCAL_ID.fullmatch(local_id) is None
    if iri.startswith(_OBO_BASE) and '#' in iri:
        # Do not allow pound in OBO IRIs.
        # Sometimes Protege default IRIs will have this.
        return True
    # Always make sure the IRI starts with a defined namespace.
    # (An empty tuple never matches, so no namespaces means invalid.)
    return not iri.startswith(prefixes)


def _find_invalid_iris(lines, namespaces):
    """Scan *lines* for ``<!-- IRI -->`` comments and validate each IRI.

    Returns the distinct invalid IRIs in order of first appearance; an IRI
    that breaks several rules, or that occurs several times, is listed once.
    """
    prefixes = tuple(namespaces)
    invalid = []
    seen = set()
    for line in lines:
        if 'Generated by the OWL API' in line:
            # Ignore the generated line
            continue
        if not line.strip().startswith('<!--'):
            continue
        found = _IRI_COMMENT.search(line)
        if found is None:
            # Multi-line comments (e.g. section banners) carry no IRI.
            continue
        iri = found.group(1)
        if iri not in seen and _is_invalid_iri(iri, prefixes):
            seen.add(iri)
            invalid.append(iri)
    return invalid


def main(argv=None):
    """Validate that all IRIs are in namespaces defined in the header.

    argv: optional list of command-line arguments (one path to the file to
        check, or ``-`` for stdin). Defaults to ``sys.argv[1:]``.

    Always terminates by raising SystemExit: code 0 when every IRI is
    valid, code 1 (after printing the offending IRIs to stdout) when some
    are not, and an error message (exit status 1) when the header cannot
    be parsed.
    """
    parser = ArgumentParser(
        description='Validate that all IRIs are in namespaces defined in '
                    'the header')
    # Read as UTF-8 rather than relying on the locale's default encoding.
    parser.add_argument('obi', type=FileType('r', encoding='utf-8'))
    args = parser.parse_args(argv)

    # The context manager closes the file even if parsing fails.
    with args.obi as obi:
        try:
            namespaces = _read_namespaces(obi)
        except ValueError as err:
            sys.exit('ERROR - {0}'.format(err))
        invalid = _find_invalid_iris(obi, namespaces)

    if invalid:
        print('ERROR - {0} invalid IRIs found:\n{1}'.format(
            len(invalid), '\n'.join(invalid)))
        sys.exit(1)
    sys.exit(0)
# Run the validation only when this file is executed as a script,
# so that importing it has no side effects.
if __name__ == '__main__':
    main()