Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 46 additions & 1 deletion lib/teiparse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,33 @@ def check_tei_type(type: str, debug: bool):
print("DEBUG: TEI type not supported: {}".format(type))
return False

def valid_hash(hashtype: str, teihash: str, debug: bool) -> bool:
    """Check if the hash type and hash value of a TEI hash URN are valid.

    Sample syntax:
    urn:tei:hash:teapot.example.com:sha256:00480065006C006C006F00200077006F0072006C00640021

    The hash value needs to be in hex code. Its length is not checked
    against the hash type.

    Args:
        hashtype: Name of the hash algorithm; must be one of
            sha256, sha384 or sha512.
        teihash: The hash value as a string of hexadecimal digits.
        debug: When true, print diagnostics to stdout.

    Returns:
        True when the hash type is supported and the hash value is a
        non-empty string of hex digits, otherwise False.
    """
    supported_types = ("sha256", "sha384", "sha512")
    if hashtype not in supported_types:
        if debug:
            print("DEBUG: Invalid hash type: {}\n".format(hashtype))
        # An unsupported hash type makes the TEI invalid.
        return False

    # int(value, 16) would also accept signs, "0x" prefixes, underscores
    # and surrounding whitespace, so inspect the characters directly.
    hex_digits = "0123456789abcdefABCDEF"
    if not teihash or any(char not in hex_digits for char in teihash):
        if debug:
            print("DEBUG: Hash is not a hex value: {}\n".format(teihash))
        return False

    if debug:
        print("DEBUG: Valid TEI hash {}: {}".format(hashtype, teihash))
    return True


def valid_purl(purl: str, debug: bool):
"""Check if the PURL is valid.
Expand Down Expand Up @@ -79,12 +106,30 @@ def valid(
return False
if not check_tei_type(urn.specific_string.parts[0], debug):
return False
# Check the length of the domain part
domlen = len(urn.specific_string.parts[1])
if domlen == 0:
if debug:
print("ERROR: No domain part given.")
return False
if urn.specific_string.parts[0] == "purl":
if debug:
print("DEBUG: Checking PURL syntax")
# Calculate where PURL begins
start = len("urn:tei:purl:")
start += len(urn.specific_string.parts[1]) + 1
#remove the domain
start += domlen + 1
if not valid_purl(tei[start:], debug):
return False
return True
if urn.specific_string.parts[0] == "hash":
# Hash type tei:hash:<domain>:hashtype:<hash>
hashtype = urn.specific_string.parts[2]
if not valid_hash(
hashtype=hashtype,
teihash = urn.specific_string.parts[3],
debug = debug):
return False
return True

return True
62 changes: 59 additions & 3 deletions pytest/test_tei.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,7 @@ def test_bad_02(capsys, request):

def test_bad_03(capsys, request):
"""Test bad TEI with bad type"""
import os
import sys
import re
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from lib.teiparse import valid

tei = "urn:tei:gurka:prod2.example.com:" \
Expand All @@ -136,3 +133,62 @@ def test_bad_03(capsys, request):
assert re.search(
"DEBUG: TEI type not supported",
captured.out) is not None


def test_bad_04(capsys, request):
    """Test bad TEI with no domain.

    Named test_bad_04 because an earlier test in this file is already
    called test_bad_03; a second definition with the same name would
    replace it, and the earlier test would silently stop running.
    """
    import re
    from lib.teiparse import valid

    # The domain part between "hash:" and the next ":" is empty.
    tei = "urn:tei:hash::" \
        "7bdb4424-612f-11ef-947e-1a52914d44b3"

    tvalid = valid(tei, True)
    captured = capsys.readouterr()
    with capsys.disabled():
        print(
            "\nDEBUG {}: output: \n{}\n"
            .format(request.node.name, captured.out))
    assert tvalid is False
    # Escape the message so the trailing "." is matched literally
    # instead of acting as a regex wildcard.
    assert re.search(
        re.escape("ERROR: No domain part given."),
        captured.out) is not None


def test_hash_00(capsys, request):
    """Check that a TEI hash URN with a non-hex hash value is rejected."""
    import re
    from lib.teiparse import valid

    # The dashes keep the value from being read as a hex number.
    urn_text = (
        "urn:tei:hash:prod2.example.com:sha256:"
        "7bdb4424-612f-11ef-947e-1a52914d44b3"
    )

    outcome = valid(urn_text, True)
    stdout_text = capsys.readouterr().out

    # Echo the captured debug output so it shows up in the pytest log.
    with capsys.disabled():
        banner = "\nDEBUG {}: output: \n{}\n"
        print(banner.format(request.node.name, stdout_text))

    assert outcome is False
    found = re.search("Hash is not a hex value", stdout_text)
    assert found is not None

def test_hash_01(capsys, request):
    """Check that a TEI hash URN with a hex sha256 value is accepted."""
    import re
    from lib.teiparse import valid

    urn_text = (
        "urn:tei:hash:prod2.example.com:sha256:"
        "726e11c73d62bbc3b7c3f68d25fe98dd8d8de90fb159cfa0a624efed5437435f"
    )

    outcome = valid(urn_text, True)
    stdout_text = capsys.readouterr().out

    # Echo the captured debug output so it shows up in the pytest log.
    with capsys.disabled():
        banner = "\nDEBUG {}: output: \n{}\n"
        print(banner.format(request.node.name, stdout_text))

    assert outcome is True
    found = re.search("DEBUG: Valid TEI hash sha256:", stdout_text)
    assert found is not None