-
Notifications
You must be signed in to change notification settings - Fork 24
/
Copy pathdemo.py
46 lines (31 loc) · 1.22 KB
/
demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from deidentify.base import Document
from deidentify.taggers import FlairTagger
from deidentify.tokenizer import TokenizerFactory
from deidentify.util import mask_annotations, surrogate_annotations

from pprint import pprint

# Demo script: annotate one Dutch sample text with a FlairTagger, then print
# (1) the annotations found, (2) the text after mask_annotations() and
# (3) the text after surrogate_annotations().

# Create some text. The sample sentence carries a name, an e-mail address, a
# phone number, an age, a city, a date and a hospital name, so the tagger has
# several kinds of spans to find.
text = (
    "Dit is stukje tekst met daarin de naam Jan Jansen. De patient J. Jansen (e: "
    "j.jnsen@email.com, t: 06-12345678) is 64 jaar oud en woonachtig in Utrecht. Hij werd op 10 "
    "oktober door arts Peter de Visser ontslagen van de kliniek van het UMCU."
)

# Wrap text in document; the tagger is called with a list of Document objects.
documents = [
    Document(name='doc_01', text=text),
]

# Select downloaded model
model = 'model_bilstmcrf_ons_fast-v0.2.0'

# Instantiate tokenizer
# NOTE(review): `disable` presumably switches off tokenizer pipeline
# components that are not needed here -- confirm against TokenizerFactory.
tokenizer = TokenizerFactory().tokenizer(corpus='ons', disable=("tagger", "ner"))

# Load tagger with a downloaded model file and tokenizer
tagger = FlairTagger(model=model, tokenizer=tokenizer, verbose=False)

# Annotate your documents
annotated_docs = tagger.annotate(documents)

# Show the annotations of the first (and only) document.
first_doc = annotated_docs[0]
pprint(first_doc.annotations)

# Print the document text after passing it through mask_annotations().
masked_doc = mask_annotations(first_doc)
print(masked_doc.text)

# Print the document text after passing it through surrogate_annotations().
# The result is wrapped in list() before indexing, as it may be a lazy
# iterable. NOTE(review): the fixed seed presumably makes the generated
# surrogates reproducible -- confirm against deidentify.util.
iter_docs = surrogate_annotations(docs=[first_doc], seed=1)
surrogate_doc = list(iter_docs)[0]
print(surrogate_doc.text)