diff --git a/inspacy/pipe.py b/inspacy/pipe.py
index 477c69f..538b266 100644
--- a/inspacy/pipe.py
+++ b/inspacy/pipe.py
@@ -1,6 +1,6 @@
 import spacy
 from spacy.tokens import Span
-
+import re
 
 class RuleBuilder:
     """Build rule for Entity Ruler based on concept's label, id and a rule level (strict, common or permis)"""
@@ -18,7 +18,8 @@ def __init__(self, cfg):
         self.lvlfunc = {
             'strict': self.build_strict_rule,
             'common': self.build_common_rule,
-            'permis': self.build_permis_rule
+            'permis': self.build_permis_rule,
+            'regexp': self.build_regexp_rule
         }
 
         self.sent = __class__.LANGSENT[self.lang]
@@ -31,6 +32,8 @@ def __call__(self, nes):
         for ne in nes:
             func = self.lvlfunc[ne['level']]
             rules.append(func(ne))
+            func = self.lvlfunc['regexp']
+            rules.append(func(ne))
         return rules
 
     def build_strict_rule(self, ne):
@@ -64,11 +67,27 @@ def build_permis_rule(self, ne):
 
         wdps = []
         for token in doc[1:len(doc) - self.sent['size']]:
-            wdps.append({"pos": token.pos_, "lemma": token.lemma_})
+            if token.pos_ in {"NOUN", "PROPN"}:
+                wdps.append({"pos": {"IN": ["NOUN", "PROPN"]}, "lemma": token.lemma_})
+            else:
+                wdps.append({"pos": token.pos_, "lemma": token.lemma_})
             wdps.append(__class__.OPTADJ)
         pattern = wdps
         return {"id": id_, "label": label, "pattern": pattern}
 
+    def build_regexp_rule(self, ne):
+        """Build a rule from the entity text with parenthesised qualifiers removed.
+
+        Every "(...)" group made of letters, digits, underscores or
+        whitespace is stripped from ne['entity'], together with the
+        whitespace before it; the remaining string is used as the pattern.
+        """
+        id_ = ne['id']
+        label = ne['label']
+        p = re.compile(r'\s*\([a-zA-Z0-9_\s]*\)')
+        pattern = p.sub('', ne['entity'])
+        return {"id": id_, "label": label, "pattern": pattern}
+
     def get_pipeline(self):
         """Get pipeline"""
         return self.nlp
diff --git a/requirements.txt b/requirements.txt
index b0f81e8..27657b8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
 spacy
 flask
 flask-Cors
-requests
\ No newline at end of file
+requests