Skip to content

Commit ba49e96

Browse files
Merge pull request #251 from ARGA-Genomes/moreNCBITaxonomyFixes
More NCBI Taxonomy Fixes
2 parents 01596e4 + 2c9cdc3 commit ba49e96

File tree

1 file changed

+15
-7
lines changed

1 file changed

+15
-7
lines changed

dataSources/ncbi/taxonomy/scripts/processing.py

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -171,16 +171,24 @@ def loadDF(dumpFile: DumpFile) -> pd.DataFrame:
171171

172172
df["nomenclatural_code"] = df["division_cde"].apply(lambda x: divisionMap[x])
173173

174-
def cleanAuthority(authority: str, scientificName: str) -> str:
175-
if not isinstance(authority, str):
176-
return str(authority)
177-
178-
if not authority.startswith(scientificName):
174+
def cleanAuthority(authority: any, scientificName: any, synonym: any) -> str:
175+
authority: str = str(authority).strip()
176+
177+
for item in (scientificName, synonym):
178+
itemStr = str(item)
179+
180+
if authority.startswith(itemStr):
181+
authority = authority[len(itemStr):].strip()
182+
183+
if not authority:
179184
return authority
180185

181-
return authority[len(scientificName):].strip(" ()")
186+
if authority[0] == "(" and authority[-1] == ")":
187+
authority = authority[1:-1]
188+
189+
return authority
182190

183-
df["authority"] = df.apply(lambda x: cleanAuthority(x["authority"], x["scientific name"]), axis=1)
191+
df["authority"] = df.apply(lambda x: cleanAuthority(x["authority"], x["scientific name"], x["synonym"]), axis=1)
184192

185193
df["taxonomic_status"] = ""
186194
df["nomenclatural_act"] = "names usage"

0 commit comments

Comments
 (0)