Skip to content

Commit a40d80f

Browse files
Updated NCBI Taxonomy to remove the scientific name from the authority
1 parent 5d87927 commit a40d80f

File tree

1 file changed

+11
-0
lines changed

1 file changed

+11
-0
lines changed

dataSources/ncbi/taxonomy/scripts/processing.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,17 @@ def loadDF(dumpFile: DumpFile) -> pd.DataFrame:
171171

172172
df["nomenclatural_code"] = df["division_cde"].apply(lambda x: divisionMap[x])
173173

174+
def _hasBalancedParens(text: str) -> bool:
    """Return True when every ")" in *text* closes an earlier "(" and none stay open."""
    depth = 0
    for char in text:
        if char == "(":
            depth += 1
        elif char == ")":
            depth -= 1
            if depth < 0:  # a ")" appeared before its matching "("
                return False

    return depth == 0


def cleanAuthority(authority: str, scientificName: str) -> str:
    """Remove a leading scientific name from an authority string.

    Args:
        authority: The authority value for a row. Non-string values are
            returned converted with ``str()`` and are otherwise untouched.
        scientificName: The scientific name for the same row. When the
            authority starts with this text, that prefix is removed.

    Returns:
        The authority without the leading scientific name, trimmed of
        surrounding spaces and of parentheses that enclose the remainder.
        The authority is returned unchanged when it does not start with the
        scientific name, or when the scientific name is not a string.
    """
    if not isinstance(authority, str):
        return str(authority)

    # str.startswith raises TypeError for a non-string prefix; a name that is
    # not a string cannot be a prefix, so there is nothing to remove.
    if not isinstance(scientificName, str):
        return authority

    if not authority.startswith(scientificName):
        return authority

    remainder = authority[len(scientificName):]
    stripped = remainder.strip(" ()")
    if _hasBalancedParens(stripped):
        return stripped

    # Stripping "(" / ")" from each end independently broke a pair, e.g.
    # " (Smith) Jones" -> "Smith) Jones". Keep the parentheses in that case
    # and only trim the surrounding whitespace.
    trimmed = remainder.strip()
    return trimmed if _hasBalancedParens(trimmed) else stripped
182+
183+
df["authority"] = df.apply(lambda x: cleanAuthority(x["authority"], x["scientific name"]), axis=1)
184+
174185
df["taxonomic_status"] = ""
175186
df["nomenclatural_act"] = "names usage"
176187
df["ARGA_curated"] = False

0 commit comments

Comments
 (0)