You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
selector: $infobox//div[@data-source="full_name" or @data-source="full" or @data-source="also_known_as" or @data-source="known" or @data-source="aka" or contains(@data-source,"Name") or contains(@data-source,"name") or contains(@data-source,"alias") or contains(@data-source,"Alias")]/div/text()
23
-
postProcess:
24
-
- replace:
25
-
# remove citation markers like "[3]"
26
-
- regex: "\\[[ ]*[0-9]+[ ]*\\]"
27
-
with: ""
17
+
selector: $infobox//div[@data-source="full_name" or @data-source="full" or @data-source="also_known_as" or @data-source="known" or @data-source="aka" or contains(@data-source,"Name") or contains(@data-source,"name") or contains(@data-source,"alias") or contains(@data-source,"Alias")]/div/text()[1]
28
18
Gender:
29
-
selector: $infobox//div[@data-source="gender" or @data-source="Gender" or @data-source="sex"]/div//text()
30
-
concat: ""
31
-
postProcess:
32
-
- replace:
33
-
# remove citation markers like "[3]"
34
-
- regex: "\\[[ ]*[0-9]+[ ]*\\]"
35
-
with: ""
36
-
# reduce to single word
37
-
- regex: ^([^\s]+).*
38
-
with: "$1"
19
+
selector: $infobox//div[@data-source="gender" or @data-source="Gender" or @data-source="sex"]/div//text()[1]
39
20
Birthdate:
40
-
selector: $infobox//div[contains(@data-source,"Birth") or contains(@data-source,"birth") or @data-source="born" or @data-source="dob" or @data-source="DOB"]/div//text()
41
-
concat: ""
21
+
selector: $infobox//div[contains(@data-source,"Birth") or contains(@data-source,"birth") or @data-source="born" or @data-source="dob" or @data-source="DOB"]/div//text()[1]
42
22
postProcess: &postprocessDate
43
23
# there are many formats used, many only give partial info - some examples
44
24
# September 26, 1981
@@ -48,9 +28,6 @@ xPathScrapers:
48
28
# 2 May 1998 (aged 46/47)
49
29
# 1997 [3] some more text
50
30
- replace:
51
-
# remove citation markers like "[3]"
52
-
- regex: "\\[[ ]*[0-9]+[ ]*\\]"
53
-
with: ""
54
31
# remove expressions in parentheses (likely only giving context)
55
32
- regex: \([^)]*\)
56
33
with: ""
@@ -74,37 +51,16 @@ xPathScrapers:
74
51
with: January 1 $1
75
52
- parseDate: January 2 2006
76
53
DeathDate:
77
-
selector: $infobox//div[contains(@data-source,"Death") or contains(@data-source,"death") or @data-source="died" or @data-source="dod" or @data-source="DOD"]/div//text()
78
-
concat: ""
54
+
selector: $infobox//div[contains(@data-source,"Death") or contains(@data-source,"death") or @data-source="died" or @data-source="dod" or @data-source="DOD"]/div//text()[1]
79
55
postProcess: *postprocessDate
80
56
Ethnicity:
81
-
selector: $infobox//div[@data-source="species" or contains(@data-source,"Race") or contains(@data-source,"race")]/div//text()
82
-
postProcess:
83
-
- replace:
84
-
# remove citation markers like "[3]"
85
-
- regex: "\\[[ ]*[0-9]+[ ]*\\]"
86
-
with: ""
57
+
selector: $infobox//div[@data-source="species" or contains(@data-source,"Race") or contains(@data-source,"race")]/div//text()[1]
0 commit comments