Skip to content

Commit 558c331

Browse files
committed
adiciona spider para GO
1 parent c8e3388 commit 558c331

File tree

2 files changed

+80
-0
lines changed

2 files changed

+80
-0
lines changed

web/spiders/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from .spider_ce import Covid19CESpider
77
from .spider_es import Covid19ESSpider
8+
from .spider_go import Covid19GOSpider
89
from .spider_pe import Covid19PESpider
910
from .spider_pr import Covid19PRSpider
1011
from .spider_rn import Covid19RNSpider
@@ -14,6 +15,7 @@
1415
SPIDERS = [
1516
Covid19CESpider,
1617
Covid19ESSpider,
18+
Covid19GOSpider,
1719
Covid19PESpider,
1820
Covid19PRSpider,
1921
Covid19RNSpider,

web/spiders/spider_go.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import io
2+
from itertools import groupby
3+
from collections import defaultdict
4+
from datetime import datetime
5+
6+
import rows
7+
8+
from .base import BaseCovid19Spider
9+
10+
class YMDDateField(rows.fields.DateField):
    """Date field parsing compact ``YYYYMMDD`` strings (e.g. ``"20200415"``)."""

    INPUT_FORMAT = "%Y%m%d"
12+
13+
14+
class Covid19GOSpider(BaseCovid19Spider):
    """Scrape COVID-19 counts for the state of Goiás (GO).

    Downloads two CSVs from the state health department — one row per
    confirmed death and one row per confirmed case. ``parse`` counts rows
    per city into ``self.cases`` and ``spider_closed`` reports per-city,
    imported/undefined and state-level totals.
    """

    name = "GO"
    start_urls = [
        "http://datasets.saude.go.gov.br/coronavirus/obitos_confirmados.csv",
        "http://datasets.saude.go.gov.br/coronavirus/casos_confirmados.csv",
    ]

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # city name -> {"confirmed": int, "deaths": int}; each key is filled
        # only by the response for the corresponding CSV, so either may be
        # missing for a given city.
        self.cases = defaultdict(dict)

    def parse(self, response):
        """Count rows per city for whichever CSV this response carries."""
        table = rows.import_from_csv(
            io.BytesIO(response.body),
            encoding=response.encoding,
            force_types={"data_notificacao": YMDDateField},
        )
        table = list(table)

        # FIXME: make sure the REAL last date is used, since it will differ
        # between obitos_confirmados.csv and casos_confirmados.csv (this is
        # called once per response, so two reports are added).
        last_date = max(row.data_notificacao for row in table)
        self.add_report(date=last_date, url=response.url)

        # groupby only groups consecutive rows, so sort by the same key first.
        row_key = lambda row: row.municipio
        table.sort(key=row_key)

        for city, city_data in groupby(table, key=row_key):
            if "casos_confirmados.csv" in response.url:
                self.cases[city]["confirmed"] = len(list(city_data))
            elif "obitos_confirmados.csv" in response.url:
                self.cases[city]["deaths"] = len(list(city_data))

    def spider_closed(self):
        """Aggregate collected counts and emit city, imported and state cases."""
        total_confirmed = total_deaths = 0
        imported_confirmed = imported_deaths = 0

        for city, city_data in self.cases.items():
            # A city may appear in only one of the two CSVs; default the
            # missing counter to 0 instead of raising KeyError (previously
            # city_data["confirmed"] crashed for deaths-only cities).
            confirmed = city_data.get("confirmed", 0)
            deaths = city_data.get("deaths", 0)

            try:
                self.get_city_id_from_name(city)
            except KeyError:
                # Unknown city name: fold into the imported/undefined bucket.
                imported_confirmed += confirmed
                imported_deaths += deaths
            else:
                self.add_city_case(city=city, confirmed=confirmed, deaths=deaths)

            total_confirmed += confirmed
            total_deaths += deaths

        # No imported/undefined rows at all: report them as unknown (None)
        # rather than as explicit zeros.
        if imported_confirmed == imported_deaths == 0:
            imported_confirmed = imported_deaths = None

        self.add_city_case(
            city="Importados/Indefinidos",
            confirmed=imported_confirmed,
            deaths=imported_deaths,
        )
        self.add_state_case(confirmed=total_confirmed, deaths=total_deaths)
        # NOTE(review): parent method is invoked with an explicit ``self``
        # argument — kept as-is, but verify BaseCovid19Spider.spider_closed's
        # signature actually expects it.
        super().spider_closed(self)

0 commit comments

Comments
 (0)