
Commit 5a13646

Author: Cesar Smaniotto

Add spider that collects Ceará case data

1 parent 455808a · commit 5a13646

File tree

2 files changed (+140 −1 lines)


collect.sh

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ fi
 source $SCRIPT_PATH/base.sh

 mkdir -p $DOWNLOAD_PATH $OUTPUT_PATH $LOG_PATH
-for state in pr; do
+for state in ce pr; do
   log_filename="$LOG_PATH/caso-${state}.log"
   csv_filename="$OUTPUT_PATH/caso-${state}.csv"
   rm -rf "$log_filename" "$csv_filename"

corona_ce_spider.py

Lines changed: 139 additions & 0 deletions
@@ -0,0 +1,139 @@
import json
from collections import defaultdict
from datetime import datetime, timedelta

import scrapy


class Covid19CESpider(scrapy.Spider):
    name = "covid19ce"
    base_url = "https://indicadores.integrasus.saude.ce.gov.br/api/coronavirus/qtd-por-municipio?data={date}&tipo={type}"

    def start_requests(self):
        # Ask the IntegraSUS API for the available date range first.
        yield scrapy.Request(
            "https://indicadores.integrasus.saude.ce.gov.br/api/coronavirus/filtro-data",
            self.parse_filter_date,
        )

    def parse_filter_date(self, response):
        response_json = response.body_as_unicode()
        filter_date = json.loads(response_json)[0]

        iter_date = datetime.strptime(filter_date["dataMin"], "%d/%m/%Y").date()
        end_date = datetime.strptime(filter_date["dataMax"], "%d/%m/%Y").date()

        # Request per-municipality confirmed counts for every day in the range.
        while iter_date <= end_date:
            yield scrapy.Request(
                self.base_url.format(date=iter_date.isoformat(), type="Confirmado"),
                self.parse_confirmed,
                meta={"date": iter_date.isoformat()},
            )

            iter_date += timedelta(days=1)

    def parse_confirmed(self, response):
        response_json = response.body_as_unicode()
        confirmed_cases = json.loads(response_json)

        fixed_cases = []
        for case in confirmed_cases:
            if case["tipo"] != "Positivo":
                continue

            fixed_cases.append(
                {
                    **case,
                    "date": response.meta["date"],
                    "url": response.url,
                    "confirmed": case["quantidade"],
                }
            )

        # Chain a request for the death counts of the same date, carrying
        # the confirmed cases along in the request meta.
        yield scrapy.Request(
            self.base_url.format(date=response.meta["date"], type="Óbito"),
            self.parse_death,
            meta={"confirmed": fixed_cases, "date": response.meta["date"]},
        )

    def parse_death(self, response):
        response_json = response.body_as_unicode()
        death_cases = json.loads(response_json)

        fixed_cases = []
        for case in death_cases:
            if case["tipo"] != "Positivo":
                continue

            fixed_cases.append(
                {
                    **case,
                    "date": response.meta["date"],
                    "url": response.url,
                    "deaths": case["quantidade"],
                }
            )

        all_cases = fixed_cases + response.meta["confirmed"]
        parsed_cases = list(process_cities(all_cases))

        for case in parsed_cases:
            yield case

        state_case = process_state(parsed_cases)
        if state_case:
            yield state_case


def process_cities(cases):
    # Merge confirmed and death records into a single row per municipality.
    map_city_case = defaultdict(lambda: {"deaths": 0, "confirmed": 0, "source_url": []})

    for case in cases:
        municipio = case["municipio"]
        if "confirmed" in case:
            map_city_case[municipio]["confirmed"] = case["confirmed"]
        if "deaths" in case:
            map_city_case[municipio]["deaths"] = case["deaths"]

        map_city_case[municipio]["source_url"].append(case["url"])
        map_city_case[municipio].update(
            date=case["date"], city=case["municipio"].title(),
        )

    for case in map_city_case.values():
        yield {
            "date": case["date"],
            "state": "CE",
            "city": case["city"],
            "place_type": "city",
            "notified": "",
            "confirmed": case["confirmed"],
            "discarded": "",
            "suspect": "",
            "deaths": case["deaths"],
            "notes": "",
            "source_url": ",".join(case["source_url"]),
        }


def process_state(cities_cases):
    # Aggregate the city rows into a single state-level row.
    if not cities_cases:
        return None

    sum_confirmed = sum(case["confirmed"] for case in cities_cases)
    sum_deaths = sum(case["deaths"] for case in cities_cases)

    city_case = cities_cases[0]

    return {
        "date": city_case["date"],
        "state": "CE",
        "city": "",
        "place_type": "state",
        "notified": "",
        "confirmed": sum_confirmed,
        "discarded": "",
        "suspect": "",
        "deaths": sum_deaths,
        "notes": "",
        "source_url": city_case["source_url"],
    }
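For reference, a minimal sketch of how process_cities merges the two per-date result sets into one row per municipality. The sample records are hypothetical, shaped after the fields the function actually reads, and the URLs are shortened on purpose:

# Hypothetical input: a confirmed record and a death record for the same
# municipality and date, shaped like the dicts built in parse_confirmed
# and parse_death above.
sample = [
    {"municipio": "FORTALEZA", "date": "2020-03-20",
     "url": "https://indicadores.integrasus.saude.ce.gov.br/api/coronavirus/...",
     "confirmed": 10},
    {"municipio": "FORTALEZA", "date": "2020-03-20",
     "url": "https://indicadores.integrasus.saude.ce.gov.br/api/coronavirus/...",
     "deaths": 1},
]

rows = list(process_cities(sample))
# Both records collapse into a single city row:
# {"date": "2020-03-20", "state": "CE", "city": "Fortaleza", "place_type": "city",
#  "confirmed": 10, "deaths": 1, "source_url": "<both URLs, comma-joined>", ...}

process_state then sums the "confirmed" and "deaths" fields of those city rows to produce the one state-level row per date.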

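A minimal sketch for running the spider locally, assuming a Scrapy release contemporary with this commit (one where Response.body_as_unicode() and the FEED_FORMAT/FEED_URI settings still exist) and that the file is importable as corona_ce_spider:

from scrapy.crawler import CrawlerProcess

from corona_ce_spider import Covid19CESpider

# Write every yielded city and state row to a CSV, mirroring the
# caso-ce.csv output name used by collect.sh.
process = CrawlerProcess(settings={"FEED_FORMAT": "csv", "FEED_URI": "caso-ce.csv"})
process.crawl(Covid19CESpider)
process.start()  # blocks until the crawl finishes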