
Spider for Rondônia cases #132


Open · wants to merge 6 commits into master
3 changes: 3 additions & 0 deletions web/spiders/__init__.py
@@ -8,6 +8,7 @@
from .spider_pr import Covid19PRSpider
from .spider_rn import Covid19RNSpider
from .spider_rr import Covid19RRSpider
from .spider_ro import Covid19ROSpider


SPIDERS = [
@@ -16,6 +17,7 @@
Covid19PRSpider,
Covid19RRSpider,
Covid19RNSpider,
Covid19ROSpider,
]
STATE_SPIDERS = {SpiderClass.name: SpiderClass for SpiderClass in SPIDERS}
# TODO: do autodiscovery from base class' subclasses
@@ -29,6 +31,7 @@ def execute_spider_worker(SpiderClass):
process.start()
except Exception as exp:
import traceback

return "error", traceback.format_exc()
else:
report_fobj.seek(0)
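The TODO comment above asks for autodiscovery from the base class's subclasses. A minimal sketch of one way to do that, assuming every spider inherits directly from BaseCovid19Spider and its module has already been imported; the discover_spiders helper is hypothetical, not part of this PR:

from .base import BaseCovid19Spider


def discover_spiders():
    # Python tracks the direct subclasses of every class; each spider
    # module must be imported first so its class is registered here.
    return {cls.name: cls for cls in BaseCovid19Spider.__subclasses__()}

This would replace both the hand-maintained SPIDERS list and the STATE_SPIDERS dict built from it.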
6 changes: 3 additions & 3 deletions web/spiders/spider_pa.py
@@ -1,13 +1,13 @@
import io
import json
import os
import scrapy
from urllib.parse import urlencode

from .base import BaseCovid19Spider


class Covid19PASpider(BaseCovid19Spider):
name = "PA"
base_url = "https://www.covid-19.pa.gov.br/monitoramento-corona-service/statuscorona/casos-confirmados-obitos-por-municipio"]
base_url = "https://www.covid-19.pa.gov.br/monitoramento-corona-service/statuscorona/casos-confirmados-obitos-por-municipio"
splash_url = os.environ.get("SPLASH_URL", None)

def start_requests(self):
8 changes: 5 additions & 3 deletions web/spiders/spider_pe.py
@@ -26,11 +26,13 @@ def city_id_from_name(self):
return data

def parse(self, response):
page_jsons = response.xpath("//script[@type='application/json' and @data-for]/text()")
page_jsons = response.xpath(
"//script[@type='application/json' and @data-for]/text()"
)
case_data = None
for json_data in page_jsons.extract():
data = json.loads(json_data)["x"]
-if data['options'].get('buttons'):
+if data["options"].get("buttons"):
continue
case_data = data["data"]
break
@@ -71,7 +73,7 @@ def parse(self, response):
def fix_row(self, row):
new = row.copy()
cd_municipio = new["cd_municipio"]
-if cd_municipio == '-':
+if cd_municipio == "-":
cd_municipio = 0

if int(cd_municipio) == 0 or not new["cd_municipio"]:
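For context on the XPath above: the PE page embeds its data in script tags of type application/json that carry a data-for attribute. A standalone sketch of the extraction, using an invented HTML snippet (the real page's attribute values and payload differ):

import json

from scrapy.http import HtmlResponse

# Invented snippet mimicking the shape the spider expects.
html = b"""
<script type="application/json" data-for="widget-1">
{"x": {"options": {}, "data": [["Recife", 10, 1]]}}
</script>
"""
response = HtmlResponse(url="https://example.com", body=html, encoding="utf-8")
page_jsons = response.xpath(
    "//script[@type='application/json' and @data-for]/text()"
)
for json_data in page_jsons.extract():
    data = json.loads(json_data)["x"]
    print(data["data"])  # [['Recife', 10, 1]]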
52 changes: 52 additions & 0 deletions web/spiders/spider_ro.py
@@ -0,0 +1,52 @@
import json
import scrapy

from datetime import date
from .base import BaseCovid19Spider


class Covid19ROSpider(BaseCovid19Spider):
name = "RO"
start_urls = ["http://covid19.sesau.ro.gov.br"]

def parse(self, response):
# extract the displayed date (there's no option to change the date to a past one)
date_container = response.xpath(
"//*[text()='calendar_today']/../text()"
).extract()
report_date = [t.strip() for t in date_container if t.strip()][0]

# get the URL for the JS file with the data
js_script = response.xpath(
"//attribute::*[contains(., 'estadoRO')]/../@src"
).extract()[0]
full_url = response.url + js_script

# report_date is "dd/mm/yyyy"; reversing the split yields year, month, day
year, month, day = [int(v) for v in report_date.split("/")[::-1]]
self.add_report(date=date(year, month, day), url=full_url)

yield scrapy.Request(
url=full_url,
meta={"row": {"date": date}},
callback=self.parse_js_data_script,
)

def parse_js_data_script(self, response):
"""
The JS code only defines a variable called 'cidades' with the required JSON data to the other
JS codes work with. This parsing function cleans up the JS file to get only the JSON content.
"""
json_data = response.body_as_unicode().replace("var cidades = ", "").strip()
content = json.loads(json_data)

total_confirmed, total_deaths = 0, 0
for data in [d["properties"] for d in content["features"]]:
city, confirmed, deaths = data["NOME"], data["confirmados"], data["obitos"]
total_confirmed += confirmed
total_deaths += deaths

self.add_city_case(city=city, confirmed=confirmed, deaths=deaths)

# TODO: we might have to change this in the future once this data is available
self.add_city_case(city="Importados/Indefinidos", confirmed=None, deaths=None)
Owner:

I would add a TODO here just to flag that, in the future, we might need to change these None values if they start publishing the numbers.

Collaborator (Author):

Done

self.add_state_case(confirmed=total_confirmed, deaths=total_deaths)
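To make the cleanup in parse_js_data_script concrete, a standalone sketch with a made-up payload; the GeoJSON-like shape (features entries whose properties carry NOME, confirmados and obitos) is inferred from the parsing code above:

import json

# Made-up payload in the shape parse_js_data_script expects.
js_body = """var cidades = {
    "features": [
        {"properties": {"NOME": "Porto Velho", "confirmados": 5, "obitos": 1}},
        {"properties": {"NOME": "Ji-Parana", "confirmados": 2, "obitos": 0}}
    ]
}"""

# Same cleanup as the spider: drop the JS assignment, keep the JSON literal.
content = json.loads(js_body.replace("var cidades = ", "").strip())

total_confirmed = sum(f["properties"]["confirmados"] for f in content["features"])
total_deaths = sum(f["properties"]["obitos"] for f in content["features"])
print(total_confirmed, total_deaths)  # 7 1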