 2022-01-06: This scraper is not maintained. Future work to gather this
 data should be done by scraping the CourtListener API
 https://www.courtlistener.com/api/rest/v3/clusters/?docket__court__id=me
+
+2025-03-31: This scraper has been updated with a backscraper (flooie)
 """

-from lxml import html
+import re
+from datetime import date
+
+from juriscraper.AbstractSite import logger
+from juriscraper.lib.judge_parsers import normalize_judge_names
+from juriscraper.lib.string_utils import convert_date_string, titlecase
+from juriscraper.OpinionSiteLinear import OpinionSiteLinear

-from juriscraper.lib.string_utils import convert_date_string
-from juriscraper.OpinionSite import OpinionSite

+class Site(OpinionSiteLinear):
+    url_template = (
+        "https://www.courts.maine.gov/courts/sjc/lawcourt/{}/index.html"
+    )
+    first_opinion_year = 2017

-class Site(OpinionSite):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.court_id = self.__module__
         self.url = "https://www.courts.maine.gov/courts/sjc/opinions.html"
-        self.path_root = '//table[contains(.//th[1], "Opinion")]'
-
-    def _get_cell_path(self, cell_number: int, subpath: str = "") -> str:
-        path = '//table[contains(.//th[1], "Opinion")]//td[%d]'
-        return f"{path}/{subpath}" if subpath else path
-
-    def _get_download_urls(self):
-        path = f"{self.path_root}//td[2]/a[1]/@href"
-        return list(self.html.xpath(path))
-
-    def _get_case_names(self):
-        case_names = []
-        path = f"{self.path_root}//td[2]/a[1]"
-        for e in self.html.xpath(path):
-            s = html.tostring(e, method="text", encoding="unicode")
-            case_names.append(s)
-        return case_names
-
-    def _get_case_dates(self):
-        dates = []
-        path = f"{self.path_root}//td[3]"
-        for cell in self.html.xpath(path):
-            date_string = cell.text_content().replace("Aguust", "August")
-            dates.append(convert_date_string(date_string))
-        return dates
-
-    def _get_precedential_statuses(self):
-        return ["Published"] * len(self.case_names)
-
-    def _get_citations(self):
-        path = f"{self.path_root}//td[1]//text()"
-        return list(self.html.xpath(path))
+        self.path_root = '//table[contains(.//th[1], "Opinion")]//tr[td]'
+        self.status = "Published"
+        self.make_backscrape_iterable(kwargs)
+
+    def _process_html(self):
+        for row in self.html.xpath(self.path_root):
+            cite, name, date = row.xpath("./td")
+
+            # handle the one known date typo in the source table ("Aguust")
+            date_str = date.text_content().replace("Aguust", "August")
+
+            case_name = titlecase(name.text_content())
+            if "Revised" in case_name:
+                # drop the "Revised ..." suffix from revised opinions
+                case_name = case_name.split("Revised")[0].strip()
+            self.cases.append(
+                {
+                    "citation": cite.text_content(),
+                    "date": date_str,
+                    "name": case_name,
+                    "url": name.xpath(".//a")[0].attrib["href"],
+                    "docket": "",
+                }
+            )
+
+    def extract_from_text(self, scraped_text: str) -> dict:
+        """Extract additional metadata from the first page of a Maine opinion.
+
+        :param scraped_text: The first page of content
+        :return: The dictionary of extracted data
+        """
+        pattern = re.compile(
+            r"(?P<label>Docket|On Briefs|Decided|Argued|Panel|Reporter of Decisions):\s*(?P<value>[^\n]+)"
+        )
+        extracted = {}
+        for match in pattern.finditer(scraped_text[:500]):
+            label = match.group("label")
+            value = match.group("value").strip()
+            extracted[label] = value
+
+        author = r"(?P<author_str>.*)\n+(\s+)?\[¶1\]"
+        m = re.search(author, scraped_text, re.MULTILINE)
+        if m:
+            if m.group("author_str") == "PER CURIAM":
+                per_curiam = True
+                author_str = ""
+            else:
+                per_curiam = False
+                author_str = m.group("author_str")
+        else:
+            per_curiam = False
+            author_str = ""
+
+        date_argued = extracted.get("On Briefs", "") or extracted.get(
+            "Argued", ""
+        )
+        date_argued_str = ""
+        if date_argued:
+            # Normalize to YYYY-MM-DD
+            date_argued = convert_date_string(date_argued)
+            date_argued_str = date_argued.strftime("%Y-%m-%d")
+
+        metadata = {
+            "Opinion": {
+                "author_str": normalize_judge_names(author_str),
+                "per_curiam": per_curiam,
+            },
+            "OpinionCluster": {
+                "judges": extracted.get("Panel", ""),
+            },
+            "Docket": {
+                "date_argued": date_argued_str,
+                "docket_number": extracted.get("Docket", ""),
+            },
+        }
+
+        return metadata
+
+    def _download_backwards(self, year: int) -> None:
+        self.url = self.url_template.format(year)
+        logger.info("Backscraping for year %s %s", year, self.url)
+        self.html = self._download()
+        self._process_html()
+
+    def make_backscrape_iterable(self, kwargs: dict):
+        if kwargs.get("backscrape_start"):
+            start = int(kwargs["backscrape_start"])
+        else:
+            start = self.first_opinion_year
+
+        if kwargs.get("backscrape_end"):
+            end = int(kwargs["backscrape_end"])
+        else:
+            end = date.today().year - 1
+
+        # range() is end-exclusive; bump end so a single requested year still yields one page
+        if start == end:
+            end = start + 1
+
+        self.back_scrape_iterable = range(start, end)
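
A minimal driver sketch (not part of this commit) for exercising the new backscraper directly, assuming the module is importable at its usual juriscraper path and that Site.__init__ accepts the backscrape_start/backscrape_end keyword arguments that make_backscrape_iterable reads:

    # Hypothetical driver; import path and kwargs are assumptions based on the diff above.
    from juriscraper.opinions.united_states.state import me

    site = me.Site(backscrape_start="2018", backscrape_end="2020")
    for year in site.back_scrape_iterable:  # range(2018, 2020) -> 2018, 2019
        site._download_backwards(year)      # fetches .../lawcourt/<year>/index.html

    for case in site.cases:                 # rows accumulated by _process_html()
        print(case["date"], case["citation"], case["name"], case["url"])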
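
The module docstring still points future work at the CourtListener clusters API; a minimal sketch of that query, assuming the requests package and a CourtListener API token (neither is part of this commit), with response field names that are assumptions about the v3 API:

    # Sketch only: query the endpoint named in the docstring for Maine clusters.
    import requests

    response = requests.get(
        "https://www.courtlistener.com/api/rest/v3/clusters/",
        params={"docket__court__id": "me"},
        headers={"Authorization": "Token YOUR_API_TOKEN"},  # hypothetical token
        timeout=30,
    )
    response.raise_for_status()
    for cluster in response.json().get("results", []):
        print(cluster.get("date_filed"), cluster.get("case_name"))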