-
-
Notifications
You must be signed in to change notification settings - Fork 125
feat(lactapp_2): new scraper for Louisiana Court of Appeals Second Circuit #1299
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
b56a955
75ce2b6
6fda2fc
57abc9b
3c1ce77
aade79e
d2989e8
1c8ba33
3d85f9c
680fb16
e07ae7b
9c38cf6
5889503
ffc687a
42ffdef
143c227
2e58de2
0111cf7
7dc0b88
76dabe2
a24315f
583e178
160b4f8
7f805f3
effeca3
a94195f
cc7e768
d26e537
8d42470
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -61,6 +61,7 @@ | |
"kyctapp", | ||
"la", | ||
"lactapp_1", | ||
"lactapp_2", | ||
"lactapp_5", | ||
"mass", | ||
"massappct", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
from datetime import date, datetime | ||
grossir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
from juriscraper.AbstractSite import logger | ||
from juriscraper.lib.date_utils import unique_year_month | ||
from juriscraper.lib.html_utils import ( | ||
get_row_column_links, | ||
get_row_column_text, | ||
) | ||
from juriscraper.OpinionSiteLinear import OpinionSiteLinear | ||
|
||
|
||
class Site(OpinionSiteLinear): | ||
first_opinion_date = datetime(2019, 7, 17) | ||
days_interval = 28 # Monthly interval | ||
|
||
def __init__(self, *args, **kwargs): | ||
super().__init__(*args, **kwargs) | ||
self.court_id = self.__module__ | ||
self.base_url = "https://www.la2nd.org/opinions/" | ||
self.year = datetime.now().year | ||
self.url = f"{self.base_url}?opinion_year={self.year}" | ||
grossir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self.cases = [] | ||
grossir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self.status = "Published" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Status is not always published, so you can't just assign it. There are two opinions in 2024 that do not share that distinction. Thankfully you can just use
and do something like this
to get status. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This line should be removed now that you updated it. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oops, my bad—I cleaned that up in my last push. |
||
self.target_date = None | ||
self.make_backscrape_iterable(kwargs) | ||
|
||
def _download(self): | ||
html = super()._download() | ||
if html is not None: | ||
tables = html.cssselect("table#datatable") | ||
if not tables or not tables[0].cssselect("tbody tr"): | ||
grossir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self.year -= 1 | ||
self.url = f"{self.base_url}?opinion_year={self.year}" | ||
return self._download() | ||
return html | ||
|
||
def _process_html(self): | ||
if self.html is None: | ||
return | ||
|
||
grossir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
tables = self.html.cssselect("table#datatable") | ||
if tables and tables[0].cssselect("tbody tr"): | ||
grossir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
logger.info(f"Processing cases for year: {self.year}") | ||
for row in tables[0].cssselect("tbody tr"): | ||
case_date = datetime.strptime( | ||
get_row_column_text(row, 1), "%m/%d/%Y" | ||
).date() | ||
|
||
# Skip if before first opinion date | ||
if case_date < self.first_opinion_date.date(): | ||
continue | ||
|
||
# Only apply date filtering during backscrape | ||
if ( | ||
hasattr(self, "back_scrape_iterable") | ||
and self.back_scrape_iterable | ||
): | ||
if self.target_date: | ||
target_month = self.target_date.month | ||
target_year = self.target_date.year | ||
if ( | ||
case_date.year != target_year | ||
or case_date.month != target_month | ||
): | ||
continue | ||
grossir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
self.cases.append( | ||
{ | ||
"date": get_row_column_text(row, 1), | ||
grossir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"docket": get_row_column_text(row, 2), | ||
"name": get_row_column_text(row, 3), | ||
"author": get_row_column_text(row, 4), | ||
"disposition": get_row_column_text(row, 5), | ||
"lower_court": get_row_column_text(row, 6), | ||
"summary": get_row_column_text(row, 7), | ||
flooie marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"url": get_row_column_links(row, 8), | ||
} | ||
) | ||
|
||
def _download_backwards(self, target_date: date) -> None: | ||
logger.info(f"Backscraping for date: {target_date}") | ||
self.target_date = target_date | ||
self.year = target_date.year | ||
self.url = f"{self.base_url}?opinion_year={self.year}" | ||
self.html = self._download() | ||
grossir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self._process_html() | ||
|
||
def make_backscrape_iterable(self, kwargs): | ||
grossir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
super().make_backscrape_iterable(kwargs) | ||
self.back_scrape_iterable = unique_year_month( | ||
grossir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self.back_scrape_iterable | ||
) |
Uh oh!
There was an error while loading. Please reload this page.