Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Minnesota Attorney General Opinions #172

Merged
merged 2 commits into from
Feb 1, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions juriscraper/opinions/united_states/state/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
'michctapp_p',
'michctapp_u',
'minn',
'minnag',
'minnctapp',
'miss_beginningofyear',
'miss',
Expand Down
56 changes: 56 additions & 0 deletions juriscraper/opinions/united_states/state/minnag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Scraper for Minnesota Attorney General Opinions
CourtID: minnag
Court Short Name: MN
Author: David Cook
Reviewer:
"""

import re
from lxml import html

from juriscraper.OpinionSite import OpinionSite
from juriscraper.lib.string_utils import convert_date_string


class Site(OpinionSite):
    """Scraper for the Minnesota Attorney General opinions index page.

    The index page is parsed once, immediately after it is downloaded, into
    ``self.opinions``: a list of dicts with the keys ``name``, ``url``,
    ``date`` and ``summary``.  The ``_get_*`` accessors each return one
    column of that list, so all of them have the same length and order.
    """

    # Collapses any run of whitespace (including newlines) to one space.
    _WHITESPACE = re.compile(r'\s+')

    # One <p> per opinion inside the "op" container of the content area.
    _OPINION_XPATH = "//div[@id='content']/div[@id='op']/p"

    def __init__(self, *args, **kwargs):
        super(Site, self).__init__(*args, **kwargs)
        self.court_id = self.__module__
        self.domain = 'http://www.ag.state.mn.us'
        self.url = 'http://www.ag.state.mn.us/office/opinions/DATE.asp'
        self.opinions = []

    def _download(self, request_dict=None):
        """Download the index page and extract the opinion records from it.

        :param request_dict: optional request options forwarded to the
            parent class; ``None`` is treated as an empty dict.
        :return: whatever the parent ``_download`` returned (it must support
            ``.xpath()``, since it is handed to the extraction step).
        """
        # A ``{}`` default would be one dict shared by every call, so build
        # a fresh one here instead.
        if request_dict is None:
            request_dict = {}
        html_tree = super(Site, self)._download(request_dict)
        # Start from an empty list so that downloading twice with the same
        # instance does not leave duplicated records behind.
        self.opinions = []
        self._extract_case_data_from_html(html_tree)
        return html_tree

    def _extract_case_data_from_html(self, html):
        """Append one record to ``self.opinions`` per opinion paragraph.

        Each paragraph is expected to look like
        ``<a href="..."><strong>DATE</strong> &mdash; NAME</a><br/>SUMMARY``.
        Paragraphs that lack a link, a date or a title are not opinion
        entries and are skipped instead of aborting the whole scrape.

        :param html: parsed document (anything exposing ``.xpath()``).
        """
        for paragraph in html.xpath(self._OPINION_XPATH):
            urls = paragraph.xpath('a/@href')
            dates = paragraph.xpath('a/strong/text()')
            titles = paragraph.xpath(
                'a/strong/following-sibling::text()[1]')
            if not (urls and dates and titles):
                continue

            # The link text reads "<date> EM-DASH <name>"; keep what follows
            # the first em dash, or the whole text when there is no dash.
            name = titles[0].split(u"\u2014", 1)[-1]
            name = self._WHITESPACE.sub(' ', name).strip()

            # Everything after the <br/> (bare text plus text nested inside
            # following elements) forms the summary.
            summary = ' '.join(paragraph.xpath(
                'br/following-sibling::text() | '
                'br/following-sibling::*//text()'))
            summary = self._WHITESPACE.sub(' ', summary).strip()

            self.opinions.append({
                'name': name,
                'url': urls[0],
                'date': convert_date_string(dates[0]),
                'summary': summary,
            })

    def _get_case_names(self):
        """Return the opinion names, in page order."""
        return [opinion['name'] for opinion in self.opinions]

    def _get_download_urls(self):
        """Return the opinion links exactly as found in the ``href``s."""
        return [opinion['url'] for opinion in self.opinions]

    def _get_case_dates(self):
        """Return the opinion dates as produced by ``convert_date_string``."""
        return [opinion['date'] for opinion in self.opinions]

    def _get_summaries(self):
        """Return the whitespace-normalised opinion summaries."""
        return [opinion['summary'] for opinion in self.opinions]

    def _get_precedential_statuses(self):
        """Return ``'Published'`` once for every extracted opinion."""
        return ['Published'] * len(self.opinions)
Loading