Skip to content

Commit 9fe1137

Browse files
authored
Merge pull request #1 from danizen/develop
Add a MarcSearch class to memoize patterns.
2 parents 024a4de + 6da9681 commit 9fe1137

File tree

4 files changed

+53
-7
lines changed

4 files changed

+53
-7
lines changed

README.md

+22
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,28 @@ with open(sys.argv[1], 'rb') as f:
2525
print(subjects)
2626
```
2727

28+
There is also a `MarcSearch` object that memoizes each search expression, so that
29+
you can conveniently run a number of different searches without re-parsing each
30+
spec for every record. For example:
31+
32+
```python
33+
import csv
34+
import sys
35+
from pymarcspec import MarcSearch
36+
from pymarc import MARCReader
37+
38+
writer = csv.writer(sys.stdout, dialect='unix', quoting=csv.QUOTE_MINIMAL)
39+
writer.writerow(['id', 'title', 'subjects'])
40+
41+
marcsearch = MarcSearch()
42+
with open(sys.argv[1], 'rb') as f:
43+
for record in MARCReader(f):
44+
control_id = marcsearch.search('001', record)
45+
title = marcsearch.search('245[0]$a-c', record)
46+
subjects = marcsearch.search('650$a', record, field_delimiter=', ')
47+
writer.writerow([control_id, title, subjects])
48+
```
49+
2850
## Development
2951

3052
### Building the Parser

VERSION.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.0.1
1+
0.0.2

pymarcspec/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
from .parser import MarcSpecParser # noqa:
22
from .semantics import MarcSearchSemantics # noqa:
3-
from .search import MarcSearchParser # noqa:
3+
from .search import MarcSearchParser, MarcSearch # noqa:
44
from .model import MarcSpec # noqa:

pymarcspec/search.py

+29-5
Original file line numberDiff line numberDiff line change
@@ -9,27 +9,51 @@
99
from .semantics import MarcSearchSemantics
1010

1111

12-
# memoize compiling of strings into AST using some searcher
1312
class MarcSearchParser(MarcSpecParser):
1413
def __init__(self, *args, **kwargs):
1514
kwargs.update({
1615
'whitespace': '',
1716
'semantics': MarcSearchSemantics()
1817
})
1918
super().__init__(*args, **kwargs)
19+
self.memoized = dict()
20+
21+
22+
# memoize compiling of strings into specs
23+
class MarcSearch:
24+
"""
25+
Memoizes compiled specifications to offset
26+
cost of compiling each again and again.
27+
28+
Can be used over multiple records and
29+
multiple specs.
30+
"""
31+
def __init__(self):
32+
self.parser = MarcSearchParser()
33+
self.specs = dict()
34+
35+
def parse(self, spec):
36+
compiled_spec = self.specs.get(spec)
37+
if compiled_spec is None:
38+
self.specs[spec] = compiled_spec = self.parser.parse(spec)
39+
return compiled_spec
40+
41+
def search(self, spec, record, **kwargs):
42+
compiled_spec = self.parse(spec)
43+
return compiled_spec.search(record, **kwargs)
2044

2145

2246
def marc_search(marcspec, stream, field_delimiter=':', subfield_delimiter=''):
23-
parser = MarcSearchParser()
24-
spec = parser.parse(marcspec)
47+
searcher = MarcSearch()
48+
searcher.parse(marcspec)
2549

2650
if stream.name.endswith('.xml'):
2751
generator = parse_xml_to_array(stream)
2852
else:
2953
generator = MARCReader(stream)
3054
for record in generator:
31-
result = spec.search(
32-
record,
55+
result = searcher.search(
56+
marcspec, record,
3357
field_delimiter=field_delimiter,
3458
subfield_delimiter=subfield_delimiter
3559
)

0 commit comments

Comments
 (0)