Commit 48916bf

modernize
1 parent 46de586 commit 48916bf

12 files changed (+14 -24 lines)


csv_example/csv_evaluation.py

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@ def dupePairs(filename, rowname):
     if "x" in dupe_d:
         del dupe_d["x"]
 
-    dupe_s = set([])
+    dupe_s = set()
     for unique_id, cluster in dupe_d.items():
         if len(cluster) > 1:
             for pair in itertools.combinations(cluster, 2):
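Reviewer's note on this change (an illustrative sketch, not part of the commit): set([]) and set() build the same empty set; the bare call just skips constructing a throwaway list first. Python has no empty-set literal, since {} is a dict:

# Both spellings yield the same empty set:
assert set() == set([])
# But note that {} is an empty dict, not an empty set:
assert isinstance({}, dict)
assert isinstance({1}, set)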

csv_example/csv_example.py

Lines changed: 1 addition & 2 deletions
@@ -1,5 +1,4 @@
 #!/usr/bin/python
-# -*- coding: utf-8 -*-
 """
 This code demonstrates how to use dedupe with a comma separated values
 (CSV) file. All operations are performed in memory, so will run very
@@ -79,7 +78,7 @@ def readData(filename):
         log_level = logging.INFO
     elif opts.verbose >= 2:
         log_level = logging.DEBUG
-logging.getLogger().setLevel(log_level)
+logging.basicConfig(level=log_level)
 
 # ## Setup
 
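Reviewer's note (a sketch, not part of the commit): the old line only raised the root logger's level, but with no handler configured, records from named loggers still fall through to the implicit "last resort" handler, which filters everything below WARNING. basicConfig(level=...) attaches a stream handler and sets the level in one call:

import logging

logger = logging.getLogger("example")

# Setting only the root level leaves the root logger without a handler,
# so this INFO record is filtered by the last-resort handler (WARNING):
logging.getLogger().setLevel(logging.INFO)
logger.info("this record is dropped")

# basicConfig() attaches a stderr handler and sets the level together:
logging.basicConfig(level=logging.INFO)
logger.info("this record is printed")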

extended-variables/officers.py

Lines changed: 1 addition & 2 deletions
@@ -1,5 +1,4 @@
 #!/usr/bin/python
-# -*- coding: utf-8 -*-
 """
 This code demonstrates how to use some extended dedupe variables
 """
@@ -169,7 +168,7 @@ def readData(filename):
 # 'Cluster ID' which indicates which records refer to each other.
 
 cluster_membership = {}
-for (cluster_id, cluster) in enumerate(clustered_dupes):
+for cluster_id, cluster in enumerate(clustered_dupes):
     id_set, scores = cluster
     for record_id, score in zip(id_set, scores):
         cluster_membership[record_id] = {
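Reviewer's note (illustrative only; the data below is made up): the parentheses around the loop targets were redundant, and both spellings unpack each pair identically:

clustered_dupes = [((1, 2), (0.9, 0.9))]  # hypothetical stand-in data

for (cluster_id, cluster) in enumerate(clustered_dupes):
    pass
for cluster_id, cluster in enumerate(clustered_dupes):
    pass  # identical unpacking, less punctuation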

gazetteer_example/gazetteer_example.py

Lines changed: 3 additions & 4 deletions
@@ -1,5 +1,4 @@
 #!/usr/bin/python
-# -*- coding: utf-8 -*-
 """
 This code demonstrates the Gazetteer.
 
@@ -49,7 +48,7 @@ def readData(filename):
     with open(filename) as f:
         reader = csv.DictReader(f)
         for i, row in enumerate(reader):
-            clean_row = dict([(k, preProcess(v)) for (k, v) in row.items()])
+            clean_row = {k: preProcess(v) for (k, v) in row.items()}
             if clean_row["price"]:
                 clean_row["price"] = float(clean_row["price"][1:])
             data_d[filename + str(i)] = dict(clean_row)
@@ -92,10 +91,10 @@ def readData(filename):
 
 print("importing data ...")
 messy = readData(messy_file)
-print("N data 1 records: {}".format(len(messy)))
+print(f"N data 1 records: {len(messy)}")
 
 canonical = readData(canon_file)
-print("N data 2 records: {}".format(len(canonical)))
+print(f"N data 2 records: {len(canonical)}")
 
 def descriptions():
     for dataset in (messy, canonical):
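Reviewer's note (a sketch, not part of the commit; this preProcess is a trivial stand-in for the example's real helper): both rewrites are behavior-preserving. The comprehension replaces a list-of-tuples round trip through dict(), and the f-string replaces str.format():

def preProcess(value):
    return value.strip().lower()  # stand-in, not the example's actual logic

row = {"title": "  LED TV ", "price": "$99"}

old = dict([(k, preProcess(v)) for (k, v) in row.items()])
new = {k: preProcess(v) for (k, v) in row.items()}
assert old == new

n = 3
assert "N data 1 records: {}".format(n) == f"N data 1 records: {n}"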

gazetteer_example/gazetteer_postgres_example.py

Lines changed: 1 addition & 2 deletions
@@ -1,5 +1,4 @@
 #!/usr/bin/python
-# -*- coding: utf-8 -*-
 """
 This code demonstrates Gazetteer matching backed by a Postgres database.
 
@@ -160,7 +159,7 @@ def read_data_for_postgres(filename):
     writer.writeheader()
 
     for idx, row in enumerate(reader):
-        clean_row = dict([(k, preProcess(v)) for k, v in row.items()])
+        clean_row = {k: preProcess(v) for k, v in row.items()}
         if clean_row["price"]:
             clean_row["price"] = float(clean_row["price"][1:])
         if clean_row["unique_id"]:

mysql_example/mysql_example.py

Lines changed: 0 additions & 1 deletion
@@ -1,5 +1,4 @@
 #!/usr/bin/python
-# -*- coding: utf-8 -*-
 
 """
 This is an example of working with very large data. There are about
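Reviewer's note on the removed coding declarations (not part of the commit): Python 3 reads source files as UTF-8 by default (PEP 3120), so the cookie is redundant:

# No "# -*- coding: utf-8 -*-" line needed on Python 3:
city = "Chicago café"
print(city)  # non-ASCII literals work without any coding declaration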

mysql_example/mysql_init_db.py

Lines changed: 0 additions & 1 deletion
@@ -1,5 +1,4 @@
 #!/usr/bin/python
-# -*- coding: utf-8 -*-
 """
 This is a setup script for mysql_example. It downloads a zip file of
 Illinois campaign contributions and loads them into a MySQL database

patent_example/patent_evaluation.py

Lines changed: 2 additions & 2 deletions
@@ -31,8 +31,8 @@ def dupePairs(filename, colname):
     if "x" in dupe_d:
         del dupe_d["x"]
 
-    dupe_s = set([])
-    for (unique_id, cluster) in dupe_d.items():
+    dupe_s = set()
+    for unique_id, cluster in dupe_d.items():
         if len(cluster) > 1:
             for pair in itertools.combinations(cluster, 2):
                 dupe_s.add(frozenset(pair))

patent_example/patent_example.py

Lines changed: 1 addition & 2 deletions
@@ -1,5 +1,4 @@
 #!/usr/bin/python
-# -*- coding: utf-8 -*-
 """
 This code demonstrates how to use dedupe to disambiguate patent
 authors and demonstrates the Set and LatLong data types.
@@ -26,7 +25,7 @@ def readData(filename, set_delim="**"):
     with open(filename) as f:
         reader = csv.DictReader(f)
         for idx, row in enumerate(reader):
-            row = dict((k, v.lower()) for k, v in row.items())
+            row = {k: v.lower() for k, v in row.items()}
             if row["Lat"] == row["Lng"] == "0.0":
                 row["LatLong"] = None
             else:

pgsql_big_dedupe_example/pgsql_big_dedupe_example.py

Lines changed: 1 addition & 2 deletions
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
 
 """
 This is an example of working with very large data. There are about
@@ -38,7 +37,7 @@
 register_adapter(numpy.float64, AsIs)
 
 
-class Readable(object):
+class Readable:
     def __init__(self, iterator):
 
         self.output = io.StringIO()
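Reviewer's note (a minimal sketch, not part of the commit): in Python 3 every class is new-style, so inheriting from object explicitly adds nothing:

class WithObject(object):
    pass

class Without:
    pass

# Both method resolution orders end at object either way:
assert WithObject.__mro__[-1] is object
assert Without.__mro__[-1] is object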

pgsql_big_dedupe_example/pgsql_big_dedupe_example_init_db.py

Lines changed: 2 additions & 3 deletions
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
 """
 This is a setup script for mysql_example. It downloads a zip file of
 Illinois campaign contributions and loads them into a MySQL database
@@ -51,7 +50,7 @@
 # Postgres COPY doesn't handle "ragged" files very well
 if not os.path.exists(contributions_csv_file):
     print("converting tab-delimited raw file to csv...")
-    with open(contributions_txt_file, "rU") as txt_file, open(
+    with open(contributions_txt_file) as txt_file, open(
         contributions_csv_file, "w"
     ) as csv_file:
         csv_writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
@@ -109,7 +108,7 @@
 
 conn.commit()
 
-with open(contributions_csv_file, "rU") as csv_file:
+with open(contributions_csv_file) as csv_file:
     c.copy_expert(
         "COPY raw_table "
         "(reciept_id, last_name, first_name, "

record_linkage_example/record_linkage_example.py

Lines changed: 1 addition & 2 deletions
@@ -1,5 +1,4 @@
 #!/usr/bin/python
-# -*- coding: utf-8 -*-
 """
 This code demonstrates how to use RecordLink with two comma separated
 values (CSV) files. We have listings of products from two different
@@ -49,7 +48,7 @@ def readData(filename):
     with open(filename) as f:
         reader = csv.DictReader(f)
         for i, row in enumerate(reader):
-            clean_row = dict([(k, preProcess(v)) for (k, v) in row.items()])
+            clean_row = {k: preProcess(v) for (k, v) in row.items()}
             if clean_row["price"]:
                 clean_row["price"] = float(clean_row["price"][1:])
             data_d[filename + str(i)] = dict(clean_row)
