Skip to content

Commit 87d1004

Browse files
committed
Merge branch 'development'
2 parents 661435a + d6eab21 commit 87d1004

11 files changed

+211
-129
lines changed

.editorconfig

+11-9
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,21 @@
33
root = true
44

55
[*]
6-
indent_style = space
7-
indent_size = 4
8-
trim_trailing_whitespace = true
9-
insert_final_newline = true
106
charset = utf-8
117
end_of_line = lf
8+
insert_final_newline = true
9+
trim_trailing_whitespace = true
1210

13-
[*.bat]
14-
indent_style = tab
15-
end_of_line = crlf
11+
[*.{py,rst,ini}]
12+
indent_style = space
13+
indent_size = 4
14+
15+
[*.{html,css,scss,json,yml}]
16+
indent_style = space
17+
indent_size = 2
1618

17-
[LICENSE]
18-
insert_final_newline = false
19+
[*.md]
20+
trim_trailing_whitespace = false
1921

2022
[Makefile]
2123
indent_style = tab

CHANGELOG.md

+5
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22

33
## Development
44

5+
## 0.5.0 (2021-06-30)
6+
7+
* [#22](https://github.com/rheinwerk-verlag/postgresql-anonymizer/pull/22): Fix table and column name quotes in cursor.copy_from call ([nurikk](https://github.com/nurikk))
8+
* [#23](https://github.com/rheinwerk-verlag/postgresql-anonymizer/pull/23): Allow unique Faker providers ([nurikk](https://github.com/nurikk))
9+
510
## 0.4.1 (2021-05-27)
611

712
* [#19](https://github.com/rheinwerk-verlag/postgresql-anonymizer/pull/19): Make chunk size in the table definition dynamic ([halilkaya](https://github.com/halilkaya))

README.rst

+12-3
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
PostgreSQL Anonymizer
22
=====================
33

4-
A commandline tool to anonymize PostgreSQL databases for GDPR purposes.
4+
A command-line tool to anonymize PostgreSQL databases for DSGVO/GDPR purposes.
55

6-
It uses a YAML definition file to define which tables and fields should be anonymized and provides various methods of anonymization.
6+
It uses a YAML file to define which tables and fields should be anonymized and provides various methods of anonymization. The tool requires a direct PostgreSQL connection to perform the anonymization.
77

88
.. class:: no-web no-pdf
99

10-
|python| |license| |pypi| |downloads| |build|
10+
|python| |license| |pypi| |downloads| |build| |health|
1111

1212
.. contents::
1313

@@ -34,9 +34,13 @@ Features
3434
+----------------+----------------------+-----------------------+----------------------------------+
3535
| ``email`` | [email protected] | ``md5`` | 0cba00ca3da1b283a57287bcceb17e35 |
3636
+----------------+----------------------+-----------------------+----------------------------------+
37+
| ``email`` | [email protected] | ``fake.unique.email`` | [email protected] |
38+
+----------------+----------------------+-----------------------+----------------------------------+
3739
| ``ip`` | 157.50.1.20 | ``set`` | 127.0.0.1 |
3840
+----------------+----------------------+-----------------------+----------------------------------+
3941

42+
Note: ``fake.unique.[provider]`` is only supported on Python 3.5+ (the minimum Python version supported by the Faker library).
43+
4044
See the `documentation`_ for a more detailed description of the provided anonymization methods.
4145

4246
Installation
@@ -161,3 +165,8 @@ After that you can pass a schema file to the container, using Docker volumes, an
161165

162166
.. |build| image:: https://github.com/rheinwerk-verlag/postgresql-anonymizer/workflows/Test/badge.svg
163167
:target: https://github.com/rheinwerk-verlag/postgresql-anonymizer/actions
168+
169+
.. |health| image:: https://snyk.io/advisor/python/pganonymize/badge.svg
170+
:target: https://snyk.io/advisor/python/pganonymize
171+
:alt: pganonymize
172+

docs/schema.rst

+7-2
Original file line numberDiff line numberDiff line change
@@ -216,8 +216,8 @@ The ``clear`` provider will set a database field to ``null``.
216216

217217
**Arguments:** none
218218

219-
``pganonymize`` supports all providers from the Python library Faker_. All you have to do is prefix
220-
the provider with ``fake`` and then use the function name from the Faker library, e.g:
219+
``pganonymize`` supports all providers from the Python library `Faker`_. All you have to do is prefix the provider with
220+
``fake`` and then use the function name from the Faker library, e.g:
221221

222222
* ``fake.first_name``
223223
* ``fake.street_name``
@@ -235,6 +235,8 @@ the provider with ``fake`` and then use the function name from the Faker library
235235
provider:
236236
name: fake.email
237237

238+
See the `Faker documentation`_ for a full set of providers.
239+
238240
``mask``
239241
~~~~~~~~
240242

@@ -298,3 +300,6 @@ The value can also be a dictionary for JSONB columns::
298300
provider:
299301
name: set
300302
value: '{"foo": "bar", "baz": 1}'
303+
304+
.. _Faker: https://github.com/joke2k/faker
305+
.. _Faker documentation: http://faker.rtfd.org/

pganonymizer/providers.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import operator
12
import random
23
from hashlib import md5
34

@@ -87,9 +88,9 @@ def matches(cls, name):
8788
return cls.id.lower() == name.split('.')[0].lower()
8889

8990
def alter_value(self, value):
90-
func_name = self.kwargs['name'].split('.')[1]
91+
func_name = self.kwargs['name'].split('.', 1)[1]
9192
try:
92-
func = getattr(fake_data, func_name)
93+
func = operator.attrgetter(func_name)(fake_data)
9394
except AttributeError as exc:
9495
raise InvalidProviderArgument(exc)
9596
return func()

pganonymizer/utils.py

+21-14
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def build_data(connection, table, columns, excludes, search, total_count, chunk_
8181
row[key] = value
8282
if verbose:
8383
progress_bar.next()
84-
table_columns = ['"{}"'.format(column) for column in row.keys()]
84+
table_columns = [column for column in row.keys()]
8585
if not row_column_dict:
8686
continue
8787
data.append(row.values())
@@ -97,9 +97,11 @@ def row_matches_excludes(row, excludes=None):
9797
9898
:param list row: The data row
9999
:param list excludes: A list of field exclusion rules, e.g.:
100-
[
101-
{'email': ['\\S.*@example.com', '\\S.*@foobar.com', ]}
102-
]
100+
101+
>>> [
102+
>>> {'email': ['\\S.*@example.com', '\\S.*@foobar.com', ]}
103+
>>> ]
104+
103105
:return: True or False
104106
:rtype: bool
105107
"""
@@ -126,7 +128,8 @@ def copy_from(connection, data, table, columns):
126128
new_data = data2csv(data)
127129
cursor = connection.cursor()
128130
try:
129-
cursor.copy_from(new_data, table, sep=COPY_DB_DELIMITER, null='\\N', columns=columns)
131+
quoted_cols = ['"{}"'.format(column) for column in columns]
132+
cursor.copy_from(new_data, table, sep=COPY_DB_DELIMITER, null='\\N', columns=quoted_cols)
130133
except (BadCopyFileFormat, InvalidTextRepresentation) as exc:
131134
raise BadDataFormat(exc)
132135
cursor.close()
@@ -145,9 +148,9 @@ def import_data(connection, column_dict, source_table, table_columns, primary_ke
145148
:param list data: The table data.
146149
"""
147150
primary_key = primary_key if primary_key else DEFAULT_PRIMARY_KEY
148-
temp_table = '"tmp_{table}"'.format(table=source_table)
151+
temp_table = 'tmp_{table}'.format(table=source_table)
149152
cursor = connection.cursor()
150-
cursor.execute('CREATE TEMP TABLE %s (LIKE %s INCLUDING ALL) ON COMMIT DROP;' % (temp_table, source_table))
153+
cursor.execute('CREATE TEMP TABLE "%s" (LIKE %s INCLUDING ALL) ON COMMIT DROP;' % (temp_table, source_table))
151154
copy_from(connection, data, temp_table, table_columns)
152155
set_columns = ', '.join(['{column} = s.{column}'.format(column='"{}"'.format(key)) for key in column_dict.keys()])
153156
sql = (
@@ -221,10 +224,12 @@ def get_column_dict(columns):
221224
Return a dictionary with all fields from the table definition and None as value.
222225
223226
:param list columns: A list of field definitions from the YAML schema, e.g.:
224-
[
225-
{'first_name': {'provider': 'set', 'value': 'Foo'}},
226-
{'guest_email': {'append': '@localhost', 'provider': 'md5'}},
227-
]
227+
228+
>>> [
229+
>>> {'first_name': {'provider': 'set', 'value': 'Foo'}},
230+
>>> {'guest_email': {'append': '@localhost', 'provider': 'md5'}},
231+
>>> ]
232+
228233
:return: A dictionary containing all fields to be altered with a default value of None, e.g.::
229234
{'guest_email': None}
230235
:rtype: dict
@@ -242,9 +247,11 @@ def get_column_values(row, columns):
242247
243248
:param psycopg2.extras.DictRow row: A data row from the current table to be altered
244249
:param list columns: A list of table columns with their provider rules, e.g.:
245-
[
246-
{'guest_email': {'append': '@localhost', 'provider': 'md5'}}
247-
]
250+
251+
>>> [
252+
>>> {'guest_email': {'append': '@localhost', 'provider': 'md5'}}
253+
>>> ]
254+
248255
:return: A dictionary with all fields that have to be altered and their value for a single data row, e.g.:
249256
{'guest_email': '12faf5a9bb6f6f067608dca3027c8fcb@localhost'}
250257
:rtype: dict

pganonymizer/version.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
# -*- coding: utf-8 -*-
22

3-
__version__ = '0.4.1'
3+
__version__ = '0.5.0'

0 commit comments

Comments
 (0)