Skip to content

Commit

Permalink
Merge pull request #63 from fblackburn1/pg-dump-options
Browse files: browse the repository at this point in the history
Add `--dump-options` arguments
  • Loading branch information
hkage authored Jun 18, 2024
2 parents (d72a45b + 5a10c74), commit 3420b72
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 23 deletions.
4 changes: 3 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,9 @@ Usage
--port PORT Port of the database
--dry-run Don't commit changes made on the database
--dump-file DUMP_FILE
Create a database dump file with the given name
Create a database dump file with the given name
--dump-options DUMP_OPTIONS
Options to pass to the pg_dump command
--init-sql INIT_SQL SQL to run before starting anonymization
--parallel Data anonymization is done in parallel
Expand Down
4 changes: 3 additions & 1 deletion pganonymize/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ def get_arg_parser():
parser.add_argument('--dry-run', action='store_true', help='Don\'t commit changes made on the database',
default=False)
parser.add_argument('--dump-file', help='Create a database dump file with the given name')
parser.add_argument('--dump-options', help='Options to pass to the pg_dump command',
default='--format custom --compress 9')
parser.add_argument('--init-sql', help='SQL to run before starting anonymization', default=False)
parser.add_argument(
'--parallel',
Expand Down Expand Up @@ -101,4 +103,4 @@ def main(args):
logging.info('Anonymization took {:.2f}s'.format(end_time - start_time))

if args.dump_file:
create_database_dump(args.dump_file, pg_args)
create_database_dump(args.dump_file, pg_args, args.dump_options)
11 changes: 6 additions & 5 deletions pganonymize/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ def truncate_tables(connection):
cursor.close()


def create_database_dump(filename, db_args):
def create_database_dump(filename, db_args, dump_args):
"""
Create a dump file from the current database.
Expand All @@ -298,11 +298,12 @@ def create_database_dump(filename, db_args):
env_vars = ''
if db_args.get('password'):
env_vars += 'PGPASSWORD={password}'.format(password=db_args['password'])
arguments = '-d {dbname} -U {user} -h {host} -p {port}'.format(**db_args)
cmd = '{env_vars}pg_dump -Fc -Z 9 {args} -f {filename}'.format(
arguments = '--dbname {dbname} --username {user} --host {host} --port {port}'.format(**db_args)
cmd = '{env_vars}pg_dump {dump_args} {db_args} --file {filename}'.format(
env_vars='{} '.format(env_vars) if env_vars else '',
args=arguments,
filename=filename
dump_args=dump_args,
db_args=arguments,
filename=filename,
)
logging.info('Creating database dump file "%s"', filename)
subprocess.call(cmd, shell=True)
Expand Down
14 changes: 7 additions & 7 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class TestCli(object):
@pytest.mark.parametrize('cli_args, expected, expected_executes, commit_calls, call_dump', [
['--host localhost --port 5432 --user root --password my-cool-password --dbname db --schema ./tests/schemes/valid_schema.yml -v --init-sql "set work_mem=\'1GB\'"', # noqa
Namespace(verbose=1, list_providers=False, schema='./tests/schemes/valid_schema.yml', dbname='db', user='root',
password='my-cool-password', host='localhost', port='5432', dry_run=False, dump_file=None, init_sql="set work_mem='1GB'", parallel=False), # noqa
password='my-cool-password', host='localhost', port='5432', dry_run=False, dump_file=None, dump_options='--format custom --compress 9', init_sql="set work_mem='1GB'", parallel=False), # noqa
[call("set work_mem='1GB'"),
call('TRUNCATE TABLE "django_session"'),
call('SELECT COUNT(*) FROM "auth_user"'),
Expand All @@ -32,7 +32,7 @@ class TestCli(object):
],
['--dry-run --host localhost --port 5432 --user root --password my-cool-password --dbname db --schema ./tests/schemes/valid_schema.yml -v --init-sql "set work_mem=\'1GB\'"', # noqa
Namespace(verbose=1, list_providers=False, schema='./tests/schemes/valid_schema.yml', dbname='db', user='root',
password='my-cool-password', host='localhost', port='5432', dry_run=True, dump_file=None, init_sql="set work_mem='1GB'", parallel=False), # noqa
password='my-cool-password', host='localhost', port='5432', dry_run=True, dump_file=None, dump_options='--format custom --compress 9', init_sql="set work_mem='1GB'", parallel=False), # noqa
[call("set work_mem='1GB'"),
call('TRUNCATE TABLE "django_session"'),
call('SELECT "id", "first_name", "last_name", "email" FROM "auth_user" LIMIT 100'),
Expand All @@ -42,9 +42,9 @@ class TestCli(object):
],
0, []
],
['--dump-file ./dump.sql --host localhost --port 5432 --user root --password my-cool-password --dbname db --schema ./tests/schemes/valid_schema.yml -v --init-sql "set work_mem=\'1GB\'"', # noqa
['--dump-file ./dump.sql --dump-options "--format plain" --host localhost --port 5432 --user root --password my-cool-password --dbname db --schema ./tests/schemes/valid_schema.yml -v --init-sql "set work_mem=\'1GB\'"', # noqa
Namespace(verbose=1, list_providers=False, schema='./tests/schemes/valid_schema.yml', dbname='db', user='root',
password='my-cool-password', host='localhost', port='5432', dry_run=False, dump_file='./dump.sql', init_sql="set work_mem='1GB'", parallel=False), # noqa
password='my-cool-password', host='localhost', port='5432', dry_run=False, dump_file='./dump.sql', dump_options='--format plain', init_sql="set work_mem='1GB'", parallel=False), # noqa
[
call("set work_mem='1GB'"),
call('TRUNCATE TABLE "django_session"'),
Expand All @@ -56,14 +56,14 @@ class TestCli(object):
call('UPDATE "auth_user" t SET "first_name" = s."first_name", "last_name" = s."last_name", "email" = s."email" FROM "tmp_auth_user" s WHERE t."id" = s."id"') # noqa
],
1,
[call('PGPASSWORD=my-cool-password pg_dump -Fc -Z 9 -d db -U root -h localhost -p 5432 -f ./dump.sql', shell=True)] # noqa
[call('PGPASSWORD=my-cool-password pg_dump --format plain --dbname db --username root --host localhost --port 5432 --file ./dump.sql', shell=True)] # noqa
],
['--list-providers --parallel',
Namespace(verbose=None, list_providers=True, schema='schema.yml', dbname=None, user=None,
password='', host='localhost', port='5432', dry_run=False, dump_file=None, init_sql=False, parallel=True), # noqa
password='', host='localhost', port='5432', dry_run=False, dump_file=None, dump_options='--format custom --compress 9', init_sql=False, parallel=True), # noqa
[], 0, []
]
],
])
def test_cli_args(self, subprocess, patched_connect, quote_ident, cli_args, expected, expected_executes, commit_calls, call_dump): # noqa
arg_parser = get_arg_parser()
Expand Down
29 changes: 20 additions & 9 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,22 +236,33 @@ class TestCreateDatabaseDump(object):

@patch('pganonymize.utils.subprocess.call')
def test(self, mock_call):
create_database_dump(
'/tmp/dump.gz',
{'dbname': 'database', 'user': 'foo', 'host': 'localhost', 'port': 5432},
)
filename = '/tmp/dump.gz'
db_args = {'dbname': 'database', 'user': 'foo', 'host': 'localhost', 'port': 5432}
dump_args = '--format custom --compress 9'
create_database_dump(filename, db_args, dump_args)
mock_call.assert_called_once_with(
'pg_dump -Fc -Z 9 -d database -U foo -h localhost -p 5432 -f /tmp/dump.gz',
'pg_dump --format custom --compress 9 --dbname database --username foo --host localhost --port 5432 --file /tmp/dump.gz', # noqa
shell=True,
)

@patch('pganonymize.utils.subprocess.call')
def test_with_password(self, mock_call):
create_database_dump(
'/tmp/dump.gz',
{'dbname': 'database', 'user': 'foo', 'host': 'localhost', 'port': 5432, 'password': 'pass'},
filename = '/tmp/dump.gz'
db_args = {'dbname': 'database', 'user': 'foo', 'host': 'localhost', 'port': 5432, 'password': 'pass'}
dump_args = '--format custom --compress 9'
create_database_dump(filename, db_args, dump_args)
mock_call.assert_called_once_with(
'PGPASSWORD=pass pg_dump --format custom --compress 9 --dbname database --username foo --host localhost --port 5432 --file /tmp/dump.gz', # noqa
shell=True,
)

@patch('pganonymize.utils.subprocess.call')
def test_with_custom_dump_args(self, mock_call):
filename = '/tmp/dump.gz'
db_args = {'dbname': 'database', 'user': 'foo', 'host': 'localhost', 'port': 5432}
dump_args = '--format plain'
create_database_dump(filename, db_args, dump_args)
mock_call.assert_called_once_with(
'PGPASSWORD=pass pg_dump -Fc -Z 9 -d database -U foo -h localhost -p 5432 -f /tmp/dump.gz',
'pg_dump --format plain --dbname database --username foo --host localhost --port 5432 --file /tmp/dump.gz', # noqa
shell=True,
)

0 comments on commit 3420b72

Please sign in to comment.