From f30f9266bb95fcc962aac773adf06ad18caa6cc6 Mon Sep 17 00:00:00 2001 From: Francois Blackburn Date: Fri, 7 Jun 2024 14:39:23 -0400 Subject: [PATCH 1/2] use long syntax for pg_dump command why: explicit is always better, by removing confusion on uncommon options --- pganonymize/utils.py | 4 ++-- tests/test_cli.py | 2 +- tests/test_utils.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pganonymize/utils.py b/pganonymize/utils.py index 576586d..1a13aa5 100644 --- a/pganonymize/utils.py +++ b/pganonymize/utils.py @@ -298,8 +298,8 @@ def create_database_dump(filename, db_args): env_vars = '' if db_args.get('password'): env_vars += 'PGPASSWORD={password}'.format(password=db_args['password']) - arguments = '-d {dbname} -U {user} -h {host} -p {port}'.format(**db_args) - cmd = '{env_vars}pg_dump -Fc -Z 9 {args} -f {filename}'.format( + arguments = '--dbname {dbname} --username {user} --host {host} --port {port}'.format(**db_args) + cmd = '{env_vars}pg_dump --format custom --compress 9 {args} --file {filename}'.format( env_vars='{} '.format(env_vars) if env_vars else '', args=arguments, filename=filename diff --git a/tests/test_cli.py b/tests/test_cli.py index 3fa701a..9aab9ba 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -56,7 +56,7 @@ class TestCli(object): call('UPDATE "auth_user" t SET "first_name" = s."first_name", "last_name" = s."last_name", "email" = s."email" FROM "tmp_auth_user" s WHERE t."id" = s."id"') # noqa ], 1, - [call('PGPASSWORD=my-cool-password pg_dump -Fc -Z 9 -d db -U root -h localhost -p 5432 -f ./dump.sql', shell=True)] # noqa + [call('PGPASSWORD=my-cool-password pg_dump --format custom --compress 9 --dbname db --username root --host localhost --port 5432 --file ./dump.sql', shell=True)] # noqa ], ['--list-providers --parallel', diff --git a/tests/test_utils.py b/tests/test_utils.py index 27fd592..4ac1a1a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -241,7 +241,7 @@ def test(self, mock_call): {'dbname': 'database', 'user': 'foo', 'host': 'localhost', 'port': 5432}, ) mock_call.assert_called_once_with( - 'pg_dump -Fc -Z 9 -d database -U foo -h localhost -p 5432 -f /tmp/dump.gz', + 'pg_dump --format custom --compress 9 --dbname database --username foo --host localhost --port 5432 --file /tmp/dump.gz', shell=True, ) @@ -252,6 +252,6 @@ def test_with_password(self, mock_call): {'dbname': 'database', 'user': 'foo', 'host': 'localhost', 'port': 5432, 'password': 'pass'}, ) mock_call.assert_called_once_with( - 'PGPASSWORD=pass pg_dump -Fc -Z 9 -d database -U foo -h localhost -p 5432 -f /tmp/dump.gz', + 'PGPASSWORD=pass pg_dump --format custom --compress 9 --dbname database --username foo --host localhost --port 5432 --file /tmp/dump.gz', shell=True, ) From 5a10c7495c593a0aff68bb3cd617cb1774a34e92 Mon Sep 17 00:00:00 2001 From: Francois Blackburn Date: Fri, 7 Jun 2024 14:51:05 -0400 Subject: [PATCH 2/2] add dump-options to modify pg_dump command --- README.rst | 4 +++- pganonymize/cli.py | 4 +++- pganonymize/utils.py | 9 +++++---- tests/test_cli.py | 14 +++++++------- tests/test_utils.py | 29 ++++++++++++++++++++--------- 5 files changed, 38 insertions(+), 22 deletions(-) diff --git a/README.rst b/README.rst index f2f7d05..c2427da 100644 --- a/README.rst +++ b/README.rst @@ -85,7 +85,9 @@ Usage --port PORT Port of the database --dry-run Don't commit changes made on the database --dump-file DUMP_FILE - Create a database dump file with the given name + Create a database dump file with the given name + --dump-options DUMP_OPTIONS + Options to pass to the pg_dump command --init-sql INIT_SQL SQL to run before starting anonymization --parallel Data anonymization is done in parallel diff --git a/pganonymize/cli.py b/pganonymize/cli.py index 9757885..9d035b0 100644 --- a/pganonymize/cli.py +++ b/pganonymize/cli.py @@ -46,6 +46,8 @@ def get_arg_parser(): parser.add_argument('--dry-run', action='store_true', help='Don\'t commit changes made on the database', default=False) parser.add_argument('--dump-file', help='Create a database dump file with the given name') + parser.add_argument('--dump-options', help='Options to pass to the pg_dump command', + default='--format custom --compress 9') parser.add_argument('--init-sql', help='SQL to run before starting anonymization', default=False) parser.add_argument( '--parallel', @@ -101,4 +103,4 @@ def main(args): logging.info('Anonymization took {:.2f}s'.format(end_time - start_time)) if args.dump_file: - create_database_dump(args.dump_file, pg_args) + create_database_dump(args.dump_file, pg_args, args.dump_options) diff --git a/pganonymize/utils.py b/pganonymize/utils.py index 1a13aa5..211720e 100644 --- a/pganonymize/utils.py +++ b/pganonymize/utils.py @@ -288,7 +288,7 @@ def truncate_tables(connection): cursor.close() -def create_database_dump(filename, db_args): +def create_database_dump(filename, db_args, dump_args): """ Create a dump file from the current database. @@ -299,10 +299,11 @@ def create_database_dump(filename, db_args): if db_args.get('password'): env_vars += 'PGPASSWORD={password}'.format(password=db_args['password']) arguments = '--dbname {dbname} --username {user} --host {host} --port {port}'.format(**db_args) - cmd = '{env_vars}pg_dump --format custom --compress 9 {args} --file {filename}'.format( + cmd = '{env_vars}pg_dump {dump_args} {db_args} --file {filename}'.format( env_vars='{} '.format(env_vars) if env_vars else '', - args=arguments, - filename=filename + dump_args=dump_args, + db_args=arguments, + filename=filename, ) logging.info('Creating database dump file "%s"', filename) subprocess.call(cmd, shell=True) diff --git a/tests/test_cli.py b/tests/test_cli.py index 9aab9ba..b1c462d 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -17,7 +17,7 @@ class TestCli(object): @pytest.mark.parametrize('cli_args, expected, expected_executes, commit_calls, call_dump', [ ['--host localhost --port 5432 --user root --password my-cool-password --dbname db --schema ./tests/schemes/valid_schema.yml -v --init-sql "set work_mem=\'1GB\'"', # noqa Namespace(verbose=1, list_providers=False, schema='./tests/schemes/valid_schema.yml', dbname='db', user='root', - password='my-cool-password', host='localhost', port='5432', dry_run=False, dump_file=None, init_sql="set work_mem='1GB'", parallel=False), # noqa + password='my-cool-password', host='localhost', port='5432', dry_run=False, dump_file=None, dump_options='--format custom --compress 9', init_sql="set work_mem='1GB'", parallel=False), # noqa [call("set work_mem='1GB'"), call('TRUNCATE TABLE "django_session"'), call('SELECT COUNT(*) FROM "auth_user"'), @@ -32,7 +32,7 @@ class TestCli(object): ], ['--dry-run --host localhost --port 5432 --user root --password my-cool-password --dbname db --schema ./tests/schemes/valid_schema.yml -v --init-sql "set work_mem=\'1GB\'"', # noqa Namespace(verbose=1, list_providers=False, schema='./tests/schemes/valid_schema.yml', dbname='db', user='root', - password='my-cool-password', host='localhost', port='5432', dry_run=True, dump_file=None, init_sql="set work_mem='1GB'", parallel=False), # noqa + password='my-cool-password', host='localhost', port='5432', dry_run=True, dump_file=None, dump_options='--format custom --compress 9', init_sql="set work_mem='1GB'", parallel=False), # noqa [call("set work_mem='1GB'"), call('TRUNCATE TABLE "django_session"'), call('SELECT "id", "first_name", "last_name", "email" FROM "auth_user" LIMIT 100'), @@ -42,9 +42,9 @@ class TestCli(object): ], 0, [] ], - ['--dump-file ./dump.sql --host localhost --port 5432 --user root --password my-cool-password --dbname db --schema ./tests/schemes/valid_schema.yml -v --init-sql "set work_mem=\'1GB\'"', # noqa + ['--dump-file ./dump.sql --dump-options "--format plain" --host localhost --port 5432 --user root --password my-cool-password --dbname db --schema ./tests/schemes/valid_schema.yml -v --init-sql "set work_mem=\'1GB\'"', # noqa Namespace(verbose=1, list_providers=False, schema='./tests/schemes/valid_schema.yml', dbname='db', user='root', - password='my-cool-password', host='localhost', port='5432', dry_run=False, dump_file='./dump.sql', init_sql="set work_mem='1GB'", parallel=False), # noqa + password='my-cool-password', host='localhost', port='5432', dry_run=False, dump_file='./dump.sql', dump_options='--format plain', init_sql="set work_mem='1GB'", parallel=False), # noqa [ call("set work_mem='1GB'"), call('TRUNCATE TABLE "django_session"'), @@ -56,14 +56,14 @@ class TestCli(object): call('UPDATE "auth_user" t SET "first_name" = s."first_name", "last_name" = s."last_name", "email" = s."email" FROM "tmp_auth_user" s WHERE t."id" = s."id"') # noqa ], 1, - [call('PGPASSWORD=my-cool-password pg_dump --format custom --compress 9 --dbname db --username root --host localhost --port 5432 --file ./dump.sql', shell=True)] # noqa + [call('PGPASSWORD=my-cool-password pg_dump --format plain --dbname db --username root --host localhost --port 5432 --file ./dump.sql', shell=True)] # noqa ], ['--list-providers --parallel', Namespace(verbose=None, list_providers=True, schema='schema.yml', dbname=None, user=None, - password='', host='localhost', port='5432', dry_run=False, dump_file=None, init_sql=False, parallel=True), # noqa + password='', host='localhost', port='5432', dry_run=False, dump_file=None, dump_options='--format custom --compress 9', init_sql=False, parallel=True), # noqa [], 0, [] - ] + ], ]) def test_cli_args(self, subprocess, patched_connect, quote_ident, cli_args, expected, expected_executes, commit_calls, call_dump): # noqa arg_parser = get_arg_parser() diff --git a/tests/test_utils.py b/tests/test_utils.py index 4ac1a1a..03f6d4a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -236,22 +236,33 @@ class TestCreateDatabaseDump(object): @patch('pganonymize.utils.subprocess.call') def test(self, mock_call): - create_database_dump( - '/tmp/dump.gz', - {'dbname': 'database', 'user': 'foo', 'host': 'localhost', 'port': 5432}, - ) + filename = '/tmp/dump.gz' + db_args = {'dbname': 'database', 'user': 'foo', 'host': 'localhost', 'port': 5432} + dump_args = '--format custom --compress 9' + create_database_dump(filename, db_args, dump_args) mock_call.assert_called_once_with( - 'pg_dump --format custom --compress 9 --dbname database --username foo --host localhost --port 5432 --file /tmp/dump.gz', + 'pg_dump --format custom --compress 9 --dbname database --username foo --host localhost --port 5432 --file /tmp/dump.gz', # noqa shell=True, ) @patch('pganonymize.utils.subprocess.call') def test_with_password(self, mock_call): - create_database_dump( - '/tmp/dump.gz', - {'dbname': 'database', 'user': 'foo', 'host': 'localhost', 'port': 5432, 'password': 'pass'}, + filename = '/tmp/dump.gz' + db_args = {'dbname': 'database', 'user': 'foo', 'host': 'localhost', 'port': 5432, 'password': 'pass'} + dump_args = '--format custom --compress 9' + create_database_dump(filename, db_args, dump_args) + mock_call.assert_called_once_with( + 'PGPASSWORD=pass pg_dump --format custom --compress 9 --dbname database --username foo --host localhost --port 5432 --file /tmp/dump.gz', # noqa + shell=True, ) + + @patch('pganonymize.utils.subprocess.call') + def test_with_custom_dump_args(self, mock_call): + filename = '/tmp/dump.gz' + db_args = {'dbname': 'database', 'user': 'foo', 'host': 'localhost', 'port': 5432} + dump_args = '--format plain' + create_database_dump(filename, db_args, dump_args) mock_call.assert_called_once_with( - 'PGPASSWORD=pass pg_dump --format custom --compress 9 --dbname database --username foo --host localhost --port 5432 --file /tmp/dump.gz', + 'pg_dump --format plain --dbname database --username foo --host localhost --port 5432 --file /tmp/dump.gz', # noqa shell=True, )