From 0fc16799fb6004d2d5c345660a77bdb250b6f43e Mon Sep 17 00:00:00 2001 From: Mark Kennedy Date: Thu, 30 Jul 2020 17:18:46 +0100 Subject: [PATCH 1/2] add new config option for charset, with default utf-8; support charset option in mysql dump reader class --- database_sanitizer/config.py | 12 ++++++++++++ database_sanitizer/dump/mysql.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/database_sanitizer/config.py b/database_sanitizer/config.py index d5f1c6e..77363ed 100644 --- a/database_sanitizer/config.py +++ b/database_sanitizer/config.py @@ -12,6 +12,7 @@ SKIP_ROWS_CONFIG_VALUE = "skip_rows" MYSQLDUMP_DEFAULT_PARAMETERS = ["--single-transaction"] PG_DUMP_DEFAULT_PARAMETERS = [] +CHARSET_DEFAULT = "utf-8" class ConfigurationError(ValueError): @@ -31,6 +32,7 @@ def __init__(self): self.addon_packages = [] self.mysqldump_params = [] self.pg_dump_params = [] + self.charset = "" @classmethod def from_file(cls, filename): @@ -73,6 +75,16 @@ def load(self, config_data): self.load_sanitizers(config_data) self.load_dump_extra_parameters(config_data) + charset = config_data.get("config",{}).get("charset", CHARSET_DEFAULT) + if not isinstance(charset, str): + raise ConfigurationError( + "'config' is %s instead of str" % ( + type(charset), + ), + ) + + self.charset = charset + def load_dump_extra_parameters(self, config_data): """ Loads extra parameters for mysqldump and/or pg_dump CLI usage. These diff --git a/database_sanitizer/dump/mysql.py b/database_sanitizer/dump/mysql.py index 8a8f63c..4f5b551 100644 --- a/database_sanitizer/dump/mysql.py +++ b/database_sanitizer/dump/mysql.py @@ -84,7 +84,7 @@ def sanitize_from_stream(stream, config): of the values stored in the database. :type config: database_sanitizer.config.Configuration|None """ - for line in io.TextIOWrapper(stream, encoding="utf-8"): + for line in io.TextIOWrapper(stream, encoding=config.charset): # Eat the trailing new line. line = line.rstrip("\n") From 75530f5d8c08a8ae05173f015c7bb495022c2d1e Mon Sep 17 00:00:00 2001 From: Mark Kennedy Date: Thu, 30 Jul 2020 17:21:57 +0100 Subject: [PATCH 2/2] update README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index c57db55..10fea10 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,7 @@ The configuration file uses following kind of syntax: ```YAML config: + charset: iso-8859-1 # defaults to utf-8 if missing, only affects mysqldump addons: - some.other.package - yet.another.package