Skip to content

Commit ab5160b

Browse files
committed
feature: file enum anonymizer, inject samples in database from a plain text or csv file
1 parent 8c32e1a commit ab5160b

File tree

21 files changed

+654
-2
lines changed

21 files changed

+654
-2
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
## Next
44

5+
* [feature] 🌟 File enum anonymizer, inject samples in database from a plain text or CSV file.
56
* [feature] 🌟 String pattern anonymizer, build complex strings by fetching values from other anonymizers.
67
* [internal] introduce anonymizer context for carrying environment configuration to anonymizers (#235).
78
* [bc] Salt in `AbstractAnonymizer::$option->get('salt')` is now in `AbstractAnonymizer::$context->salt` (#235).

docs/content/anonymization/core-anonymizers.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ This page lists all *Anonymizers* provided by *DbToolsBundle*.
1414
<!--@include: ./core-anonymizers/md5.md-->
1515
<!--@include: ./core-anonymizers/string.md-->
1616
<!--@include: ./core-anonymizers/pattern.md-->
17+
<!--@include: ./core-anonymizers/file-enum.md-->
1718
<!--@include: ./core-anonymizers/lastname.md-->
1819
<!--@include: ./core-anonymizers/firstname.md-->
1920
<!--@include: ./core-anonymizers/lorem-ipsum.md-->

docs/content/anonymization/core-anonymizers/address.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,13 @@ customer:
9595
#...
9696
```
9797
:::
98+
99+
:::warning
100+
This anonymizer works at the *table level* which means that the PHP attribute
101+
cannot target object properties: you must specify table column names and not
102+
PHP class property names.
103+
:::
104+
98105
@@@
99106

100107
:::tip
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
## File enum
2+
3+
This anonymizer will fill configured column with a random value from a given sample fetched
4+
from a plain text or a CSV file.
5+
6+
Given the following file:
7+
8+
```txt
9+
none
10+
bad
11+
good
12+
expert
13+
```
14+
15+
Then:
16+
17+
@@@ standalone docker
18+
19+
```yaml [YAML]
20+
# db_tools.config.yaml
21+
anonymization:
22+
default:
23+
customer:
24+
level:
25+
anonymizer: file_enum
26+
options: {source: ./resources/levels.txt}
27+
#...
28+
```
29+
30+
@@@
31+
@@@ symfony
32+
33+
::: code-group
34+
```php [Attribute]
35+
namespace App\Entity;
36+
37+
use Doctrine\ORM\Mapping as ORM;
38+
use MakinaCorpus\DbToolsBundle\Attribute\Anonymize;
39+
40+
#[ORM\Entity()]
41+
#[ORM\Table(name: 'customer')]
42+
class Customer
43+
{
44+
// ...
45+
46+
#[ORM\Column(length: 255)]
47+
#[Anonymize(type: 'file_enum', options: ['source' => "./resources/levels.txt"])] // [!code ++]
48+
private ?string $level = null;
49+
50+
// ...
51+
}
52+
```
53+
54+
```yaml [YAML]
55+
# config/anonymization.yaml
56+
customer:
57+
level:
58+
anonymizer: file_enum
59+
options: {source: ./resources/levels.txt}
60+
#...
61+
```
62+
:::
63+
64+
@@@
65+
66+
File will be read this way:
67+
- When using a plain text file, each line is a value, no matter what's inside.
68+
- When using a CSV file, the first column will be used instead.
69+
70+
When parsing a file, you can set the following options as well:
71+
- `file_csv_enclosure`: if file is a CSV, use this as the enclosure character (default is `'"'`).
72+
- `file_csv_escape`: if file is a CSV, use this as the escape character (default is `'\\'`).
73+
- `file_csv_separator`: if file is a CSV, use this as the separator character (default is `','`).
74+
- `file_skip_header`: when reading any file, set this to true to skip the first line (default is `false`).
75+
76+
:::tip
77+
The filename can be absolute, or relative. For relative file resolution
78+
please see [*File name resolution*](#file-name-resolution).
79+
:::

docs/content/anonymization/core-anonymizers/iban-bic.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,4 +74,10 @@ customer:
7474
```
7575
:::
7676

77+
:::warning
78+
This anonymizer works at the *table level* which means that the PHP attribute
79+
cannot target object properties: you must specify table column names and not
80+
PHP class property names.
81+
:::
82+
7783
@@@

src/Anonymization/AnonymizatorFactory.php

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
namespace MakinaCorpus\DbToolsBundle\Anonymization;
66

77
use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\AnonymizerRegistry;
8+
use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Context;
89
use MakinaCorpus\DbToolsBundle\Anonymization\Config\AnonymizationConfig;
910
use MakinaCorpus\DbToolsBundle\Anonymization\Config\Loader\LoaderInterface;
1011
use MakinaCorpus\DbToolsBundle\Database\DatabaseSessionRegistry;
@@ -21,8 +22,24 @@ public function __construct(
2122
private DatabaseSessionRegistry $registry,
2223
private AnonymizerRegistry $anonymizerRegistry,
2324
private ?LoggerInterface $logger = null,
25+
/**
26+
* @todo
27+
* This is not the right place to set this, but any other alternative
28+
* would require a deep refactor of anonymizer options.
29+
*/
30+
private ?string $basePath = null,
2431
) {}
2532

33+
/**
34+
* @internal
35+
* For Laravel dependency injection only.
36+
* This can change anytime.
37+
*/
38+
public function setBasePath(?string $basePath): void
39+
{
40+
$this->basePath = $basePath;
41+
}
42+
2643
/**
2744
* Add configuration loader.
2845
*/
@@ -49,7 +66,8 @@ public function getOrCreate(string $connectionName): Anonymizator
4966
$anonymizator = new Anonymizator(
5067
$this->registry->getDatabaseSession($connectionName),
5168
$this->anonymizerRegistry,
52-
$config
69+
$config,
70+
new Context(basePath: $this->basePath),
5371
);
5472

5573
if ($this->logger) {

src/Anonymization/Anonymizer/AnonymizerRegistry.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ class AnonymizerRegistry
1717
Core\ConstantAnonymizer::class,
1818
Core\DateAnonymizer::class,
1919
Core\EmailAnonymizer::class,
20+
Core\FileEnumAnonymizer::class,
2021
Core\FirstNameAnonymizer::class,
2122
Core\FloatAnonymizer::class,
2223
Core\IbanBicAnonymizer::class,

src/Anonymization/Anonymizer/Context.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,14 @@
77
class Context
88
{
99
public readonly string $salt;
10+
public readonly string $basePath;
1011

1112
public function __construct(
1213
?string $salt = null,
14+
?string $basePath = null,
1315
) {
1416
$this->salt = $salt ?? self::generateRandomSalt();
17+
$this->basePath = $basePath ?? \getcwd();
1518
}
1619

1720
public static function generateRandomSalt(): string
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Core;
6+
7+
use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\AbstractEnumAnonymizer;
8+
use MakinaCorpus\DbToolsBundle\Attribute\AsAnonymizer;
9+
use MakinaCorpus\DbToolsBundle\Helper\FileReader;
10+
11+
#[AsAnonymizer(
12+
name: 'file_enum',
13+
pack: 'core',
14+
description: <<<TXT
15+
Anonymize any text value using a random element from the given file.
16+
Options are:
17+
- 'source': filename to load, filename must be absolute, or relative
18+
to the configuration file directory.
19+
- 'file_csv_enclosure': if file is a CSV, use this as the enclosure
20+
character (default is '"').
21+
- 'file_csv_escape': if file is a CSV, use this as the escape
22+
character (default is '\\').
23+
- 'file_csv_separator': if file is a CSV, use this as the separator
24+
character (default is ',').
25+
- 'file_skip_header': when reading any file, set this to true to skip
26+
the first line (default is false).
27+
TXT
28+
)]
29+
class FileEnumAnonymizer extends AbstractEnumAnonymizer
30+
{
31+
private ?string $filename = null;
32+
33+
protected function getFilename(): string
34+
{
35+
if ($this->filename) {
36+
return $this->filename;
37+
}
38+
39+
$filename = $this->options->getString('source', null, true);
40+
$filename = FileReader::ensurePathAbsolute($filename, $this->context->basePath);
41+
42+
FileReader::ensureFile($filename);
43+
44+
return $this->filename = $filename;
45+
}
46+
47+
#[\Override]
48+
protected function validateOptions(): void
49+
{
50+
parent::validateOptions();
51+
52+
$this->getFilename();
53+
}
54+
55+
#[\Override]
56+
protected function getSample(): array
57+
{
58+
return \iterator_to_array(
59+
FileReader::readEnumFile(
60+
$this->getFilename(),
61+
$this->options,
62+
)
63+
);
64+
}
65+
}

src/Bridge/Laravel/DbToolsServiceProvider.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,8 @@ public function register(): void
141141
$this->app->resolving(
142142
AnonymizatorFactory::class,
143143
function (AnonymizatorFactory $factory, Application $app): void {
144+
$factory->setBasePath($app->basePath());
145+
144146
/** @var Repository $config */
145147
$config = $app->make('config');
146148

0 commit comments

Comments
 (0)