Skip to content

Commit c129af9

Browse files
committed
feature: file enum anonymizer, inject samples in database from a plain text or csv file
missing testing files
1 parent 994db67 commit c129af9

File tree

21 files changed

+663
-3
lines changed

21 files changed

+663
-3
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
## Next
44

5+
* [feature] 🌟 File enum anonymizer, inject samples in database from a plain text or CSV file.
56
* [feature] 🌟 String pattern anonymizer, build complex strings by fetching values from other anonymizers.
67

78
## 2.0.3

docs/content/anonymization/core-anonymizers.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ This page list all *Anonymizers* provided by *DbToolsBundle*.
1414
<!--@include: ./core-anonymizers/md5.md-->
1515
<!--@include: ./core-anonymizers/string.md-->
1616
<!--@include: ./core-anonymizers/pattern.md-->
17+
<!--@include: ./core-anonymizers/file-enum.md-->
1718
<!--@include: ./core-anonymizers/lastname.md-->
1819
<!--@include: ./core-anonymizers/firstname.md-->
1920
<!--@include: ./core-anonymizers/lorem-ipsum.md-->

docs/content/anonymization/core-anonymizers/address.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,13 @@ customer:
9595
#...
9696
```
9797
:::
98+
99+
:::warning
100+
This anonymizer works at the *table level* which means that the PHP attribute
101+
cannot target object properties: you must specify table column names and not
102+
PHP class property names.
103+
:::
104+
98105
@@@
99106

100107
:::tip
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
## File enum
2+
3+
This anonymizer will fill the configured column with a random value from a given sample fetched
4+
from a plain text or a CSV file.
5+
6+
Given the following file:
7+
8+
```txt
9+
none
10+
bad
11+
good
12+
expert
13+
```
14+
15+
Then:
16+
17+
@@@ standalone docker
18+
19+
```yaml [YAML]
20+
# db_tools.config.yaml
21+
anonymization:
22+
default:
23+
customer:
24+
level:
25+
anonymizer: file_enum
26+
options: {source: ./resources/levels.txt}
27+
#...
28+
```
29+
30+
@@@
31+
@@@ symfony
32+
33+
::: code-group
34+
```php [Attribute]
35+
namespace App\Entity;
36+
37+
use Doctrine\ORM\Mapping as ORM;
38+
use MakinaCorpus\DbToolsBundle\Attribute\Anonymize;
39+
40+
#[ORM\Entity()]
41+
#[ORM\Table(name: 'customer')]
42+
class Customer
43+
{
44+
// ...
45+
46+
#[ORM\Column(length: 255)]
47+
#[Anonymize(type: 'file_enum', options: ['source' => "./resources/levels.txt"])] // [!code ++]
48+
private ?string $level = null;
49+
50+
// ...
51+
}
52+
```
53+
54+
```yaml [YAML]
55+
# config/anonymization.yaml
56+
customer:
57+
level:
58+
anonymizer: file_enum
59+
options: {source: ./resources/levels.txt}
60+
#...
61+
```
62+
:::
63+
64+
@@@
65+
66+
File will be read this way:
67+
- When using a plain text file, each line is a value, no matter what's inside.
68+
- When using a CSV file, the first column will be used instead.
69+
70+
When parsing a file, you can set the following options as well:
71+
- `file_csv_enclosure`: if file is a CSV, use this as the enclosure character (default is `'"'`).
72+
- `file_csv_escape`: if file is a CSV, use this as the escape character (default is `'\\'`).
73+
- `file_csv_separator`: if file is a CSV, use this as the separator character (default is `','`).
74+
- `file_skip_header`: when reading any file, set this to true to skip the first line (default is `false`).
75+
76+
:::tip
77+
The filename can be absolute, or relative. For relative file resolution
78+
please see [*File name resolution*](#file-name-resolution).
79+
:::

docs/content/anonymization/core-anonymizers/iban-bic.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,4 +74,10 @@ customer:
7474
```
7575
:::
7676

77+
:::warning
78+
This anonymizer works at the *table level* which means that the PHP attribute
79+
cannot target object properties: you must specify table column names and not
80+
PHP class property names.
81+
:::
82+
7783
@@@

src/Anonymization/Anonymizator.php

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,21 @@ class Anonymizator implements LoggerAwareInterface
4343

4444
private OutputInterface $output;
4545

46+
private string $basePath;
47+
4648
public function __construct(
4749
private DatabaseSession $databaseSession,
4850
private AnonymizerRegistry $anonymizerRegistry,
4951
private AnonymizationConfig $anonymizationConfig,
5052
private ?string $salt = null,
53+
/**
54+
* @todo
55+
* This is not the right place to set this, but any other alternative
56+
* would require a deep refactor of anonymizer options.
57+
*/
58+
?string $basePath = null,
5159
) {
60+
$this->basePath = $basePath ?? \getcwd();
5261
$this->logger = new NullLogger();
5362
$this->output = new NullOutput();
5463
}
@@ -89,7 +98,7 @@ protected function createAnonymizer(AnonymizerConfig $config): AbstractAnonymize
8998
return $this->anonymizerRegistry->createAnonymizer(
9099
$config->anonymizer,
91100
$config,
92-
$config->options->with(['salt' => $this->getSalt()]),
101+
$config->options->with(['salt' => $this->getSalt(), 'base_path' => $this->basePath]),
93102
$this->databaseSession
94103
);
95104
}

src/Anonymization/AnonymizatorFactory.php

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,24 @@ public function __construct(
2121
private DatabaseSessionRegistry $registry,
2222
private AnonymizerRegistry $anonymizerRegistry,
2323
private ?LoggerInterface $logger = null,
24+
/**
25+
* @todo
26+
* This is not the right place to set this, but any other alternative
27+
* would require a deep refactor of anonymizer options.
28+
*/
29+
private ?string $basePath = null,
2430
) {}
2531

32+
/**
33+
* @internal
34+
* For Laravel dependency injection only.
35+
* This can change anytime.
36+
*/
37+
public function setBasePath(?string $basePath): void
38+
{
39+
$this->basePath = $basePath;
40+
}
41+
2642
/**
2743
* Add configuration loader.
2844
*/
@@ -49,7 +65,8 @@ public function getOrCreate(string $connectionName): Anonymizator
4965
$anonymizator = new Anonymizator(
5066
$this->registry->getDatabaseSession($connectionName),
5167
$this->anonymizerRegistry,
52-
$config
68+
$config,
69+
$this->basePath,
5370
);
5471

5572
if ($this->logger) {

src/Anonymization/Anonymizer/AnonymizerRegistry.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ class AnonymizerRegistry
1717
Core\ConstantAnonymizer::class,
1818
Core\DateAnonymizer::class,
1919
Core\EmailAnonymizer::class,
20+
Core\FileEnumAnonymizer::class,
2021
Core\FirstNameAnonymizer::class,
2122
Core\FloatAnonymizer::class,
2223
Core\IbanBicAnonymizer::class,
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\Core;
6+
7+
use MakinaCorpus\DbToolsBundle\Anonymization\Anonymizer\AbstractEnumAnonymizer;
8+
use MakinaCorpus\DbToolsBundle\Attribute\AsAnonymizer;
9+
use MakinaCorpus\DbToolsBundle\Helper\FileReader;
10+
11+
#[AsAnonymizer(
12+
name: 'file_enum',
13+
pack: 'core',
14+
description: <<<TXT
15+
Anonymize any text value using a random element from the given file.
16+
Options are:
17+
- 'source': filename to load, filename must be absolute, or relative
18+
to the configuration file directory.
19+
- 'file_csv_enclosure': if file is a CSV, use this as the enclosure
20+
character (default is '"').
21+
- 'file_csv_escape': if file is a CSV, use this as the escape
22+
character (default is '\\').
23+
- 'file_csv_separator': if file is a CSV, use this as the separator
24+
character (default is ',').
25+
- 'file_skip_header': when reading any file, set this to true to skip
26+
the first line (default is false).
27+
TXT
28+
)]
29+
class FileEnumAnonymizer extends AbstractEnumAnonymizer
30+
{
31+
private ?string $filename = null;
32+
33+
protected function getFilename(): string
34+
{
35+
if ($this->filename) {
36+
return $this->filename;
37+
}
38+
39+
$filename = $this->options->getString('source', null, true);
40+
41+
if ($basePath = $this->options->getString('base_path')) {
42+
$filename = FileReader::ensurePathAbsolute($filename, $basePath);
43+
}
44+
45+
FileReader::ensureFile($filename);
46+
47+
return $this->filename = $filename;
48+
}
49+
50+
#[\Override]
51+
protected function validateOptions(): void
52+
{
53+
parent::validateOptions();
54+
55+
$this->getFilename();
56+
}
57+
58+
#[\Override]
59+
protected function getSample(): array
60+
{
61+
return \iterator_to_array(
62+
FileReader::readEnumFile(
63+
$this->getFilename(),
64+
$this->options,
65+
)
66+
);
67+
}
68+
}

src/Bridge/Laravel/DbToolsServiceProvider.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,8 @@ public function register(): void
141141
$this->app->resolving(
142142
AnonymizatorFactory::class,
143143
function (AnonymizatorFactory $factory, Application $app): void {
144+
$factory->setBasePath($app->basePath());
145+
144146
/** @var Repository $config */
145147
$config = $app->make('config');
146148

0 commit comments

Comments
 (0)