Skip to content

Commit 184f299

Browse files
committed
Add some rudimentary perf benchmarking functions
1 parent 1db4a34 commit 184f299

File tree

3 files changed

+116
-0
lines changed

3 files changed

+116
-0
lines changed

Makefile

+6
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,9 @@ generate:
2323
@echo "Generated reference data..."
2424
@echo
2525
@php ./script/generate-reference-data.php
26+
27+
# Benchmark target: prints a banner and a blank line, then runs script/benchmark.php.
.PHONY: benchmark
28+
benchmark:
29+
@echo "Running benchmark..."
30+
@echo
31+
@php ./script/benchmark.php

script/benchmark.php

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
<?php declare(strict_types=1);

# Benchmarks the Normcore transforms against one of the reference data sets in
# test/data: every transform is timed for every row, then a per-transform
# summary (name, calls, total seconds, average seconds per call) is printed.

require __DIR__ . '/../vendor/autoload.php';

use BFFdotFM\Normcore\Normcore;
use League\Csv\Reader;

# Reference data file (inside test/data) to run the benchmark against:
# $dataSource = 'albums.csv';
# $dataSource = 'artists.csv';
# $dataSource = 'titles.csv';
$dataSource = 'labels.csv';

# Maximum number of data rows to process; 0 disables the limit.
$limit = 50000;

# Transforms are applied in this order to every input string.
$transforms = [
    'trimWhitespace',
    'handleDistroKidLabels',
    'discardLicensingBlurb',
    'removeTrailingYear',
    'discardCopyright',
    'removePhrasePunctuation',
    'discardIncorporation',
    'discardOrganizationGroup',
    'discardLabelNameRedundancies',
    'discardYearPrefix',
    'trimPunctuation',
    'normalizeUnicode',
    'flattenStylisticCharacters',
    'downCase',
    'filterRedundantWords',
    'removePunctuation',
    'removeWhitespace',
];

# Build the path from $dataSource so switching the data set above takes effect.
$dataPath = __DIR__ . '/../test/data/' . $dataSource;
$csv = Reader::createFromPath($dataPath, 'r');
$csv->skipInputBOM();
$csv->setDelimiter("\t");

Normcore::resetAnalysis();
echo "\n";

foreach ($csv as $index => $row) {
    # Ignore the BOM/Header line:
    if ($index === 0) {
        continue;
    }

    # Offset 0 is the header, so data rows occupy offsets 1..$limit.
    if ($limit > 0 && $index > $limit) {
        break;
    }

    Normcore::analyzeTransforms($row[0], $transforms);
    echo '.';
}

echo "\n\n";
echo "Performance Breakdown\n";
$stats = Normcore::getAnalysis();

# Times are float seconds. Subtracting them would be cast to int by usort(),
# collapsing every sub-second difference to 0, so compare with <=> instead.
# Order is ascending: the slowest transform is printed last.
usort($stats, fn($a, $b) => $a['time'] <=> $b['time']);

foreach ($stats as $stat) {
    # Per-call averages are far below a hundredth of a second, so print enough
    # decimals for them to be distinguishable (both columns are in seconds).
    printf(
        "%s\t%d\t%.4f\t%.8f\n",
        $stat['function'],
        $stat['calls'],
        $stat['time'],
        $stat['time'] / $stat['calls']
    );
}

src/Normcore.php

+38
Original file line numberDiff line numberDiff line change
@@ -108,4 +108,42 @@ protected static function transform(string $string, array $transforms = array())
108108
}, $string);
109109
}
110110

111+
112+
// Recorded timings keyed by transform name; each value is a list of elapsed
// times (microtime(true) differences) appended by analyzeTransforms().
protected static $analysis = array();
113+
114+
/**
 * Apply each named transform to the string in order, timing every call.
 *
 * The elapsed time of each call (a microtime(true) difference) is appended to
 * self::$analysis under the transform's name. When a transform's result is
 * empty(), that result is dropped and the value from before the transform is
 * carried forward to the next one.
 */
public static function analyzeTransforms(string $string, array $transforms = array()) : string {
    $current = $string;

    foreach ($transforms as $transformName) {
        $startedAt = microtime(true);
        $candidate = Transforms::$transformName($current);
        $elapsed = microtime(true) - $startedAt;

        // Appending creates the per-transform list on first use.
        self::$analysis[$transformName][] = $elapsed;

        if (!empty($candidate)) {
            $current = $candidate;
        }
    }

    return $current;
}
134+
135+
/**
 * Summarise the recorded timings as a list with one entry per transform.
 *
 * Each entry holds 'function' (the transform name), 'calls' (how many
 * timings were recorded) and 'time' (the sum of those timings).
 */
public static function getAnalysis() {
    $summary = array();

    foreach (self::$analysis as $name => $timings) {
        $summary[] = array(
            'function' => $name,
            'calls' => count($timings),
            'time' => array_sum($timings),
        );
    }

    return $summary;
}
144+
145+
/**
 * Discard every recorded timing so the next run starts from an empty state.
 */
public static function resetAnalysis() {
    self::$analysis = [];
}
148+
111149
}

0 commit comments

Comments
 (0)