
Commit 02b617f

Merge pull request #659 from yukinarit/rework-bench-scripts
Rework bench script, update deps and add pydantic
2 parents: 45efa03 + 0789094

19 files changed, +889 −155 lines changed

.pre-commit-config.yaml

Lines changed: 23 additions & 15 deletions
@@ -7,20 +7,28 @@ repos:
       - id: trailing-whitespace
       - id: end-of-file-fixer

-  - repo: https://github.com/psf/black
-    rev: 24.10.0
+  - repo: local
     hooks:
       - id: black
-        args:
-          - .
-  - repo: https://github.com/RobertCraigie/pyright-python
-    rev: v1.1.389
-    hooks:
-      - id: pyright
-        additional_dependencies: ['pyyaml', 'msgpack', 'msgpack-types']
-  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.8.4
-    hooks:
-      - id: ruff
-        args:
-          - --fix
+        name: black
+        entry: poetry run black
+        language: system
+        types: [python]
+        args: [.]
+      - id: pyright
+        name: pyright
+        entry: poetry run pyright
+        language: system
+        types: [python]
+      - id: ruff
+        name: ruff
+        entry: poetry run ruff check --fix
+        language: system
+        types: [python]
+      - id: mypy
+        name: mypy
+        entry: poetry run mypy
+        language: system
+        types: [python]
+        pass_filenames: false
+        args: [serde]

Makefile

Lines changed: 0 additions & 1 deletion
@@ -31,7 +31,6 @@ fmt:

 check:
 	$(POETRY) run pre-commit run -a
-	$(POETRY) run mypy .

 docs:
 	mkdir -p docs out/api out/guide/en

bench/attrs_class.py

Lines changed: 49 additions & 5 deletions
@@ -1,6 +1,6 @@
 import json
 from functools import partial
-from typing import Union
+from typing import Any, Union

 import attr
 import data
@@ -20,27 +20,71 @@ class Medium:
     inner: list[Small] = attr.Factory(list)


+@attr.s(auto_attribs=True)
+class Large:
+    customer_id: int
+    name: str
+    email: str
+    preferences: dict[str, Union[str, bool, int]] = attr.Factory(dict)
+    items_list: list[str] = attr.Factory(list)
+    nested_data: dict[str, list[int]] = attr.Factory(dict)
+    loyalty_points: int = 0
+    created_at: str = ""
+
+
 SMALL = Small(**data.args_sm)

 MEDIUM = Medium([Small(**d) for d in data.args_md])


+# Create Large instance
+def create_large_instance() -> Large:
+    return Large(
+        customer_id=12345,
+        name="John Smith",
+        preferences={
+            "theme": "dark",
+            "notifications": True,
+            "language": "en",
+            "max_budget": 5000,
+            "auto_renew": False,
+            "privacy_level": 3,
+        },
+        items_list=["laptop", "mouse", "keyboard", "monitor", "speakers"] * 20,  # 100 items
+        nested_data={
+            "category_1": list(range(50)),
+            "category_2": list(range(50, 100)),
+            "category_3": list(range(100, 150)),
+            "category_4": list(range(150, 200)),
+            "category_5": list(range(200, 250)),
+        },
+        loyalty_points=1250,
+        created_at="2024-01-15T10:30:00Z",
+    )
+
+
+LARGE = create_large_instance()
+
+
 def new(size: Size) -> Runner:
     name = "attrs"
     if size == Size.Small:
         unp = SMALL
     elif size == Size.Medium:
-        unp = MEDIUM
+        unp = MEDIUM  # type: ignore[assignment]
+    elif size == Size.Large:
+        unp = LARGE  # type: ignore[assignment]
     return Runner(name, unp, partial(se, unp), None, partial(astuple, unp), partial(asdict, unp))


-def se(obj: Union[Small, Medium]):
+def se(obj: Union[Small, Medium, Large]) -> str:
     return json.dumps(attr.asdict(obj))


-def astuple(obj: Union[Small, Medium]):
+def astuple(obj: Union[Small, Medium, Large]) -> tuple[Any, ...]:
     return attr.astuple(obj)


-def asdict(obj: Union[Small, Medium]):
+def asdict(obj: Union[Small, Medium, Large]) -> dict[str, Any]:
     return attr.asdict(obj)
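
The Large fixture above gives the attrs runner a third payload size next to Small and Medium. A minimal sketch of how the new helpers fit together, assuming it is run from the bench/ directory so that attrs_class and its local data module import cleanly (the payload values are the ones shown in the diff):

import json

import attrs_class

large = attrs_class.create_large_instance()

# se() serializes attr.asdict() to JSON, so a JSON round-trip should match asdict().
assert json.loads(attrs_class.se(large)) == attrs_class.asdict(large)

# items_list is built as 5 names * 20, i.e. 100 entries.
assert len(large.items_list) == 100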

bench/bench.py

Lines changed: 102 additions & 41 deletions
@@ -9,12 +9,14 @@
 from functools import partial
 from pathlib import Path
 from platform import python_implementation
-from typing import Any, Callable, Dict, List, Tuple, Union
+from collections.abc import Callable
+from typing import Any, Optional, Union

 import click
 import data
 import dataclasses_class as dc
 import pyserde_class as ps  # noqa: F401
+import pyserde_nt_class as pn  # noqa: F401
 import raw  # noqa: F401
 from runner import Size

@@ -25,9 +27,7 @@
     import dacite_class as da  # noqa: F401
     import marshmallow_class as ms  # noqa: F401
     import mashumaro_class as mc  # noqa: F401
-    import matplotlib.pyplot as plt
-    import numpy as np
-    import seaborn as sns
+    import pydantic_class as pd  # noqa: F401
 except ImportError:
     pass

@@ -42,7 +42,7 @@ class Opt:
     chart: bool
     output: Path

-    def __post_init__(self):
+    def __post_init__(self) -> None:
         if not self.output.exists():
             self.output.mkdir()

@@ -53,18 +53,26 @@ class Bencher:
     opt: Opt
     number: int = 10000
     repeat: int = 5
-    result: List[Tuple[str, float]] = field(default_factory=list)
+    result: list[tuple[str, float]] = field(default_factory=list)

-    def run(self, name, func, expected=None, **kwargs):
+    def run(
+        self,
+        name: str,
+        func: Optional[Callable[..., Any]],
+        expected: Optional[Union[Any, Callable[[Any], bool]]] = None,
+        **kwargs: Any,
+    ) -> None:
         """
         Run benchmark.
         """
+        if not func:
+            return
+
+        f: Callable[[], Any]
         if kwargs:
             f = partial(func, **kwargs)
         else:
             f = func
-        if not f:
-            return

         # Evaluate result only once.
         if expected:
@@ -76,36 +84,43 @@ def run(self, name, func, expected=None, **kwargs):

         times = timeit.repeat(f, number=self.number, repeat=self.repeat)
         self.result.append((name, sum(times) / len(times)))
-        times = ", ".join([f"{t:.6f}" for t in times])
-        click.echo(f"{name:40s}\t{times}")
+        times_str = ", ".join([f"{t:.6f}" for t in times])
+        click.echo(f"{name:40s}\t{times_str}")
         self.draw_chart()

-    def draw_chart(self):
+    def draw_chart(self) -> None:
         if self.opt.chart:
-            x = np.array([r[0] for r in self.result])
-            y = np.array([r[1] for r in self.result])
-            chart = sns.barplot(x=x, y=y, palette="rocket")
-            chart.set(ylabel=f"Elapsed time for {self.number} requests [sec]")
-            for p in chart.patches:
-                chart.annotate(
-                    format(p.get_height(), ".4f"),
-                    (p.get_x() + p.get_width() / 2.0, p.get_height()),
-                    ha="center",
-                    va="center",
-                    xytext=(0, 10),
-                    textcoords="offset points",
-                )
-            plt.xticks(rotation=20)
-            plt.savefig(str(self.opt.output / f"{self.name}.png"))
-            plt.close()
-
-
-runners_base = ("raw", "dc", "ps")
-
-runners_extra = ("da", "mc", "ms", "at", "ca")
-
-
-def run(opt: Opt, name: str, tc: TestCase):
+            try:
+                import matplotlib.pyplot as plt
+                import numpy as np
+                import seaborn as sns
+
+                x = np.array([r[0] for r in self.result])
+                y = np.array([r[1] for r in self.result])
+                chart = sns.barplot(x=x, y=y, palette="rocket")
+                chart.set(ylabel=f"Elapsed time for {self.number} requests [sec]")
+                for p in chart.patches:
+                    chart.annotate(
+                        format(p.get_height(), ".4f"),
+                        (p.get_x() + p.get_width() / 2.0, p.get_height()),
+                        ha="center",
+                        va="center",
+                        xytext=(0, 10),
+                        textcoords="offset points",
+                    )
+                plt.xticks(rotation=20)
+                plt.savefig(str(self.opt.output / f"{self.name}.png"))
+                plt.close()
+            except ImportError:
+                pass
+
+
+runners_base = ("raw", "dc", "ps", "pn")
+
+runners_extra = ("da", "mc", "ms", "at", "ca", "pd")
+
+
+def run(opt: Opt, name: str, tc: TestCase) -> None:
     """
     Run benchmark.
     """
@@ -124,41 +139,87 @@ class TestCase:
     number: int

     @classmethod
-    def make(cls, size: Size, expected=None, number=10000) -> Dict[Size, TestCase]:
+    def make(cls, size: Size, expected: Any = None, number: int = 10000) -> dict[Size, TestCase]:
         return {size: TestCase(size, expected, number)}


-def equals_small(x):
+def equals_small(x: Any) -> None:
     y = dc.SMALL
     assert x.i == y.i and x.s == y.s and x.f == y.f and x.b == y.b, f"Expected: {x}, Actual: {y}"


-def equals_medium(x):
+def equals_medium(x: Any) -> None:
     y = dc.MEDIUM
     for xs, ys in zip(x.inner, y.inner, strict=True):
         assert (
             xs.i == xs.i and xs.s == ys.s and xs.f == ys.f and xs.b == ys.b
         ), f"Expected: {x}, Actual: {y}"


+def equals_large(x: Any) -> None:
+    """Validate large deserialized data structure"""
+    # Use different reference based on the library type
+    if hasattr(type(x), "model_fields"):  # Pydantic model
+        try:
+            import pydantic_class as pd
+
+            y = pd.LARGE
+        except ImportError:
+            y = ps.LARGE
+    else:
+        y = ps.LARGE
+
+    assert (
+        x.customer_id == y.customer_id
+    ), f"Customer ID mismatch: {x.customer_id} != {y.customer_id}"
+    assert x.name == y.name, f"Name mismatch: {x.name} != {y.name}"
+    assert x.email == y.email, f"Email mismatch: {x.email} != {y.email}"
+    assert len(x.items_list) == len(
+        y.items_list
+    ), f"Items list length mismatch: {len(x.items_list)} != {len(y.items_list)}"
+    assert len(x.nested_data) == len(
+        y.nested_data
+    ), f"Nested data keys mismatch: {len(x.nested_data)} != {len(y.nested_data)}"
+    assert (
+        x.loyalty_points == y.loyalty_points
+    ), f"Loyalty points mismatch: {x.loyalty_points} != {y.loyalty_points}"
+    assert x.created_at == y.created_at, f"Created at mismatch: {x.created_at} != {y.created_at}"
+
+    # Validate preferences
+    assert x.preferences["theme"] == y.preferences["theme"], "Theme preference mismatch"
+    assert (
+        x.preferences["notifications"] == y.preferences["notifications"]
+    ), "Notifications preference mismatch"
+
+    # Validate nested data structure
+    assert "category_1" in x.nested_data, "category_1 not found in nested_data"
+    assert (
+        len(x.nested_data["category_1"]) == 50
+    ), f"category_1 length mismatch: {len(x.nested_data['category_1'])}"
+
+
 TESTCASES = {
     "se": {
         **TestCase.make(Size.Small, lambda x: json.loads(x) == json.loads(data.SMALL)),
         **TestCase.make(
             Size.Medium, lambda x: json.loads(x) == json.loads(data.MEDIUM), number=500
         ),
+        **TestCase.make(Size.Large, lambda x: json.loads(x) == json.loads(data.LARGE), number=100),
     },
     "de": {
         **TestCase.make(Size.Small, equals_small),
         **TestCase.make(Size.Medium, equals_medium, number=500),
+        **TestCase.make(Size.Large, equals_large, number=100),
     },
     "astuple": {
         **TestCase.make(Size.Small, data.SMALL_TUPLE),
         **TestCase.make(Size.Medium, number=500),
+        **TestCase.make(Size.Large, number=100),
     },
     "asdict": {
         **TestCase.make(Size.Small, data.SMALL_DICT),
         **TestCase.make(Size.Medium, number=500),
+        **TestCase.make(Size.Large, number=100),
     },
 }

@@ -183,7 +244,7 @@ def equals_medium(x):
     callback=lambda _, __, p: Path(p),
     help="Output directory for charts.",
 )
-def main(full: bool, test: str, chart: bool, output: Path):
+def main(full: bool, test: str, chart: bool, output: Path) -> None:
     """
     bench.py - Benchmarking pyserde and other libraries.
     """
@@ -199,4 +260,4 @@ def main(full: bool, test: str, chart: bool, output: Path):


 if __name__ == "__main__":
-    main()
+    main()  # pyright: ignore[reportCallIssue]
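
For reference, the measurement inside Bencher.run above reduces to timeit.repeat plus a mean over the rounds. A standalone sketch of that loop with the same defaults (number=10000, repeat=5); json.dumps stands in for one of the benchmarked runners purely for illustration:

import json
import timeit
from functools import partial

number, repeat = 10000, 5
payload = {"i": 1, "s": "x", "f": 1.0, "b": True}

# Each round runs the callable `number` times; `repeat` rounds are collected.
f = partial(json.dumps, payload)
times = timeit.repeat(f, number=number, repeat=repeat)

# Bencher.run echoes the per-round timings and records the mean of the rounds.
print(f"{'toy se Small':40s}\t" + ", ".join(f"{t:.6f}" for t in times))
print(f"mean: {sum(times) / len(times):.6f} sec per {number} calls")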
