Skip to content

Commit 71730a9

Browse files
feat: added Column.summarize_statistics() (#715)
Closes #701

### Summary of Changes

Added `summarize_statistics` to the `Column` class to quickly get an overview of relevant statistics. The column is converted into a `Table` with a single column, and the result of `Table.summarize_statistics()` is returned. This way, any feature that is later added to `Table.summarize_statistics()` automatically appears in `Column.summarize_statistics()` as well.

---------

Co-authored-by: Lars Reimann <[email protected]>
1 parent f2f4418 commit 71730a9

File tree

2 files changed

+139
-0
lines changed

2 files changed

+139
-0
lines changed

Diff for: src/safeds/data/tabular/containers/_column.py

+33
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,39 @@ def transform(self, transformer: Callable[[T], R]) -> Column[R]:
566566
# Statistics
567567
# ------------------------------------------------------------------------------------------------------------------
568568

569+
def summarize_statistics(self) -> Table:
    """
    Compute summary statistics for this column and return them as a table.

    The column is wrapped in a temporary table that contains only this column, and the work is delegated to
    `Table.summarize_statistics()`. Both methods therefore always report the same set of metrics. The original
    Column is not modified.

    Returns
    -------
    statistics:
        The table with statistics. It has a "metric" column and one column named after this column.

    Examples
    --------
    >>> from safeds.data.tabular.containers import Column
    >>> column = Column("a", [1, 3])
    >>> column.summarize_statistics()
                     metric                   a
    0               minimum                   1
    1               maximum                   3
    2                  mean                 2.0
    3                  mode              [1, 3]
    4                median                 2.0
    5              variance                 2.0
    6    standard deviation  1.4142135623730951
    7   missing value count                   0
    8   missing value ratio                 0.0
    9                idness                 1.0
    10            stability                 0.5
    """
    # NOTE(review): imported locally rather than at module level, presumably to avoid a circular import
    # between Column and Table — confirm.
    from safeds.data.tabular.containers import Table

    single_column_table = Table({self._name: self._data})
    return single_column_table.summarize_statistics()
601+
569602
def correlation_with(self, other_column: Column) -> float:
570603
"""
571604
Calculate Pearson correlation between this and another column. Both columns have to be numerical.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
from statistics import stdev
2+
3+
import pytest
4+
from safeds.data.tabular.containers import Column, Table
5+
6+
7+
# Names of the metrics reported by `summarize_statistics`, in the order of the rows of the result.
_METRIC_NAMES = (
    "minimum",
    "maximum",
    "mean",
    "mode",
    "median",
    "variance",
    "standard deviation",
    "missing value count",
    "missing value ratio",
    "idness",
    "stability",
)


def _expected_statistics(column_name: str, values: list[str]) -> Table:
    """Build the expected statistics table: the "metric" column followed by the values for `column_name`."""
    # A fresh list is created per table so the expected tables never share a mutable object.
    return Table({"metric": list(_METRIC_NAMES), column_name: values})


@pytest.mark.parametrize(
    ("column", "expected"),
    [
        pytest.param(
            Column("col1", [1, 2, 1]),
            _expected_statistics(
                "col1",
                [
                    "1",  # minimum
                    "2",  # maximum
                    str(4.0 / 3),  # mean
                    "[1]",  # mode
                    "1.0",  # median
                    str(1.0 / 3),  # variance
                    str(stdev([1, 2, 1])),  # standard deviation
                    "0",  # missing value count
                    "0.0",  # missing value ratio
                    str(2.0 / 3),  # idness
                    str(2.0 / 3),  # stability
                ],
            ),
            id="Column of integers",
        ),
        pytest.param(
            Column("col1", ["a", "b", "c"]),
            _expected_statistics(
                "col1",
                [
                    "-",  # minimum
                    "-",  # maximum
                    "-",  # mean
                    "['a', 'b', 'c']",  # mode
                    "-",  # median
                    "-",  # variance
                    "-",  # standard deviation
                    "0",  # missing value count
                    "0.0",  # missing value ratio
                    "1.0",  # idness
                    str(1.0 / 3),  # stability
                ],
            ),
            id="Column of characters",
        ),
        pytest.param(
            Column("col", [None, None]),
            _expected_statistics(
                "col",
                ["-", "-", "-", "[]", "-", "-", "-", "2", "1.0", "0.0", "-"],
            ),
            id="Column of None",
        ),
    ],
)
def test_should_summarize_statistics(column: Column, expected: Table) -> None:
    """Check that `Column.summarize_statistics` yields the expected schema and the expected values."""
    # The schema is compared first so that a type mismatch is reported separately from a value mismatch.
    assert column.summarize_statistics().schema == expected.schema
    assert column.summarize_statistics() == expected

0 commit comments

Comments
 (0)