Skip to content

Commit 8a3128e

Browse files
authored
0.4.2 (#44)
* Add a set of functions from r-base * Rename datar_versions to get_versions * Add varname to requirements to close #30 * 0.4.2
1 parent 36cca70 commit 8a3128e

24 files changed

+776
-136
lines changed

datar/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
from .core import _frame_format_patch
77
from .core.defaults import f
88

9-
__all__ = ('f', 'datar_versions')
10-
__version__ = "0.4.1"
9+
__all__ = ('f', 'get_versions')
10+
__version__ = "0.4.2"
1111

12-
def datar_versions(
12+
def get_versions(
1313
prnt: bool = True
1414
) -> Mapping[str, str]: # pragma: no cover
1515
"""Print or return related versions which help for bug reporting.

datar/base/__init__.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,20 @@
2525
round as round_,
2626
sum as sum_,
2727
abs as abs_,
28+
prod,
29+
sign,
30+
signif,
31+
trunc,
32+
exp,
33+
log,
34+
log2,
35+
log10,
36+
log1p
2837
)
2938
from .bessel import bessel_i, bessel_j, bessel_k, bessel_y
3039
from .casting import as_double, as_float, as_int, as_integer, as_numeric
3140
from .complex import arg, as_complex, conj, im, is_complex, mod, re as re_
32-
from .constants import LETTERS, Inf, letters, month_abb, month_name, pi
41+
from .constants import LETTERS, letters, month_abb, month_name, pi
3342
from .cum import cummax, cummin, cumprod, cumsum
3443
from .date import as_date
3544
from .factor import (
@@ -52,7 +61,7 @@
5261
is_logical,
5362
is_true,
5463
)
55-
from .na import NA, NaN, any_na, is_na
64+
from .na import NA, NaN, any_na, is_na, Inf, is_finite, is_infinite, is_nan
5665
from .null import NULL, as_null, is_null
5766
from .random import set_seed
5867
from .seq import (
@@ -66,6 +75,7 @@
6675
seq_along,
6776
seq_len,
6877
unique,
78+
match
6979
)
7080
from .special import (
7181
beta,
@@ -99,6 +109,12 @@
99109
sub,
100110
substr,
101111
substring,
112+
startswith,
113+
endswith,
114+
strtoi,
115+
chartr,
116+
tolower,
117+
toupper,
102118
)
103119
from .table import table
104120
from .testing import (
@@ -145,6 +161,7 @@
145161
setequal,
146162
t,
147163
union,
164+
max_col,
148165
)
149166
from .which import which, which_max, which_min
150167

datar/base/arithmetic.py

Lines changed: 144 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,16 @@
66
from pandas import DataFrame, Series
77
from pipda import register_func, register_verb
88

9-
from ..core.contexts import Context
10-
from ..core.types import NumericOrIter, NumericType, is_not_null, is_scalar
11-
from ..core.utils import Array, register_numpy_func_x, recycle_value, length_of
129
from ..core.collections import Collection
10+
from ..core.contexts import Context
11+
from ..core.types import (
12+
FloatOrIter,
13+
NumericOrIter,
14+
NumericType,
15+
is_not_null,
16+
is_scalar
17+
)
18+
from ..core.utils import Array, length_of, recycle_value, register_numpy_func_x
1319

1420
# cor?, range, summary, iqr
1521

@@ -50,6 +56,20 @@ def _arithmetric(x: Iterable, na_rm: bool = False) -> Iterable:
5056
""",
5157
)
5258

59+
prod = _register_arithmetic_agg(
60+
"prod",
61+
"prod",
62+
doc="""Product of the input.
63+
64+
Args:
65+
x: The input
66+
na_rm: Exclude the NAs
67+
68+
Returns:
69+
The product of the input
70+
""",
71+
)
72+
5373
mean = _register_arithmetic_agg(
5474
"mean",
5575
"mean",
@@ -161,6 +181,7 @@ def pmax(*x: Iterable, na_rm: bool = False) -> Iterable[float]:
161181
@register_func(None, context=Context.EVAL)
def round(x: NumericOrIter, ndigits: int = 0) -> NumericOrIter:
    """Round a number, or each element of an iterable, with numpy.round

    Args:
        x: A numeric scalar or vector
        ndigits: Number of decimal places to round to

    Returns:
        The result of `numpy.round(x, ndigits)`
    """
    # recycle ndigits?
    rounded = numpy.round(x, ndigits)
    return rounded
165186

166187

@@ -190,6 +211,33 @@ def round(x: NumericOrIter, ndigits: int = 0) -> NumericOrIter:
190211
""",
191212
)
192213

214+
sign = register_numpy_func_x(
215+
"sign",
216+
"sign",
217+
doc="""Get the signs of the corresponding elements of x
218+
219+
Args:
220+
x: The input
221+
222+
Returns:
223+
The signs of the corresponding elements of x
224+
""",
225+
)
226+
227+
trunc = register_numpy_func_x(
228+
"trunc",
229+
"trunc",
230+
doc="""Get the integers truncated for each element in x
231+
232+
Args:
233+
x: The input
234+
235+
Returns:
236+
The integers of elements in x being truncated
237+
Note the dtype is still float.
238+
""",
239+
)
240+
193241
ceiling = register_numpy_func_x(
194242
"ceiling",
195243
"ceil",
@@ -216,6 +264,27 @@ def round(x: NumericOrIter, ndigits: int = 0) -> NumericOrIter:
216264
""",
217265
)
218266

267+
@register_func(None, context=Context.EVAL)
def signif(
    x: NumericOrIter,
    digits: int = 6
) -> NumericOrIter:
    """Rounds the values in its first argument to the specified number of
    significant digits

    Args:
        x: A numeric vector or scalar
        digits: integer indicating the number of significant digits to be used

    Returns:
        A float if x is a scalar; otherwise a float numpy array with the
        rounded value for each element in x. Zeros and non-finite values
        (NaN, Inf, -Inf) are returned unchanged.
    """
    # todo complex?
    def _signif_one(elem: float) -> float:
        """Round a single value to `digits` significant digits"""
        # log10(0) is -inf and log10 of NaN/Inf is not a usable magnitude;
        # converting those to int raises, so pass such values through.
        if elem == 0 or not numpy.isfinite(elem):
            return float(elem)
        magnitude = int(numpy.ceil(numpy.log10(numpy.abs(elem))))
        return float(numpy.round(elem, digits - magnitude))

    # Scalars are not iterable, so handle them before the element-wise path
    if numpy.isscalar(x):
        return _signif_one(x)

    return numpy.fromiter(
        (_signif_one(elem) for elem in x),
        dtype=float
    )
287+
219288
# pylint: disable=unused-argument
220289
@register_verb(DataFrame, context=Context.EVAL)
221290
def cov(x: DataFrame, y: Iterable = None, ddof: int = 1) -> DataFrame:
@@ -501,3 +570,75 @@ def row_medians(
501570
The medians by row.
502571
"""
503572
return x.agg(median, axis=1, na_rm=na_rm)
573+
574+
@register_func(None, context=Context.EVAL)
def log(x: NumericOrIter, base: float = numpy.e) -> FloatOrIter:
    """Compute logarithms; the natural logarithm unless a base is given

    Args:
        x: A numeric scalar or vector
        base: The base of the logarithm, defaults to e

    Returns:
        The logarithm of x for a scalar x, otherwise the element-wise
        logarithms of the elements in x
    """
    if base != numpy.e:
        # Change of base: log_b(x) = ln(x) / ln(b)
        return numpy.log(x) / numpy.log(base)

    return numpy.log(x)
590+
591+
exp = register_numpy_func_x(
592+
"exp",
593+
"exp",
594+
doc="""Calculates the exponential of x (e raised to the power of x)
595+
596+
Args:
597+
x: A numeric scalar or vector
598+
599+
Returns:
600+
The exponential of x if x is scalar, otherwise element-wise exponentials of x
601+
"""
602+
)
603+
604+
log2 = register_numpy_func_x(
605+
"log2",
606+
"log2",
607+
doc="""Computes logarithms with base 2
608+
609+
Args:
610+
x: A numeric scalar or vector
611+
612+
Returns:
613+
The value of log2 if x is scalar, otherwise element-wise
614+
log2 of elements in x
615+
"""
616+
)
617+
618+
log10 = register_numpy_func_x(
619+
"log10",
620+
"log10",
621+
doc="""Computes logarithms with base 10
622+
623+
Args:
624+
x: A numeric scalar or vector
625+
626+
Returns:
627+
The value of log10 if x is scalar, otherwise element-wise
628+
log10 of elements in x
629+
"""
630+
)
631+
632+
log1p = register_numpy_func_x(
633+
"log1p",
634+
"log1p",
635+
doc="""Computes log(1+x)
636+
637+
Args:
638+
x: A numeric scalar or vector
639+
640+
Returns:
641+
The value of log(1+x) if x is scalar, otherwise element-wise
642+
log(1+x) of elements in x
643+
"""
644+
)

datar/base/constants.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
# pylint: disable=invalid-name
88

99
pi = math.pi
10-
Inf = numpy.inf
1110

1211
letters = numpy.array(list(ascii_letters[:26]))
1312
LETTERS = numpy.array(list(ascii_letters[26:]))

datar/base/factor.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,26 @@ def levels(x: Any) -> ArrayLikeType:
4545
x: The categorical data
4646
4747
Returns:
48-
levels of the categorical data
48+
levels of the categorical
49+
None if x is not a categorical/factor
4950
"""
5051
if not is_categorical_(x):
5152
return None
5253

5354
return categorized(x).categories
5455

56+
@register_func(None, context=Context.EVAL)
def nlevels(x: Any) -> int:
    """Count the levels of a factor

    Args:
        x: The data whose levels are counted

    Returns:
        The number of levels reported by `levels(x)`, or 0 when
        `levels(x)` is None (x is not a categorical/factor)
    """
    categories = levels(x)
    if categories is None:
        return 0

    return len(categories)
5568

5669
def factor(
5770
x: Iterable[Any] = None,

datar/base/funs.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
registered by `register_verb` and should be placed in `./verbs.py`
55
"""
66
import itertools
7-
from typing import Any, Iterable
7+
from typing import Any, Callable, Iterable, Union
8+
import numpy
89

910
import pandas
1011
from pandas import Categorical, DataFrame
@@ -86,6 +87,25 @@ def expandgrid(*args: Iterable[Any], **kwargs: Iterable[Any]) -> DataFrame:
8687
list(itertools.product(*iters.values())), columns=iters.keys()
8788
)
8889

90+
@register_func(None, context=Context.EVAL)
def outer(x, y, fun: Union[str, Callable] = "*") -> DataFrame:
    """Build the outer product of two vectors as a data frame

    Args:
        x: The first vector; each element yields one row
        y: The second vector
        fun: How an element of x is combined with y. The default "*"
            uses `numpy.outer`. Otherwise it is called as `fun(xelem, y)`
            for every element of x, so it has to be vectorized at the
            second argument and return the same shape as y.

    Returns:
        The data frame of the outer product of x and y
    """
    if fun == "*":
        product = numpy.outer(x, y)
        return DataFrame(product)

    # One row per element of x, each computed against the whole of y
    rows = [fun(xelem, y) for xelem in x]
    return DataFrame(rows)
89109

90110
# ---------------------------------
91111
# Plain functions

datar/base/na.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@
88
from ..core.contexts import Context
99
from ..core.types import is_null, is_scalar
1010
from ..core.defaults import NA_REPR
11+
from ..core.utils import register_numpy_func_x
1112

1213
# pylint: disable=invalid-name
1314
NA = numpy.nan
1415
NaN = NA
16+
Inf = numpy.inf
1517

1618
# Just for internal and testing uses
1719
NA_character_ = NA_REPR
@@ -58,3 +60,45 @@ def any_na(x: Any, recursive: bool = False) -> bool:
5860
if any_na(elem, recursive=True):
5961
return True
6062
return out
63+
64+
is_infinite = register_numpy_func_x(
65+
"is_infinite",
66+
"isinf",
67+
doc="""Check if a value or values are infinite numbers
68+
69+
Args:
70+
x: The value to check
71+
72+
Returns:
73+
True if the value is infinite, False otherwise
74+
For iterable values, returns the element-wise results
75+
"""
76+
)
77+
78+
is_finite = register_numpy_func_x(
79+
"is_finite",
80+
"isfinite",
81+
doc="""Check if a value or values are finite numbers
82+
83+
Args:
84+
x: The value to check
85+
86+
Returns:
87+
True if the value is finite, False otherwise
88+
For iterable values, returns the element-wise results
89+
"""
90+
)
91+
92+
is_nan = register_numpy_func_x(
93+
"is_nan",
94+
"isnan",
95+
doc="""Check if a value or values are NaNs
96+
97+
Args:
98+
x: The value to check
99+
100+
Returns:
101+
True if the value is nan, False otherwise
102+
For iterable values, returns the element-wise results
103+
"""
104+
)

0 commit comments

Comments
 (0)