Skip to content

Objectable filter, astype_array #183

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Jun 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ What is New in ArrayKit

Now building free-threaded compatible wheels for Python 3.13.

Added ``is_objectable()`` and ``is_objectable_dt64()``.

Added ``astype_array()``.


1.0.9
Expand Down
5 changes: 4 additions & 1 deletion src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@
from ._arraykit import array_to_tuple_array as array_to_tuple_array
from ._arraykit import array_to_tuple_iter as array_to_tuple_iter
from ._arraykit import nonzero_1d as nonzero_1d

from ._arraykit import is_objectable_dt64 as is_objectable_dt64
from ._arraykit import is_objectable as is_objectable
from ._arraykit import astype_array as astype_array
from ._arraykit import AutoMap as AutoMap
from ._arraykit import FrozenAutoMap as FrozenAutoMap
from ._arraykit import NonUniqueError as NonUniqueError

3 changes: 3 additions & 0 deletions src/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,9 @@ def get_new_indexers_and_screen(indexers: np.ndarray, positions: np.ndarray) ->
def first_true_1d(__array: np.ndarray, *, forward: bool) -> int: ...
def first_true_2d(__array: np.ndarray, *, forward: bool, axis: int) -> np.ndarray: ...
def nonzero_1d(__array: np.ndarray, /) -> np.ndarray: ...
def is_objectable_dt64(__array: np.ndarray, /) -> bool: ...
def is_objectable(__array: np.ndarray, /) -> bool: ...
def astype_array(__array: np.ndarray, __dtype: np.dtype | None, /) -> np.ndarray: ...
def slice_to_ascending_slice(__slice: slice, __size: int) -> slice: ...
def array_to_tuple_array(__array: np.ndarray) -> np.ndarray: ...
def array_to_tuple_iter(__array: np.ndarray) -> tp.Iterator[tp.Tuple[tp.Any, ...]]: ...
22 changes: 20 additions & 2 deletions src/_arraykit.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ static PyMethodDef arraykit_methods[] = {
NULL},
{"count_iteration", count_iteration, METH_O, NULL},
{"nonzero_1d", nonzero_1d, METH_O, NULL},
{"is_objectable_dt64", is_objectable_dt64, METH_O, NULL},
{"is_objectable", is_objectable, METH_O, NULL},
{"astype_array", astype_array, METH_VARARGS, NULL},
{"isna_element",
(PyCFunction)isna_element,
METH_VARARGS | METH_KEYWORDS,
Expand Down Expand Up @@ -95,6 +98,7 @@ PyInit__arraykit(void)
return NULL;
}

// store a reference to the deepcopy function
PyObject *copy = PyImport_ImportModule("copy");
if (copy == NULL) {
return NULL;
Expand All @@ -105,6 +109,18 @@ PyInit__arraykit(void)
return NULL;
}

// store a year dtype object
PyObject* dt_year_str = PyUnicode_FromString("datetime64[Y]");
if (!dt_year_str) return NULL;

PyArray_Descr* dt_year = NULL;
if (!PyArray_DescrConverter2(dt_year_str, &dt_year)) {
Py_DECREF(dt_year_str);
return NULL;
}
Py_DECREF(dt_year_str);


PyObject *m = PyModule_Create(&arraykit_module);
if (!m ||
PyModule_AddStringConstant(m, "__version__", Py_STRINGIFY(AK_VERSION)) ||
Expand All @@ -128,9 +144,11 @@ PyInit__arraykit(void)
PyModule_AddObject(m, "ErrorInitTypeBlocks", ErrorInitTypeBlocks) ||
PyModule_AddObject(m, "AutoMap", (PyObject *)&AMType) ||
PyModule_AddObject(m, "FrozenAutoMap", (PyObject *)&FAMType) ||
PyModule_AddObject(m, "NonUniqueError", NonUniqueError)
PyModule_AddObject(m, "NonUniqueError", NonUniqueError) ||
PyModule_AddObject(m, "dt_year", (PyObject *)dt_year)
){
Py_DECREF(deepcopy);
Py_XDECREF(deepcopy);
Py_XDECREF(dt_year);
Py_XDECREF(m);
return NULL;
}
Expand Down
133 changes: 133 additions & 0 deletions src/methods.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,139 @@ nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a) {
return AK_nonzero_1d(array);
}

// Return True if the datetime64 array `a` can be cast to object without data loss, False otherwise. Returns NULL with an exception set on error, including when `a` is not of a datetime64 / timedelta64 dtype.
PyObject*
is_objectable_dt64(PyObject *m, PyObject *a) {
    AK_CHECK_NUMPY_ARRAY(a);
    PyArrayObject* array = (PyArrayObject*)a;

    // AK_dt_unit_from_array does not validate the dtype; guard here so that datetime metadata is never read from a dtype that does not carry it.
    char kind = PyArray_DESCR(array)->kind;
    if (kind != 'M' && kind != 'm') {
        PyErr_SetString(PyExc_TypeError,
                "Array must be of a datetime64 or timedelta64 dtype");
        return NULL;
    }

    // this returns a new reference, or NULL if the attribute is missing
    PyObject* dt_year = PyObject_GetAttrString(m, "dt_year");
    if (!dt_year) {
        return NULL;
    }
    int is_objectable = AK_is_objectable_dt64(array, dt_year);
    Py_DECREF(dt_year);

    if (is_objectable < 0) {
        return NULL; // exception already set
    }
    if (is_objectable == 0) {
        Py_RETURN_FALSE;
    }
    Py_RETURN_TRUE;
}


// Return True if the array `a` can be cast to object without data loss. Arrays whose dtype kind is not 'M' or 'm' are always considered objectable; for those kinds the answer is delegated to AK_is_objectable_dt64. Returns NULL with an exception set on error.
PyObject*
is_objectable(PyObject *m, PyObject *a) {
    AK_CHECK_NUMPY_ARRAY(a);
    PyArrayObject* array = (PyArrayObject*)a;

    char kind = PyArray_DESCR(array)->kind;
    if (kind != 'M' && kind != 'm') {
        Py_RETURN_TRUE;
    }

    // this returns a new reference, or NULL if the attribute is missing
    PyObject* dt_year = PyObject_GetAttrString(m, "dt_year");
    if (!dt_year) {
        return NULL;
    }
    int is_objectable = AK_is_objectable_dt64(array, dt_year);
    Py_DECREF(dt_year);

    if (is_objectable < 0) {
        return NULL; // exception already set
    }
    if (is_objectable == 0) {
        Py_RETURN_FALSE;
    }
    Py_RETURN_TRUE;
}

// Convert array to the dtype provided; if the dtype is None or omitted, NPY_DEFAULT_TYPE is used. NOTE: mutable arrays will be returned unless the input array is immutable and no dtype change is needed. When converting a datetime64 / timedelta64 array to object and AK_is_objectable_dt64 reports that it is not objectable, elements are stored as NumPy scalars rather than going through the standard cast. Returns NULL with an exception set on error.
PyObject*
astype_array(PyObject* m, PyObject* args) {

    PyObject* a = NULL;
    PyObject* dtype_spec = Py_None;

    if (!PyArg_ParseTuple(args, "O!|O:astype_array",
            &PyArray_Type, &a,
            &dtype_spec)) {
        return NULL;
    }
    PyArrayObject* array = (PyArrayObject*)a;

    // on both paths `dtype` is a new reference owned by this function
    PyArray_Descr* dtype = NULL;
    if (dtype_spec == Py_None) {
        dtype = PyArray_DescrFromType(NPY_DEFAULT_TYPE);
        if (!dtype) {
            return NULL;
        }
    } else {
        if (!PyArray_DescrConverter(dtype_spec, &dtype)) {
            return NULL;
        }
    }

    if (PyArray_EquivTypes(PyArray_DESCR(array), dtype)) {
        Py_DECREF(dtype);

        if (PyArray_ISWRITEABLE(array)) {
            // may be NULL on failure; propagated as-is
            return PyArray_NewCopy(array, NPY_ANYORDER);
        }
        // already immutable
        Py_INCREF(a);
        return a;
    }
    // if converting to an object
    if (dtype->type_num == NPY_OBJECT) {
        char kind = PyArray_DESCR(array)->kind;
        if ((kind == 'M' || kind == 'm')) {
            PyObject* dt_year = PyObject_GetAttrString(m, "dt_year");
            if (!dt_year) {
                Py_DECREF(dtype);
                return NULL;
            }
            int is_objectable = AK_is_objectable_dt64(array, dt_year);
            Py_DECREF(dt_year);

            if (is_objectable < 0) {
                // error already set; do not fall through to the cast with a pending exception
                Py_DECREF(dtype);
                return NULL;
            }
            if (!is_objectable) {
                // Request C order explicitly: the flat iterator below walks `array` in C order and the result buffer is filled linearly, so the result must be C-contiguous for positions to match.
                // PyArray_NewLikeArray steals the reference to dtype, even on failure.
                PyObject* result = PyArray_NewLikeArray(array, NPY_CORDER, dtype, 0);
                if (!result) {
                    return NULL;
                }
                PyObject** data = (PyObject**)PyArray_DATA((PyArrayObject*)result);

                PyArrayIterObject* it = (PyArrayIterObject*)PyArray_IterNew(a);
                if (!it) {
                    Py_DECREF(result);
                    return NULL;
                }

                npy_intp i = 0;
                while (it->index < it->size) {
                    PyObject* item = PyArray_ToScalar(it->dataptr, array);
                    if (!item) {
                        Py_DECREF(result);
                        Py_DECREF(it);
                        return NULL;
                    }
                    data[i++] = item; // result takes ownership of item
                    PyArray_ITER_NEXT(it);
                }
                Py_DECREF(it);
                return result;
            }
        }
    }
    // all other cases: do a standard cast conversion
    // PyArray_CastToType steals the reference to dtype, even on failure, so no decref is needed here
    return PyArray_CastToType(array, dtype, 0);
}



static char *first_true_1d_kwarg_names[] = {
"array",
"forward",
Expand Down
9 changes: 9 additions & 0 deletions src/methods.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,15 @@ resolve_dtype_iter(PyObject *Py_UNUSED(m), PyObject *arg);
PyObject *
nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a);

PyObject *
is_objectable_dt64(PyObject *m, PyObject *a);

PyObject *
is_objectable(PyObject *m, PyObject *a);

PyObject *
astype_array(PyObject *m, PyObject *args);

PyObject *
first_true_1d(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs);

Expand Down
72 changes: 63 additions & 9 deletions src/utilities.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,69 @@ AK_slice_to_ascending_slice(PyObject* slice, Py_ssize_t size)
-step);
}


// Return the datetime unit stored in the dtype metadata of array `a`. Modeled on get_datetime_metadata_from_dtype in the NumPy source (which is private). No check is made that the dtype is a datetime type; callers must ensure that.
static inline NPY_DATETIMEUNIT
AK_dt_unit_from_array(PyArrayObject* a) {
    // the descriptor is a borrowed reference
    PyArray_DatetimeDTypeMetaData* c_meta =
            (PyArray_DatetimeDTypeMetaData *)PyDataType_C_METADATA(PyArray_DESCR(a));
    return c_meta->meta.base;
}

// Given a dt64 array, determine if it can be cast to an object without data loss. Returns 1 if it can, 0 if it cannot, and -1 (with an exception set) on error. `dt_year` is a borrowed reference to the dtype the array is cast to for the year-range check (the caller passes the module's "dt_year" attribute); this function takes its own reference before handing it to the cast.
static inline int
AK_is_objectable_dt64(PyArrayObject* a, PyObject* dt_year)
{
    NPY_DATETIMEUNIT unit = AK_dt_unit_from_array(a);
    switch (unit) {
        case NPY_FR_ERROR:
        case NPY_FR_Y:
        case NPY_FR_M:
        case NPY_FR_W:
            return 0;
        case NPY_FR_D:
        case NPY_FR_h:
        case NPY_FR_m:
        case NPY_FR_s:
        case NPY_FR_ms:
        case NPY_FR_us:
            break; // these units additionally require the year-range check below
        case NPY_FR_ns:
        case NPY_FR_ps:
        case NPY_FR_fs:
        case NPY_FR_as:
        case NPY_FR_GENERIC:
            return 0;
    }

    // PyArray_CastToType steals a reference to the dtype (even on failure), so give it one of our own and do not release it again afterwards.
    Py_INCREF(dt_year);
    PyObject* a_year = PyArray_CastToType(a, (PyArray_Descr*)dt_year, 0);
    if (!a_year) {
        return -1;
    }

    npy_int64* data = (npy_int64*)PyArray_DATA((PyArrayObject*)a_year);
    npy_intp size = PyArray_SIZE((PyArrayObject*)a_year);

    for (npy_intp i = 0; i < size; ++i) {
        npy_int64 v = data[i];
        if (v == NPY_DATETIME_NAT) {
            continue;
        }
        // values are years as an offset from 1970; permitted range is year 1 (1-1970) through year 9999 (9999-1970)
        if (v < -1969 || v > 8029) {
            Py_DECREF(a_year);
            return 0;
        }
    }
    Py_DECREF(a_year);
    return 1;
}




// Given a Boolean, contiguous 1D array, return the index positions in an int64 array. Through experimentation it has been verified that doing full-size allocation of memory provides the best performance at all scales. Using NpyIter, or using bit masks, does not improve performance over pointer arithmetic. Prescanning for all empty is very effective. Note that NumPy benefits from first counting the nonzeros, then allocating only enough data for the expected number of indices.
static inline PyObject *
AK_nonzero_1d(PyArrayObject* array) {
Expand Down Expand Up @@ -319,15 +382,6 @@ AK_nonzero_1d(PyArrayObject* array) {
return final;
}

// Look up the datetime unit recorded in the dtype metadata of array `a`. This follows get_datetime_metadata_from_dtype in the NumPy source, which is private and so cannot be called directly. The dtype is not checked to be of the appropriate type.
static inline NPY_DATETIMEUNIT
AK_dt_unit_from_array(PyArrayObject* a) {
    PyArray_Descr* descr = PyArray_DESCR(a); // borrowed ref
    PyArray_DatetimeDTypeMetaData* dt_meta = (PyArray_DatetimeDTypeMetaData *)PyDataType_C_METADATA(descr);
    return dt_meta->meta.base;
}

static inline NPY_DATETIMEUNIT
AK_dt_unit_from_scalar(PyDatetimeScalarObject* dts) {
// Based on convert_pyobject_to_datetime and related usage in datetime.c
Expand Down
Loading
Loading