Skip to content

Commit 50d841c

Browse files
committed
Optimize continuum
1 parent 2a696f8 commit 50d841c

7 files changed

+881
-87
lines changed

README.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@
44
./startlab.sh
55
```
66

7-
## Python depenencies
7+
## Python dependencies
88
```
99
spectral
1010
scipy
1111
numpy
1212
matplotlib
1313
sklearn
14+
numba
1415
```
1516

1617
## To build C module use

ccontinuum.c

+69-4
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
#include "ccontinuum.h"
22

33
#include <stdio.h>
4+
#include <math.h>
45

56
// Used to represent wavelengths in case we switch
67
// to algorithm that includes wavelengths.
78
#define WL(x) (wavelengths[x])
89

9-
void continuum(double input[], double output[], double wavelengths[], size_t n) {
10-
10+
void continuum_old(double input[], double output[], double wavelengths[], size_t n) {
1111
output[0] = input[0];
1212
// i points to the last point that belongs to the curve.
1313
// j points to the current potential point.
@@ -44,9 +44,74 @@ void continuum(double input[], double output[], double wavelengths[], size_t n)
4444
}
4545
}
4646

47-
void continuum_removed(double input[], double output[], double wavelengths[], size_t n) {
48-
continuum(input, output, wavelengths, n);
47+
void continuum_removed(double input[], double output[], double wavelengths[], size_t indices[], size_t n) {
48+
continuum(input, output, wavelengths, indices, n);
4949
for (size_t i = 0; i < n; ++i) {
5050
output[i] = input[i] / output[i];
5151
}
5252
}
53+
54+
struct data_t {
55+
double* spectrum;
56+
double* wavelengths;
57+
size_t* indices;
58+
size_t n;
59+
size_t ind_fill;
60+
};
61+
62+
static void find_indices(struct data_t* data, size_t ibegin, size_t iend) {
63+
double* spectrum = data->spectrum;
64+
double* wavelengths = data->wavelengths;
65+
size_t iendi = iend - 1;
66+
double naxis_y = wavelengths[iendi] - wavelengths[ibegin];
67+
double naxis_x = spectrum[ibegin] - spectrum[iendi];
68+
double maxval = -INFINITY;
69+
double imax = ibegin;
70+
71+
for(size_t i = ibegin; i < iendi; ++i) {
72+
double newval = wavelengths[i] * naxis_x + spectrum[i] * naxis_y;
73+
if (newval > maxval) {
74+
maxval = newval;
75+
imax = i;
76+
}
77+
}
78+
79+
if (imax == ibegin) {
80+
return;
81+
}
82+
83+
if (imax > ibegin + 1) {
84+
find_indices(data, ibegin, imax + 1);
85+
}
86+
87+
data->indices[data->ind_fill++] = imax;
88+
89+
if (imax < iend - 2) {
90+
find_indices(data, imax, iend);
91+
}
92+
}
93+
94+
void continuum(double spectrum[], double output[], double wavelengths[], size_t indices[], size_t n) {
95+
struct data_t data = { .spectrum = spectrum, .wavelengths = wavelengths, .indices = indices, .n = n, .ind_fill = 1 };
96+
97+
// Find indices of points that belong to convex hull.
98+
indices[0] = 0;
99+
find_indices(&data, 0, n);
100+
indices[data.ind_fill] = n - 1; // Didn't increase ind_fill on purpose.
101+
102+
// Linear interpolation of points.
103+
for (size_t i = 0; i < data.ind_fill; ++i) {
104+
size_t ibegin = indices[i];
105+
size_t iend = indices[i + 1];
106+
// Put exact values where possible.
107+
output[ibegin] = spectrum[ibegin];
108+
// Calculate line parameters.
109+
double a = (spectrum[iend] - spectrum[ibegin]) / (wavelengths[iend] - wavelengths[ibegin]);
110+
double b = spectrum[ibegin] - a * wavelengths[ibegin];
111+
// Fill.
112+
for (size_t j = ibegin + 1; j < iend; ++j) {
113+
output[j] = a * wavelengths[j] + b;
114+
}
115+
}
116+
output[n - 1] = spectrum[n - 1];
117+
}

ccontinuum.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11

22
#include <stddef.h>
33

4-
void continuum(double input[], double output[], double wavelength[], size_t n);
5-
void continuum_removed(double input[], double output[], double wavelength[], size_t n);
4+
void continuum_old(double input[], double output[], double wavelength[], size_t n);
5+
void continuum_removed(double input[], double output[], double wavelength[], size_t indices[], size_t n);
6+
void continuum(double spectrum[], double output[], double wavelengths[], size_t indices[], size_t n);

ccontinuummodule.c

+51-10
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
static PyObject *ErrorObject;
2424

2525
/*
26-
Check that PyArrayObject is a double (Float) type and a 1d, or 2d array.
26+
Check that PyArrayObject is a double (Float) type and a 1d, or 2d, or 3d array.
2727
Return 1 if an error and raise exception.
2828
*/
2929
static int check_type(PyArrayObject* a, int min_dim, int max_dim) {
@@ -41,11 +41,15 @@ static int check_type(PyArrayObject* a, int min_dim, int max_dim) {
4141
*/
4242
static int check_same_shapes(PyArrayObject* a, PyArrayObject* b) {
4343
if (PyArray_NDIM(a) != PyArray_NDIM(b)) {
44+
PyErr_SetString(PyExc_ValueError,
45+
"In check_same_shapes: arrays must have same number of dimensions.");
4446
return 1;
4547
}
4648
Py_ssize_t ndims = PyArray_NDIM(a);
4749
for (int i = 0; i < ndims; ++i) {
4850
if (PyArray_SHAPE(a)[i] != PyArray_SHAPE(b)[i]) {
51+
PyErr_SetString(PyExc_ValueError,
52+
"In check_same_shapes: arrays must have same number of dimensions.");
4953
return 1;
5054
}
5155
}
@@ -55,7 +59,7 @@ static int check_same_shapes(PyArrayObject* a, PyArrayObject* b) {
5559

5660
static PyObject *
5761
continuum_generic_impl(PyObject *self, PyObject *args,
58-
void (*continuum_processing_f)(double*, double*, double*, size_t))
62+
void (*continuum_processing_f)(double*, double*, double*, size_t*, size_t))
5963
{
6064
PyArrayObject* ain;
6165
PyArrayObject* aout;
@@ -69,8 +73,8 @@ continuum_generic_impl(PyObject *self, PyObject *args,
6973
if (NULL == aout) return Py_None;
7074
if (NULL == awl) return Py_None;
7175

72-
if (check_type(ain, 1, 2)) return Py_None;
73-
if (check_type(aout, 1, 2)) return Py_None;
76+
if (check_type(ain, 1, 3)) return Py_None;
77+
if (check_type(aout, 1, 3)) return Py_None;
7478
if (check_type(awl, 1, 1)) return Py_None;
7579

7680
if (check_same_shapes(ain, aout)) return Py_None;
@@ -86,8 +90,10 @@ continuum_generic_impl(PyObject *self, PyObject *args,
8690
"In continuum_generic_impl: wavelengths array has incorrect length.");
8791
return Py_None;
8892
}
89-
continuum_processing_f(datain, dataout, dataawl, spectrum_length);
90-
} else {
93+
size_t* indices = malloc(sizeof(size_t) * spectrum_length);
94+
continuum_processing_f(datain, dataout, dataawl, indices, spectrum_length);
95+
free(indices);
96+
} else if (PyArray_NDIM(ain) == 2) {
9197
Py_ssize_t num_spectra = PyArray_SHAPE(ain)[0];
9298
Py_ssize_t spectrum_length = PyArray_SHAPE(ain)[1];
9399
if (spectrum_length != PyArray_SHAPE(awl)[0]) {
@@ -96,14 +102,49 @@ continuum_generic_impl(PyObject *self, PyObject *args,
96102
return Py_None;
97103
}
98104

99-
//#pragma omp parallel for
105+
#pragma omp parallel
106+
{
107+
size_t* indices = malloc(sizeof(size_t) * spectrum_length);
108+
109+
#pragma omp for
100110
for (Py_ssize_t i = 0; i < num_spectra; ++i) {
101111
double* datain = (double*)(PyArray_DATA(ain)
102112
+ i * PyArray_STRIDES(ain)[0]);
103113
double* dataout = (double*)(PyArray_DATA(aout)
104114
+ i * PyArray_STRIDES(aout)[0]);
105-
continuum_processing_f(datain, dataout, dataawl, spectrum_length);
115+
continuum_processing_f(datain, dataout, dataawl, indices, spectrum_length);
116+
}
117+
118+
free(indices);
119+
} // pragma omp parallel
120+
} else {
121+
Py_ssize_t num_rows = PyArray_SHAPE(ain)[0];
122+
Py_ssize_t num_cols = PyArray_SHAPE(ain)[1];
123+
Py_ssize_t spectrum_length = PyArray_SHAPE(ain)[2];
124+
if (spectrum_length != PyArray_SHAPE(awl)[0]) {
125+
PyErr_SetString(PyExc_ValueError,
126+
"In continuum_generic_impl: wavelengths array has incorrect length.");
127+
return Py_None;
106128
}
129+
130+
//TODO: This could be optimized for better work sharing.
131+
#pragma omp parallel
132+
{
133+
size_t* indices = malloc(sizeof(size_t) * spectrum_length);
134+
135+
#pragma omp for
136+
for (Py_ssize_t i = 0; i < num_rows; ++i) {
137+
for (Py_ssize_t j = 0; j < num_cols; ++j) {
138+
double* datain = (double*)(PyArray_DATA(ain)
139+
+ i * PyArray_STRIDES(ain)[0] + j * PyArray_STRIDES(ain)[1]);
140+
double* dataout = (double*)(PyArray_DATA(aout)
141+
+ i * PyArray_STRIDES(aout)[0] + j * PyArray_STRIDES(aout)[1]);
142+
continuum_processing_f(datain, dataout, dataawl, indices, spectrum_length);
143+
}
144+
}
145+
146+
free(indices);
147+
} // pragma omp parallel
107148
}
108149

109150
return Py_None;
@@ -114,7 +155,7 @@ continuum_generic_impl(PyObject *self, PyObject *args,
114155
PyDoc_STRVAR(ccontinuum_continuum_doc,
115156
"continuum(spectrum)\n\
116157
\n\
117-
Return continuum of the spectrum.");
158+
Return continuum of the spectrum or each spectrum in image.");
118159

119160
static PyObject *
120161
ccontinuum_continuum(PyObject *self, PyObject *args)
@@ -127,7 +168,7 @@ ccontinuum_continuum(PyObject *self, PyObject *args)
127168
PyDoc_STRVAR(ccontinuum_continuum_removed_doc,
128169
"continuum_removed(spectrum)\n\
129170
\n\
130-
Return continuum removed spectrum.");
171+
Return continuum removed spectrum or image.");
131172

132173
static PyObject *
133174
ccontinuum_continuum_removed(PyObject *self, PyObject *args)

continuum_convex_hull.ipynb

+598
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)