From ce90a046512809726d4cc9f8c5978c475b253a5f Mon Sep 17 00:00:00 2001 From: Maria Patrou Date: Thu, 18 Jan 2024 16:51:10 -0500 Subject: [PATCH] pytorch apis added, prototype in progress --- example/sphere_pytorch_prototype.py | 33 +++++++++++------- sasmodels/kerneltorch.py | 53 +++++++++++++++++++++-------- 2 files changed, 59 insertions(+), 27 deletions(-) diff --git a/example/sphere_pytorch_prototype.py b/example/sphere_pytorch_prototype.py index 83322794..3e948356 100644 --- a/example/sphere_pytorch_prototype.py +++ b/example/sphere_pytorch_prototype.py @@ -4,21 +4,27 @@ from matplotlib import pyplot as plt from sasmodels.core import load_model from sasmodels.direct_model import call_kernel,get_mesh -from sasmodels.details import make_kernel_args, dispersion_mesh +from sasmodels.details import make_kernel_args +import sasmodels.kerneltorch as kt +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +print("device",device) + +def make_kernel(model, q_vectors,device): + """Instantiate the python kernel with input *q_vectors*""" + q_input = kt.PyInput(q_vectors, dtype=torch.float64) + return kt.PyKernel(model.info, q_input, device = device) -#cuda_src = sas_3j1x_x + sphere_c -#Step 1. Define r and q vectors model = load_model('_spherepy') -q = logspace(-3, -1, 20) +q = logspace(-3, -1, 200) print("q",q[6]) kernel = model.make_kernel([q]) -pars = {'radius': 20, 'radius_pd': 0.1, 'radius_pd_n':100, 'scale': 2} +pars = {'radius': 200, 'radius_pd': 0.1, 'radius_pd_n':100, 'scale': 2} t_before = time.time() Iq = call_kernel(kernel, pars) @@ -28,19 +34,20 @@ print("Tota Numpy: ",total_np) t_before = time.time() -q_t = torch.logspace(start=-3, end=-1, steps=200) -kernel = model.make_kernel([q_t]) +q_t = torch.logspace(start=-3, end=-1, steps=200).to(device) +kernel = make_kernel(model, [q_t],device) +Iq_t = call_kernel(kernel, pars) # call_kernel unwrap -calculator = kernel -cutoff=0. -mono=False +#calculator = kernel +#cutoff=0. +#mono=False -mesh = get_mesh(calculator.info, pars, dim=calculator.dim, mono=mono) +#mesh = get_mesh(calculator.info, pars, dim=calculator.dim, mono=mono) #print("in call_kernel: pars:", list(zip(*mesh))[0]) -call_details, values, is_magnetic = make_kernel_args(calculator, mesh) +#call_details, values, is_magnetic = make_kernel_args(calculator, mesh) #print("in call_kernel: values:", values) -Iq_t = calculator(call_details, values, cutoff, is_magnetic) +#Iq_t = calculator(call_details, values, cutoff, is_magnetic) t_after = time.time() total_torch = t_after -t_before diff --git a/sasmodels/kerneltorch.py b/sasmodels/kerneltorch.py index a9f0f539..833a3a00 100644 --- a/sasmodels/kerneltorch.py +++ b/sasmodels/kerneltorch.py @@ -108,6 +108,8 @@ class PyKernel(Kernel): *q_input* is the DllInput q vectors at which the kernel should be evaluated. + *device* : cpu or cuda for calculations with pytorch on CPUs or GPUs + The resulting call method takes the *pars*, a list of values for the fixed parameters to the kernel, and *pd_pars*, a list of (value,weight) vectors for the polydisperse parameters. *cutoff* determines the @@ -116,8 +118,9 @@ class PyKernel(Kernel): Call :meth:`release` when done with the kernel instance. """ - def __init__(self, model_info, q_input): - # type: (ModelInfo, List[np.ndarray]) -> None + def __init__(self, model_info, q_input, device): + # type: (ModelInfo, List[np.ndarray], str) -> None + self.device = device self.dtype = np.dtype('d') self.info = model_info self.q_input = q_input @@ -184,13 +187,13 @@ def _call_kernel(self, call_details, values, cutoff, magnetic, radius_effective_ # type: (CallDetails, np.ndarray, np.ndarray, float, bool) -> None if magnetic: raise NotImplementedError("Magnetism not implemented for pure python models") - #print("Calling python kernel") + #call_details.show(values) radius = ((lambda: 0.0) if radius_effective_mode == 0 else (lambda: self._radius(radius_effective_mode))) self.result = _loops( self._parameter_vector, self._form, self._volume, radius, - self.q_input.nq, call_details, values, cutoff) + self.q_input.nq, call_details, values, cutoff,self.device) def release(self): # type: () -> None @@ -202,8 +205,8 @@ def release(self): def _loops(parameters, form, form_volume, form_radius, nq, call_details, - values, cutoff): - # type: (np.ndarray, Callable[[], np.ndarray], Callable[[], float], Callable[[], float], int, CallDetails, np.ndarray, float) -> None + values, cutoff,device = 'cpu'): + # type: (np.ndarray, Callable[[], np.ndarray], Callable[[], float], Callable[[], float], int, CallDetails, np.ndarray, float, str) -> None ################################################################ # # # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # @@ -221,8 +224,12 @@ def _loops(parameters, form, form_volume, form_radius, nq, call_details, # mesh, we update the components with the polydispersity values before # calling the respective functions. n_pars = len(parameters) - parameters[:] = values[2:n_pars+2] + parameters = torch.DoubleTensor(parameters).to(device) + #parameters[:] = values[2:n_pars+2] + parameters[:] = torch.DoubleTensor(values[2:n_pars+2]) + + print("parameters",parameters) if call_details.num_active == 0: total = form() weight_norm = 1.0 @@ -230,8 +237,12 @@ def _loops(parameters, form, form_volume, form_radius, nq, call_details, weighted_radius = form_radius() else: - pd_value = values[2+n_pars:2+n_pars + call_details.num_weights] - pd_weight = values[2+n_pars + call_details.num_weights:] + #transform to tensor flow + pd_value = torch.DoubleTensor(values[2+n_pars:2+n_pars + call_details.num_weights]) + pd_weight = torch.DoubleTensor(values[2+n_pars + call_details.num_weights:]) + + #print("pd_value",pd_value) + #print("pd_weight",pd_weight) weight_norm = 0.0 weighted_form = 0.0 @@ -250,15 +261,26 @@ def _loops(parameters, form, form_volume, form_radius, nq, call_details, pd_stride = call_details.pd_stride[:call_details.num_active] pd_length = call_details.pd_length[:call_details.num_active] - total = np.zeros(nq, np.float64) + #total = np.zeros(nq, np.float64) + total = torch.zeros(nq, dtype= torch.float64).to(device) + + #print("ll", range(call_details.num_eval)) + #parallel for loop + # each look_index can be GPU/CPU thread: tid + # each thread has its own pd_index and p0_index + for loop_index in range(call_details.num_eval): # Update polydispersity parameter values. if p0_index == p0_length: pd_index = (loop_index//pd_stride)%pd_length parameters[pd_par] = pd_value[pd_offset+pd_index] - partial_weight = np.prod(pd_weight[pd_offset+pd_index][1:]) + #partial_weight = np.prod(pd_weight[pd_offset+pd_index][1:]) + partial_weight = torch.prod(pd_weight[pd_offset+pd_index][1:]) + p0_index = loop_index%p0_length + # weight can become and array of weights calculated in parallel. + # weights[tid] = partial_weight * pd_weight[p0_offset + p0_index] weight = partial_weight * pd_weight[p0_offset + p0_index] parameters[p0_par] = pd_value[p0_offset + p0_index] p0_index += 1 @@ -267,8 +289,9 @@ def _loops(parameters, form, form_volume, form_radius, nq, call_details, # Assume that NaNs are only generated if the parameters are bad; # exclude all q for that NaN. Even better would be to have an # INVALID expression like the C models, but that is expensive. - Iq = np.asarray(form(), 'd') - if np.isnan(Iq).any(): + #Iq = np.asarray(form(), 'd') + Iq = torch.asarray(form()).to(device) + if torch.isnan(Iq).any(): continue # Update value and norm. @@ -279,7 +302,9 @@ def _loops(parameters, form, form_volume, form_radius, nq, call_details, weighted_form += weight * unweighted_form weighted_radius += weight * form_radius() - result = np.hstack((total, weight_norm, weighted_form, weighted_shell, weighted_radius)) + #result = np.hstack((total, weight_norm, weighted_form, weighted_shell, weighted_radius)) + result = torch.hstack((total, weight_norm, weighted_form, weighted_shell, weighted_radius)).to(device) + return result