@@ -6,7 +6,7 @@ using ArrayInterface: fast_scalar_indexing
66using Reexport: @reexport
77using DiffEqBase: DiffEqBase
88@reexport using DiffEqBase
9- using LinearAlgebra: diagm, factorize, norm
9+ using LinearAlgebra: diagm, factorize
1010using PrecompileTools: @compile_workload , @setup_workload
1111using SciMLBase: DAEProblem, SciMLBase
1212
@@ -19,9 +19,9 @@ include("common.jl")
1919const MAXORDER = 6
2020const MAXIT = 10
2121
22- mutable struct JacData
23- a:: Real
24- jac:: Any # Jacobian matrix for the newton solver
22+ mutable struct JacData{T <: Real , M}
23+ a:: T
24+ jac:: M # Jacobian matrix for the newton solver
2525end
2626
2727function dasslStep (
@@ -44,12 +44,14 @@ function dasslStep(
4444 args...
4545 ) where {T <: Number }
4646 if ! fast_scalar_indexing (y0)
47- throw (ArgumentError (
48- " DASSL requires arrays that support fast scalar indexing. " *
49- " GPU arrays (like CuArray, ROCArray) and JLArrays are not supported " *
50- " because the numerical Jacobian computation requires element-wise access. " *
51- " Please use CPU arrays (Vector, Array) or provide an analytical Jacobian."
52- ))
47+ throw (
48+ ArgumentError (
49+ " DASSL requires arrays that support fast scalar indexing. " *
50+ " GPU arrays (like CuArray, ROCArray) and JLArrays are not supported " *
51+ " because the numerical Jacobian computation requires element-wise access. " *
52+ " Please use CPU arrays (Vector, Array) or provide an analytical Jacobian."
53+ )
54+ )
5355 end
5456 n = length (y0)
5557 jd = JacData (zero (tstart), zeros (T, n, n)) # generate a dummy
@@ -379,11 +381,20 @@ function newStepOrderContinuous(
379381 errors[k] = err
380382 ne = length (errors) # == k or k+1
381383
382- # we want to be conservative in our choice of order so we consider
383- # not only a monotone sequences but geometrically monotone
384- # sequences
385- errors_dec = all (diff ([errors[i] for i in max (k - 2 , 1 ): min (ne, maxorder)]) .< 0 )
386- errors_inc = all (diff ([errors[i] for i in max (k - 2 , 1 ): min (ne, maxorder)]) .> 0 )
384+ # Check if errors form a decreasing or increasing sequence
385+ # without allocating arrays
386+ lo = max (k - 2 , 1 )
387+ hi = min (ne, maxorder)
388+ errors_dec = true
389+ errors_inc = true
390+ @inbounds for i in lo: (hi - 1 )
391+ if errors[i + 1 ] >= errors[i]
392+ errors_dec = false
393+ end
394+ if errors[i + 1 ] <= errors[i]
395+ errors_inc = false
396+ end
397+ end
387398
388399 if ne == k + 1 && errors_dec
389400 # If we can estimate the error for k+1 order and the Taylor
457468# step, we can estimate the error for order k+1.
458469#
459470
471+ # Check if all step sizes are equal without allocating diff array
472+ function _all_steps_equal (t_view)
473+ if length (t_view) < 2
474+ return true
475+ end
476+ first_step = t_view[2 ] - t_view[1 ]
477+ @inbounds for i in 3 : length (t_view)
478+ if t_view[i] - t_view[i - 1 ] != first_step
479+ return false
480+ end
481+ end
482+ return true
483+ end
484+
460485# here t is an array of times t=[t_1, ..., t_n, t_{n+1}]
461486# and y is an array of solutions y=[y_1, ..., y_n, y_{n+1}]
462487function errorEstimates (
@@ -476,22 +501,21 @@ function errorEstimates(
476501 # estimates the error for orders [k-2,k-1,k]
477502 errors = zeros (eltype (t), k)
478503
479- for i in max (k - 2 , 1 ): k
480- # @todo can this be optimized by inlining the body of
481- # interpolateHighestDerivative?
482- maxd = interpolateHighestDerivative (t[(end - (i + 1 )): end ], y[(end - (i + 1 )): end ])
504+ @inbounds for i in max (k - 2 , 1 ): k
505+ # Use views to avoid allocation
506+ t_view = @view t[(end - (i + 1 )): end ]
507+ y_view = @view y[(end - (i + 1 )): end ]
508+ maxd = interpolateHighestDerivative (t_view, y_view)
483509 errors[i] = h^ (i + 1 ) * normy (maxd)
484510 end
485511
486512 # compute the estimate the k+1 order only if all the steps were
487513 # made using the same stepsize
488514 if nsteps >= k + 3
489- hn = diff (t[(end - (k + 2 )): end ])
490- if all (hn .== hn[1 ])
491- maxd = interpolateHighestDerivative (
492- t[(end - (k + 2 )): end ],
493- y[(end - (k + 2 )): end ]
494- )
515+ t_view = @view t[(end - (k + 2 )): end ]
516+ if _all_steps_equal (t_view)
517+ y_view = @view y[(end - (k + 2 )): end ]
518+ maxd = interpolateHighestDerivative (t_view, y_view)
495519 push! (errors, h^ (k + 2 ) * normy (maxd))
496520 end
497521 end
@@ -522,15 +546,17 @@ function stepper(
522546 )
523547 l = length (y[1 ]) # the number of dependent variables
524548
525- # @todo this should be the view of the tail of the arrays t and y
526- tk = t[(end - ord + 1 ): end ]
527- yk = y[(end - ord + 1 ): end ]
549+ # Use views to avoid allocations
550+ tk = @view t[(end - ord + 1 ): end ]
551+ yk = @view y[(end - ord + 1 ): end ]
528552
529553 t_next = tk[end ] + h_next
530554
531- # I think there is an error in the book, the sum should be taken
532- # from j=1 to k+1 instead of j=1 to k
533- alphas = - sum ([1 / j for j in 1 : ord])
555+ # Compute sum without allocating array
556+ alphas = zero (eltype (t))
557+ @inbounds for j in 1 : ord
558+ alphas -= 1 / j
559+ end
534560
535561 if length (y) == 1
536562 # this is the first step, we initialize y0 and dy0 with
@@ -573,10 +599,11 @@ function stepper(
573599 normy
574600 ) # the norm used to estimate error needs weights
575601
576- alpha = Array {eltype(t)} (undef, ord + 1 )
577-
578- for i in 1 : ord
579- alpha[i] = h_next / (t_next - t[end - i + 1 ])
602+ # Compute alpha values and alpha0 without allocating array
603+ alpha0 = zero (eltype (t))
604+ @inbounds for i in 1 : ord
605+ alpha_i = h_next / (t_next - t[end - i + 1 ])
606+ alpha0 -= alpha_i
580607 end
581608
582609 if length (t) >= ord + 1
@@ -589,10 +616,9 @@ function stepper(
589616 t0 = t[1 ] - h_next
590617 end
591618
592- alpha[ord + 1 ] = h_next / (t_next - t0)
619+ alpha_ord_plus_1 = h_next / (t_next - t0)
593620
594- alpha0 = - sum (alpha[1 : ord])
595- M = max (alpha[ord + 1 ], abs .(alpha[ord + 1 ] + alphas - alpha0))
621+ M = max (alpha_ord_plus_1, abs (alpha_ord_plus_1 + alphas - alpha0))
596622 err:: eltype (t) = normy ((yc - y0)) * M
597623
598624 # status<0 means the modified Newton method did not converge
@@ -700,7 +726,12 @@ function newton_iteration(
700726end
701727
702728function dassl_norm (v, wt)
703- return norm (v ./ wt) / sqrt (length (v))
729+ # Avoid allocation by computing sum manually
730+ s = zero (eltype (v))
731+ @inbounds for i in eachindex (v, wt)
732+ s += (v[i] / wt[i])^ 2
733+ end
734+ return sqrt (s / length (v))
704735end
705736
706737function dassl_weights (y, reltol, abstol)
@@ -713,23 +744,19 @@ function interpolateAt(
713744 y:: AbstractVector ,
714745 x0:: T
715746 ) where {T <: Real }
716- if length (x) != length (y)
717- error (" x and y have to be of the same size." )
718- end
719-
720747 n = length (x)
748+ # Pre-allocate output and accumulate in-place
721749 p = zero (y[1 ])
722750
723- for i in 1 : n
751+ @inbounds for i in 1 : n
724752 Li = one (T)
725753 for j in 1 : n
726- if j == i
727- continue
728- else
754+ if j != i
729755 Li *= (x0 - x[j]) / (x[i] - x[j])
730756 end
731757 end
732- p += Li * y[i]
758+ # Use broadcasting in-place to avoid allocation
759+ p = p .+ Li .* y[i]
733760 end
734761 return p
735762end
@@ -741,31 +768,23 @@ function interpolateDerivativeAt(
741768 y:: AbstractVector ,
742769 x0:: T
743770 ) where {T <: Real }
744- if length (x) != length (y)
745- error (" x and y have to be of the same size." )
746- end
747-
748771 n = length (x)
749772 p = zero (y[1 ])
750773
751- for i in 1 : n
774+ @inbounds for i in 1 : n
752775 dLi = zero (T)
753776 for k in 1 : n
754- if k == i
755- continue
756- else
777+ if k != i
757778 dLi1 = one (T)
758779 for j in 1 : n
759- if j == k || j == i
760- continue
761- else
780+ if j != k && j != i
762781 dLi1 *= (x0 - x[j]) / (x[i] - x[j])
763782 end
764783 end
765784 dLi += dLi1 / (x[i] - x[k])
766785 end
767786 end
768- p += dLi * y[i]
787+ p = p .+ dLi . * y[i]
769788 end
770789 return p
771790end
@@ -777,25 +796,19 @@ function interpolateHighestDerivative(
777796 x:: AbstractVector ,
778797 y:: AbstractVector
779798 )
780- if length (x) != length (y)
781- error (" x and y have to be of the same size." )
782- end
783-
784799 n = length (x)
785800 p = zero (y[1 ])
786801
787- for i in 1 : n
802+ @inbounds for i in 1 : n
788803 Li = one (eltype (x))
789804 for j in 1 : n
790- if j == i
791- continue
792- else
805+ if j != i
793806 Li *= 1 / (x[i] - x[j])
794807 end
795808 end
796- p += Li * y[i]
809+ p = p .+ Li . * y[i]
797810 end
798- return factorial (n - 1 ) * p
811+ return factorial (n - 1 ) . * p
799812end
800813
801814# generate a function that computes approximate jacobian using forward
0 commit comments