-
Notifications
You must be signed in to change notification settings - Fork 93
Reduce RAM usage of nonlocal term #1088
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
ebbcc6a
1c3b8a9
49c904b
1bd11c6
903bf9c
c2d6d66
3df5bb0
9146d61
4f18296
3555c31
138ea8d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -144,6 +144,119 @@ | |
proj_coeffs | ||
end | ||
|
||
""" | ||
Projector set of a single atom (independent of the atom's position), | ||
and the structure factor for the atom position. Used inside NonlocalProjectors | ||
such that the projector set can be reused for multiple atoms in the same atom group. | ||
""" | ||
struct AtomProjectors{VT<:AbstractVector,PT<:AbstractMatrix}
    structure_factors::VT  # length nbasis: position-dependent factors of this atom
    projectors::PT         # size nbasis x nproj: position-independent projector set
end
|
||
""" | ||
Matrix-like type to represent the nonlocal projection vectors P without | ||
allocating the full matrix. | ||
This type extends AbstractMatrix, but it does not implement all | ||
the required methods, only those that were shown to be needed. | ||
In particular, random access to the matrix elements is not supported. | ||
""" | ||
struct NonlocalProjectors{
    T<:Real,
    ST<:AbstractVector{Complex{T}},
    PT<:AtomProjectors,
} <: AbstractMatrix{Complex{T}}
    # Work buffer of length nbasis; the `_mul!` methods overwrite it with one
    # projection vector at a time.
    # TODO: this is a real problem wrt. thread-safety, no? (The buffer is mutable state
    # shared by every product that uses the same object.)
    proj_scratch::ST
    # One entry per atom; the number of columns of P is the total projector count.
    atoms::Vector{PT}
end
"""
    NonlocalProjectors(atoms::Vector{<:AtomProjectors})

Build a `NonlocalProjectors` from per-atom projector data. The scratch buffer is
allocated with `similar` from the first atom's structure factors, using the promoted
element type of its structure factors and projectors.

Throws an `ArgumentError` if `atoms` is empty, since the buffer type and length are
derived from the first entry.
"""
function NonlocalProjectors(atoms::Vector{<:AtomProjectors})
    if isempty(atoms)
        throw(ArgumentError("NonlocalProjectors requires at least one AtomProjectors entry"))
    end
    at = first(atoms)
    T = promote_type(eltype(at.structure_factors), eltype(at.projectors))
    # `similar` keeps the array type of the structure factors for the buffer.
    proj_scratch = similar(at.structure_factors, T)
    NonlocalProjectors(proj_scratch, atoms)
end
|
||
# Shape of the (never materialised) projector matrix: one row per entry of the scratch
# buffer, one column per projector summed over all atoms.
function Base.size(P::NonlocalProjectors)
    nrows = length(P.proj_scratch)
    ncols = sum(atom -> size(atom.projectors, 2), P.atoms)
    return (nrows, ncols)
end
# Materialise the full projector matrix. Columns are ordered atom by atom, and within
# an atom in the order of the columns of its `projectors`.
function Base.Matrix(P::NonlocalProjectors{T}) where {T}
    nrows, ncols = size(P)
    dense = zeros(Complex{T}, nrows, ncols)
    col = 0
    for atom in P.atoms, form_factor in eachcol(atom.projectors)
        col += 1
        # Each column is the elementwise product of the atom's structure factors
        # with one of its projector columns.
        @views dense[:, col] .= atom.structure_factors .* form_factor
    end
    return dense
end
|
||
# Compact representation: the type name followed by the per-atom data in braces.
function Base.show(io::IO, P::NonlocalProjectors)
    print(io, "DFTK.NonlocalProjectors{")
    show(io, P.atoms)
    print(io, "}")
    return nothing
end
# Only print the summary line for the text/plain display; presumably this sidesteps
# the generic AbstractArray display, which would read individual elements.
Base.show(io::IO, ::MIME"text/plain", P::NonlocalProjectors) = print(io, summary(P))
|
||
# The vector and matrix signatures are spelled out separately and both forward to
# `_mul!`; this indirection avoids ambiguity with the mul! methods provided by Julia.
function LinearAlgebra.mul!(C::AbstractVector, A::Adjoint{<:Any, <:NonlocalProjectors},
                            ψk::AbstractVector)
    return _mul!(C, A, ψk)
end
function LinearAlgebra.mul!(C::AbstractMatrix, A::Adjoint{<:Any, <:NonlocalProjectors},
                            ψk::AbstractMatrix)
    return _mul!(C, A, ψk)
end
|
||
# Five-argument (multiply-add) products with the projectors on the left; both the
# vector and the matrix signature forward to the shared `_mul!` implementation.
function LinearAlgebra.mul!(C::AbstractVector, A::NonlocalProjectors, B::AbstractVector,
                            α::Number, β::Number)
    return _mul!(C, A, B, α, β)
end
function LinearAlgebra.mul!(C::AbstractMatrix, A::NonlocalProjectors, B::AbstractMatrix,
                            α::Number, β::Number)
    return _mul!(C, A, B, α, β)
end
|
||
# Compute C = P' * ψk one row at a time, without forming P.
# Each projection vector is assembled in the shared scratch buffer and then applied
# to ψk; row `i` of C receives the result for projector `i`.
function _mul!(C::AbstractVecOrMat, A::Adjoint{<:Any, <:NonlocalProjectors},
               ψk::AbstractVecOrMat)
    shapes_agree = size(C, 1) == size(A, 1) &&
                   size(A, 2) == size(ψk, 1) &&
                   size(ψk, 2) == size(C, 2)
    if !shapes_agree
        throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(ψk)), C has size $(size(C))"))
    end

    P = A.parent
    scratch = P.proj_scratch
    row = 0
    for atom in P.atoms, form_factor in eachcol(atom.projectors)
        row += 1
        # Assemble the current projection vector in place.
        scratch .= atom.structure_factors .* form_factor
        # A one-row view keeps the destination two-dimensional for mul!.
        mul!(view(C, row:row, :), scratch', ψk)
    end
    return C
end
|
||
# Compute C = α * P * B + β * C without forming P.
#
# Each projection vector is assembled in the shared scratch buffer and accumulated
# into every column of C, weighted by the matching entry of B.
# Throws a DimensionMismatch if the sizes of C, A and B are inconsistent.
function _mul!(C::AbstractArray, A::NonlocalProjectors, B::AbstractArray,
               α::Number, β::Number)
    if size(C, 1) != size(A, 1) || size(A, 2) != size(B, 1) || size(B, 2) != size(C, 2)
        throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))"))
    end

    # Scale the previous content of C. When β is zero, C must be overwritten rather
    # than multiplied: `NaN * 0.0` is NaN, so an uninitialised C would otherwise
    # poison the result. This matches how LinearAlgebra treats a zero β.
    if iszero(β)
        fill!(C, zero(eltype(C)))
    else
        C .*= β
    end

    iproj = 1
    proj_scratch = A.proj_scratch
    for at in A.atoms
        for proj in eachcol(at.projectors)
            # TODO: does this use BLAS?
            proj_scratch .= at.structure_factors .* proj
            for iband in axes(B, 2)
                coeff = α * B[iproj, iband]
                @views C[:, iband] .+= proj_scratch .* coeff
            end
            iproj += 1
        end
    end
    C
end
|
||
@doc raw""" | ||
Build projection vectors for an atoms array generated by term_nonlocal
|
@@ -171,36 +284,31 @@ | |
psps::AbstractVector{<: NormConservingPsp}, | ||
psp_positions) where {T} | ||
unit_cell_volume = basis.model.unit_cell_volume | ||
n_proj = count_n_proj(psps, psp_positions) | ||
n_G = length(G_vectors(basis, kpt)) | ||
proj_vectors = zeros(Complex{eltype(psp_positions[1][1])}, n_G, n_proj) | ||
G_plus_k = to_cpu(Gplusk_vectors(basis, kpt)) | ||
|
||
# Compute the columns of proj_vectors = 1/√Ω \hat proj_i(k+G) | ||
# Since the proj_i are translates of each others, \hat proj_i(k+G) decouples as | ||
# \hat proj_i(p) = ∫ proj(r-R) e^{-ip·r} dr = e^{-ip·R} \hat proj(p). | ||
# The first term is the structure factor, the second the form factor. | ||
offset = 0 # offset into proj_vectors | ||
for (psp, positions) in zip(psps, psp_positions) | ||
atom_projectors = reduce(vcat, map(zip(psps, psp_positions)) do (psp, positions) | ||
# Compute position-independent form factors | ||
G_plus_k_cart = to_cpu(Gplusk_vectors_cart(basis, kpt)) | ||
form_factors = build_projector_form_factors(psp, G_plus_k_cart) | ||
psp_form_factors = build_projector_form_factors(psp, G_plus_k_cart) | ||
psp_form_factors ./= sqrt(unit_cell_volume) | ||
# Offload potential values to a device (like a GPU), | ||
# and make sure to share this allocation for all atoms in the group | ||
psp_form_factors = to_device(basis.architecture, psp_form_factors) | ||
|
||
# Combine with structure factors | ||
for r in positions | ||
map(positions) do r | ||
# k+G in this formula can also be G, this only changes an unimportant phase factor | ||
structure_factors = map(p -> cis2pi(-dot(p, r)), G_plus_k) | ||
@views for iproj = 1:count_n_proj(psp) | ||
proj_vectors[:, offset+iproj] .= | ||
structure_factors .* form_factors[:, iproj] ./ sqrt(unit_cell_volume) | ||
end | ||
offset += count_n_proj(psp) | ||
structure_factors = to_device(basis.architecture, map(p -> cis2pi(-dot(p, r)), G_plus_k)) | ||
AtomProjectors(structure_factors, psp_form_factors) | ||
end | ||
end | ||
@assert offset == n_proj | ||
end) | ||
|
||
# Offload potential values to a device (like a GPU) | ||
to_device(basis.architecture, proj_vectors) | ||
NonlocalProjectors(atom_projectors) | ||
end | ||
|
||
""" | ||
|
@@ -282,6 +390,7 @@ | |
D = build_projection_coefficients(basis, psp_groups) | ||
P = build_projection_vectors(basis, kpt, psp_groups, positions) | ||
P_minus_q = build_projection_vectors(basis, kpt_minus_q, psp_groups, positions) | ||
# TODO: probably needs an extra parenthesis to first compute P'ψ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So... I noticed that Julia has custom |
||
P * (D * P_minus_q' * ψk) | ||
end | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -250,3 +250,42 @@ end | |
end | ||
end | ||
end | ||
|
||
@testitem "Test nonlocal term operations" tags=[:psp] setup=[mPspUpf] begin
    # Review note from the author: "Not sure about this test. The goal was just for me
    # to be able to try my changes with <5s waiting time."
    using DFTK
    using LinearAlgebra

    lattice = 5 * I(3)
    positions = [zeros(3), 1/3 .* ones(3), 2/3 .* ones(3)]
    for (element, psp) in mPspUpf.upf_pseudos
        # Otherwise, it's all 0 in the UPF as a placeholder
        sum(psp.r2_ρion) > 0 || continue

        el = ElementPsp(element, psp)
        atoms = [el, el, el]
        model = model_DFT(lattice, atoms, positions; functionals=LDA())
        basis = PlaneWaveBasis(model; Ecut=5, kgrid=[2, 2, 2])
        n_bands = 7
        ψ = [DFTK.random_orbitals(basis, kpt, n_bands) for kpt in basis.kpoints]
        occ = [2.0 * ones(n_bands) for _ in basis.kpoints]
        ρ = DFTK.compute_density(basis, ψ, occ)

        _, ham = DFTK.energy_hamiltonian(basis, ψ, occ; ρ)
        # Apply the full Hamiltonian once; the result itself is not inspected here.
        ham * ψ

        hblock = ham.blocks[1]
        nonloc = hblock.nonlocal_op
        nonlocal_dense = Matrix(nonloc)
        ψk = ψ[1]

        # Apply the nonlocal operator matrix-free and compare with its dense form.
        Hψk = zero(ψk)
        DFTK.apply!((; fourier=Hψk), nonloc, (; fourier=ψk))
        Hψk_dense = nonlocal_dense * ψk

        # The lazy projector product must agree with the materialised matrix.
        Pψk = nonloc.P' * ψk
        DPψk = nonloc.D * Pψk
        @test norm(nonloc.P * DPψk - Matrix(nonloc.P) * DPψk) < 1e-10

        @test norm(Hψk - Hψk_dense) < 1e-10
    end
end
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How bad is this?
DftHamiltonianBlock should handle it fine, but GenericHamiltonianBlock
seems to be parallelizing over bands, which will cause problems!