Skip to content

Commit 32eae09

Browse files
author
Robert McLay
committed
Adding HPSF slides to repo
1 parent f5bb400 commit 32eae09

File tree

3 files changed

+246
-11
lines changed

3 files changed

+246
-11
lines changed

my_docs/26/HPSF_26/Notes_HPSF.org

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,83 @@
1-
Notes for HPSF Conference presentation: Chicogo March 18, 2026 25 minutes total
1+
Notes for HPSF Conference presentation: Chicago, March 18, 2026; 25
2+
minutes total.
23

4+
Things to add to sc25 talk
5+
6+
* Issue #804/#805
7+
two people from the Easybuild community asked that Lmod be able to
8+
report how to load a module instead of telling a user to use "module spider".
9+
This request/feature is when a site is using a software hierarchy
10+
where a user has to load a compiler to see compiler dependent modules
11+
and similarly for compiler-mpi dependent modules.
12+
13+
** "module spider" reports what compilers etc are needed to load a module
14+
** Interesting bug found where sometimes the spider report is wrong.
15+
** Lmod Maintainer Matthew Cawood (@TACC) used an AI to find the bug:
16+
*** Show AI_805.png
17+
*** Summaries: Root Cause: The bug is in the fallback: lines (396-421)
18+
*** ... When `commonPaths` is empty only suggest paths from the module with the fewest paths.
19+
** Result:
20+
$ module load Python QGIS
21+
Lmod has detected the following error: ...
22+
Try: "module spider Python QGIS" to see how to load the module(s).
23+
Or load any one of these options:
24+
module load GCC/12.3.0 OpenMPI/4.1.5 Python QGIS
25+
26+
* Slow loading of modules that have large dependencies
27+
** Lmod supports the function `depends_on()` to say that one module depends on one or more modules.
28+
** Lmod checks to see that all dependencies are met.
29+
** For modules with a number of dependencies this could be slow
30+
** Lmod passes the state of loaded modules through the moduleTable, which is a Lua Table (AKA Dictionary)
31+
** "command module --mt"
32+
_ModuleTable_ = {
33+
MTversion = 3,
34+
family = {
35+
MPI = "mpich",
36+
compiler = "gcc",
37+
},
38+
mT = {
39+
boost = {
40+
fn = "..."
41+
fullName = "boost/1.76.0",
42+
loadOrder = 7,
43+
...
44+
},
45+
gcc = {
46+
fn = "..."
47+
fullName = "gcc/11.2.0",
48+
loadOrder = 4,
49+
...
50+
},
51+
...
52+
},
53+
}
54+
** Lmod used to reload all currently loaded modules to check that all dependencies were met.
55+
** Now Lmod encodes the dependencies in the moduleTable in the user's environment.
56+
57+
Module A:
58+
depends_on("x/1.0")
59+
depends_on_any("xx","yy/1.0")
60+
61+
62+
_ModuleTable_ = {
63+
MTversion = 3,
64+
mT = {
65+
A = {
66+
depT = {
67+
depA = { { sn = "x", version = {... },}, },
68+
doaA = { {
69+
{ sn = "xx", version = {...}, },
70+
{ sn = "yy", version = {...}, },
71+
},
72+
},
73+
},
74+
},
75+
},
76+
}
77+
78+
** Speed improvements:
79+
*** Not rebuilding the spider cache after a dependent load
80+
*** Storing dependency tree in moduleTable
81+
For a module with 299 dependent modules
82+
Old: 40.2 seconds
83+
New: 5.2 seconds
214 KB
Binary file not shown.

my_docs/26/HPSF_26/presentation.tex

Lines changed: 164 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,52 @@
88
\usepackage{graphicx}
99
\usepackage{hyperref}
1010

11-
\title[Lmod at SC25]{Lmod: The Lua-Based Environment Module System}
12-
\subtitle{Texas Advanced Computing Center (TACC) Booth Talk -- Supercomputing 2025}
13-
\author[M. Cawood]{Matthew Cawood}
14-
\date{SC25 -- St. Louis, Missouri}
11+
\title[Lmod at HPSF]{Lmod: The Lua-Based Environment Module System}
12+
%\subtitle{Texas Advanced Computing Center}
13+
\author{Robert McLay~\inst{1} \and Matthew Cawood~\inst{1} \and Xavier
14+
Delaruelle~\inst{2}}
15+
\institute[A]{\textsuperscript{1}Texas Advanced Computing Center\\ %
16+
\textsuperscript{2}CEA\\ %
17+
}
1518

19+
\date{HPSF -- Chicago, IL}
20+
21+
%% page
22+
%\begin{frame}{}
23+
% \begin{itemize}
24+
% \item
25+
% \end{itemize}
26+
%\end{frame}
27+
%
28+
%% page
29+
%\begin{frame}[fragile]
30+
% \frametitle{}
31+
% {\tiny
32+
% \begin{semiverbatim}
33+
% \end{semiverbatim}
34+
%}
35+
% \begin{itemize}
36+
% \item
37+
% \end{itemize}
38+
%
39+
%\end{frame}
40+
%
1641
\begin{document}
1742

1843
% --- Title Slide ---
1944
\begin{frame}
2045
\titlepage
2146
\end{frame}
2247

48+
%
49+
% page
50+
\begin{frame}{Thanking Xavier Delaruelle}
51+
\begin{itemize}
52+
\item Robert and Matthew would like to thank Xavier for presenting
53+
\item Neither of us could be at HPSF.
54+
\end{itemize}
55+
\end{frame}
56+
2357
% --- What is Lmod ---
2458
\begin{frame}{What is Lmod?}
2559
\begin{columns}
@@ -64,22 +98,142 @@
6498
\begin{itemize}
6599
\item \textbf{Irreversible mode}: unloading can now set environment variables for cleanup.
66100
\item \textbf{depends\_on\_any()}: allows dependency on any member of a module set.
67-
\item \textbf{Enhanced hide\{\}/forbid\{\}}: expression-based control by user, group, or date.
101+
\item \textbf{Enhanced hide\{\}/forbid\{\}}: expression-based
102+
control by user, group, or date (stolen from Env.\ Modules).
68103
\item \textbf{Optional tracking v2}: database shrinks 100× while preserving analytics.
69104
\item \textbf{New hooks (e.g., decorate\_module)} for module tagging and logging.
70105
\end{itemize}
71106
\end{frame}
72107

73108
% --- Performance ---
109+
110+
74111
\begin{frame}{Performance and Reliability}
75112
\begin{itemize}
76-
\item Spider cache and dependency engine now reduce `module avail` from 4s → <0.5s on 40k-module trees.
77113
\item Refactored collection handling avoids NFS metadata bottlenecks.
78114
\item Improved \texttt{family()} logic speeds hierarchical resolution.
79115
\item Expanded compatibility: bash, zsh, fish, and nushell all supported.
80116
\end{itemize}
81117
\end{frame}
82118

119+
\begin{frame}{Performance (II)}
120+
\begin{itemize}
121+
\item Discussion at EasyBuild Users Meeting (EUM 25 at Juelich, Germany)
122+
\item Modules with many dependencies ($>$ 50) loaded
123+
slowly
124+
\item Lmod supports \textbf{depends\_on()} and
125+
\textbf{depends\_on\_any()} to handle module X depends on modules
126+
A, B, C, etc.
127+
\item Lmod used to re-read all currently loaded modules to report
128+
any missing dependencies on module load and unload.
129+
\item This could be slow on disk based systems.
130+
\end{itemize}
131+
\end{frame}
132+
133+
134+
\begin{frame}{ModuleTable}
135+
\begin{itemize}
136+
\item Lmod stores its state in the user's environment
137+
\item Lmod calls this the \textbf{ModuleTable}.
138+
\item It is a Lua Table (what Python calls a \textbf{dict})
139+
\item It is base64 encoded and stored in \textbf{\_ModuleTable001\_} etc.
140+
\item Run \textbf{module -{}-mt} to see it decoded.
141+
\end{itemize}
142+
\end{frame}
143+
144+
% page
145+
\begin{frame}[fragile]
146+
\frametitle{Changes to ModuleTable to track dependencies}
147+
\begin{itemize}
148+
\item To remove re-reading all loaded modules
149+
\item Lmod now stores the dependencies in the ModuleTable
150+
\item As an example module A has dependencies:
151+
\end{itemize}
152+
{\small
153+
\begin{semiverbatim}
154+
\$ module --raw show A
155+
depends_on("x/1.0")
156+
depends_on_any("xx","yy/1.0")
157+
\end{semiverbatim}
158+
}
159+
\end{frame}
160+
161+
\begin{frame}[fragile]
162+
\frametitle{New ModuleTable}
163+
{\small
164+
\begin{semiverbatim}
165+
_ModuleTable_ = \{
166+
mT = \{
167+
A = \{
168+
depT = \{
169+
depA = \{ \{ sn = "x", version = \{... \},\}, \},
170+
doaA = \{ \{
171+
\{ sn = "xx", version = \{...\}, \},
172+
\{ sn = "yy", version = \{...\}, \},
173+
\}, \}, \}, \},
174+
...
175+
\},
176+
\}
177+
\end{semiverbatim}
178+
}
179+
\begin{itemize}
180+
\item where \textbf{depA} is the list of \textbf{depends\_on} modules
181+
\item and \textbf{doaA} is the list of \textbf{depends\_on\_any} modules
182+
\end{itemize}
183+
184+
\end{frame}
185+
186+
\begin{frame}{Performance Improvements}
187+
\begin{itemize}
188+
\item This and other changes improved module load times
189+
considerably
190+
\item Loading a module with 299 dependent modules on a Lustre
191+
filesystem
192+
\item Old: \textbf{40.2} seconds
193+
\item New: \textbf{5.2} seconds
194+
\end{itemize}
195+
\end{frame}
196+
197+
% --- Better Error Reporting ----
198+
\begin{frame}{Better Error Reporting with AI Help}
199+
\begin{itemize}
200+
\item EasyBuild Community wanted better error reporting when a
201+
module was not found when using the software hierarchy.
202+
\item Lmod's spider cache is used to build the software hierarchy.
203+
\item In some cases the spider cache had an error.
204+
\item Matthew Cawood used AI agents to find the bug!
205+
\end{itemize}
206+
\end{frame}
207+
208+
\begin{frame}
209+
\begin{center}
210+
\includegraphics[width=\textwidth]{AI_805.png}
211+
\end{center}
212+
\end{frame}
213+
214+
215+
\begin{frame}{Upshot from AI Agents}
216+
\begin{itemize}
217+
\item Root Cause: The bug is in the fallback (lines 396--421) in \texttt{src/MainControls.lua}
218+
\item When \texttt{commonPaths} is empty, only suggest paths from the module with the fewest paths.
219+
\item Proposed fix: ...
220+
\end{itemize}
221+
\end{frame}
222+
223+
% page
224+
\begin{frame}[fragile]
225+
\frametitle{Results}
226+
{\small
227+
\begin{semiverbatim}
228+
$ module load Python QGIS
229+
Lmod has detected the following error: ...
230+
Try: "module spider Python QGIS" to see how to load the module(s).
231+
Or load any one of these options:
232+
module load GCC/12.3.0 OpenMPI/4.1.5 Python QGIS
233+
\end{semiverbatim}
234+
}
235+
\end{frame}
236+
83237
% --- Documentation ---
84238
\begin{frame}{Documentation and Usability}
85239
\begin{itemize}
@@ -95,7 +249,7 @@
95249
\begin{itemize}
96250
\item Conditional \textbf{hide()/forbid()} rules by user, group, or time window.
97251
\item Deprecation warnings for outdated compilers or toolchains.
98-
\item Irreversible unloads enable post-cleanup or variable resets.
252+
\item Irreversible loads/unloads enable post-cleanup or variable resets.
99253
\item Simplifies compliance and software lifecycle management.
100254
\end{itemize}
101255
\end{frame}
@@ -134,15 +288,15 @@
134288
\item Docs: \href{https://lmod.readthedocs.io}{lmod.readthedocs.io}
135289
\item Mailing lists: \texttt{lmod-announce}, \texttt{lmod-users}.
136290
\item Contribute: report issues, propose hooks, share use cases.
291+
\item This Talk: \url{https://github.com/TACC/lmod/tree/main/my_docs/26/HPSF_26/presentation.pdf}
137292
\end{itemize}
138293
\end{frame}
139294
140-
% --- Final Slide ---
295+
%% --- Final Slide ---
141296
\begin{frame}{Thank You}
142297
\begin{center}
143298
\Large Questions? Feedback?\\[3mm]
144-
\small Contact: \texttt{mcawood@tacc.utexas.edu} \\[3mm]
145-
\href{https://lmod.readthedocs.io}{\texttt{lmod.readthedocs.io}}
299+
\Large Join the Lmod mailing list: \url{https://sourceforge.net/projects/lmod/lists/lmod-users}
146300
\end{center}
147301
\end{frame}
148302

0 commit comments

Comments
 (0)