diff --git a/lectures/crash-course-day1.tex b/lectures/crash-course-day1.tex
new file mode 100644
index 0000000..88acf74
--- /dev/null
+++ b/lectures/crash-course-day1.tex
@@ -0,0 +1,694 @@
+\input{../common/config}
+
+\title{Simulation Role in Software and Hardware Development, Basic Concepts}
+
+\begin{document}
+
+\startslides
+
+\section{Complexity of Modern Computer Systems}
+
+\begin{frame}{Complexity of Modern Computer Systems}
+
+\centering
+\includegraphics[width=0.7\textwidth]{ic-floor}
+
+\tiny{Source: P. Horowitz and W. Hill. 1989. The Art of Electronics. Cambridge
+University Press, New York, NY, USA}
+
+\end{frame}
+
+\begin{frame}{Why Is Software Development Only on Real Hardware Not Beneficial?}
+
+\begin{itemize}
+\item Amount of available samples is usually limited,
+\item low-level debug is challenging,
+\item long development cycle.
+\end{itemize}
+
+$\Rightarrow$ development cost is increasing.
+
+\bigskip
+
+{\tiny ``I've noticed a shift during the past couple of years towards an increasing
+use of various types of simulation, including virtual platforms. Previously
+software developers wanted real hardware, but now they have to start using
+simulation because there's no chip available.''
+\textit{Tomas Evensen, Wind River CTO}}
+
+\end{frame}
+
+\begin{frame}{Solution --- Software Models of Real Hardware}
+\centering 
+\inputpicture{idea}
+
+\end{frame}
+
+\section{Areas of Application}
+
+\begin{frame}{Areas of Application}
+\begin{itemize}
+\item New hardware development,
+\item software and hardware co-development,
+\item experimental architectures,
+\item power and performance prediction,
+\item compatibility with other architectures.
+\end{itemize}
+
+\end{frame}
+
+\begin{frame}{New Hardware Development}
+
+\inputpicture{error-cost}
+
+\end{frame}
+
+\begin{frame}{Software and Hardware Development}
+\begin{itemize}
+\item Firmware, BIOS, UEFI.
+\item Operating systems.
+\item Device drivers.
+\item Compilers.
+\item Applications.
+\end{itemize}
+
+\end{frame}
+
+\begin{frame}{``Shift Left'' --- Accelerated Product Development}
+
+\centering
+
+\includegraphics[width=1\textwidth]{shift-left} % TODO TikZ-elize this
+
+\begin{tiny}
+Impact of Shift left on hardware/software development.
+Source: Semiconductor Engineering
+\end{tiny}
+
+\vfill
+
+\begin{itemize}
+\item Allow early software development --- before silicon arrives.
+\item Shorten time to market by overlapping hardware and software designs.
+\item Decouple software and hardware development.
+\item Validation of software, hardware, and their integration starts earlier.
+\end{itemize}
+
+\end{frame}
+
+\begin{frame}{Experimental Architectures}
+
+\begin{itemize}
+\item New Instruction Set Architectures (ISA).
+\item New ISA extensions.
+\item Multicore systems.
+\item Vector systems.
+\item Security and Cryptography.
+\end{itemize}
+
+\end{frame}
+
+\begin{frame}{Power and Performance Prediction}
+% TODO: explain a bit every item.
+\begin{itemize}
+\item Untimed,
+\item Loosely Timed,
+\item Approximately timed.
+\end{itemize}
+
+\end{frame}
+
+\begin{frame}{Compatibility with Other Architectures}
+\includegraphics[width=\textwidth]{compat} % TODO TikZ-elize this
+\end{frame}
+
+\section{Terminology}
+
+\begin{frame}{Terminology}
+\begin{itemize}
+\item \textbf{Simulation} --- replication of a system's behavior that can be
+      observed through \textbf{\textit{external}} interaction with the system.
+\item \textbf{Emulation} --- replication of a system's behavior considering how
+      the system \textbf{\textit{internally}} works through imitation of all
+      internal structures and processes.
+\item \textbf{Virtualization} --- effective isolation of several systems from
+      each other with simultaneous and transparent access to resources of the
+      underlying system.
+\end{itemize}
+
+\end{frame}
+
+\begin{frame}{Types of Simulators}
+\begin{itemize}
+\item Full-platform,
+\item Application level,
+\item Functional,
+\item Cycle-accurate,
+\item Software,
+\item Hybrid.
+\end{itemize}
+\end{frame}
+
+\section{Capabilities}
+
+\begin{frame}{Some Simulation Capabilities}
+\begin{itemize}
+\item Non-intrusive inspection,
+\item Repeatability,
+\item Save/restore of simulated state,
+\item Synchronized system stop,
+\item Reverse execution.
+\end{itemize}
+\end{frame}
+
+\section{Simulated System}
+\begin{frame}{Simulated System}
+\centering
+% TODO: add a link from APIC to addr-decoder.
+% Not important for this presentation but the link exists.
+\begin{tikzpicture}[>=latex, font=\small, node distance = 0.5cm]
+
+\begin{scope}[minimum height=0.8cm]
+    \node[draw, ] (cpu) {CPU1};
+    \node[draw, below=of cpu] (mmu) {MMU1};
+
+    \node[draw, left=of cpu] (cpu2) {CPU2};
+    \node[draw, below=of cpu2] (mmu2) {MMU2};
+    \node[draw, right=2cm of cpu, ] (pic) {APIC};
+
+    \coordinate[above=of pic] (op);
+    \coordinate (mp) at (barycentric cs:mmu=0.5,mmu2=0.5);
+    \node[draw, below=1cm of mp] (addr-decoder) {addr-decoder};
+
+    \node[draw, below=2cm of mmu, text width=4.5cm, align = center, ] (dram) {RAM};
+    \node[draw, right=of pic, ] (pit) {PIT};
+\end{scope}
+
+\draw[<->] (cpu) -- (cpu |- mmu.north);
+\draw[<->] (cpu2) -- (cpu2 |- mmu2.north);
+
+\draw[<->] (mmu) -- (mmu |- addr-decoder.north);
+\draw[<->] (mmu2) -- (mmu2 |- addr-decoder.north);
+
+\draw[<->] (addr-decoder) -| (pit);
+\draw[<->] (addr-decoder) -- (addr-decoder |- dram.north);
+
+\draw[->, ] (pic) -- (cpu);
+\draw[->, ] (pit) -- (pic);
+\draw[->, ] (pic) -- (op) -| (cpu2);
+\end{tikzpicture}
+\end{frame}
+
+\section{Timer}
+
+\begin{frame}{Example \No1: Timer}
+\centering
+\begin{tikzpicture}[>=latex]
+\coordinate (center) at (0,0);
+\node[draw, text width = 2cm, above = 0.5cm of center] (reference) {\texttt{reference}};
+\node[draw, text width = 2cm, below = 0.5cm of center] (counter) {\texttt{counter}};
+\node[draw, text width = 0.4cm, right = 2cm of center, shape = isosceles triangle, inner sep=1dd] (comparator) {=?};
+\node[right = of comparator] (int) {\#INT};
+\node[above = of reference] (reset) {\#RESET};
+\node[below = of counter] (enable) {\#ENABLE};
+\node[left = 1.5cm of reference] (ref-input) {REF};
+\node[left = 0.25cm of counter.north west] (clk) {\small{CLK}};
+
+% draw a quartz
+\coordinate (quartz) at ([xshift = -2cm]counter.west);
+ \node[]  at (quartz) {\small{F}};
+\draw (quartz) ++(-0.25,0.25) rectangle ++(0.5,-0.5);
+\draw (quartz) ++(-0.25,0.35) -- ++ (0.5,0);
+\draw (quartz) ++(-0.25,-0.35) -- ++ (0.5,0);
+\draw (reset) -- (reference);
+\draw (enable) -- (counter);
+
+% draw wires
+\draw (reference.east) -| ([xshift = -0.2cm]comparator.160) -- (comparator.160);
+\draw (counter.east) -| ([xshift = -0.2cm]comparator.200) -- (comparator.200);
+\draw (ref-input) -- (reference) node[midway] {\tiny{/}} node[midway, above] {16};
+\draw (quartz) ++(0.25,0) -- (counter);
+\draw[->] (comparator) -- (int);
+\node[draw, dashed, fit = (reference) (counter) (comparator)] {};
+\end{tikzpicture}
+\end{frame}
+
+\begin{frame}{Timing Diagram}
+\centering
+\begin{tikzpicture}[>=latex]
+% set clock and #INT time lines
+\draw[->] (0,0) -- (10,0) node[pos=0.95, below] {CLK};
+\draw[->] (0,1) -- (10,1) node[pos=0.0, above] {\#INT};
+
+\foreach \x in { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19} {
+    \draw (\x/2,-0.15) -- (\x/2,0.15)  {};
+    \coordinate (tick\x) at (\x/2, 1);
+};
+
+\draw[fill=black] (tick3) circle (0.1cm);
+\node[below = 0.15cm of tick3] (event-enable) {\tiny{ENABLE=1}};
+
+\draw[fill=black] (tick10) circle (0.1cm);
+\draw[fill=black] (tick17) circle (0.1cm);
+\draw (tick3)  -- ++(0, 1);
+\draw (tick10) -- ++(0, 1);
+\draw (tick17) -- ++(0, 1);
+\draw[<->] ([yshift=0.8cm]tick3) -- ([yshift=0.8cm]tick10) node[midway, above] {reference};
+\draw[<->] ([yshift=0.8cm]tick10) -- ([yshift=0.8cm]tick17) node[midway, above] {reference};
+
+% The actual #INT plot (drawn along the #INT time line at y = 1)
+\draw[thick] (tick1) -- (tick10) -- ++(0, 0.5) -- ++(0.5, 0) --
+            (tick11) -- (tick17) -- ++(0, 0.5) -- ++(0.5, 0) -- (tick18) -- (tick19);
+
+\end{tikzpicture}
+\end{frame}
+
+\begin{frame}[fragile]{Simulation With a Fixed Step Size}
+\begin{lstlisting}
+on_clk() {
+  if (enable) counter +=1;
+  if (counter == reference) {
+      raise_int();
+      counter = 0;
+  } else {
+      lower_int();
+  }
+}
+
+on_reset() {
+    reference = 0;
+    counter = 0;
+    enable = 0;
+}
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}{Typical Timer Characteristics}
+\begin{itemize}
+    \item $\mathsf{F} \approx 10$ MHz,
+    \item $\mathsf{reference} > 10^3$,
+    \item \#RESET --- no more than one per $\approx 100$ seconds.
+\end{itemize}
+\vfill
+$\Rightarrow$ externally visible effect (\#INT) occurs approximately once per
+$10^3$ cycles.
+\end{frame}
+
+\begin{frame}{Optimization}
+No modeling for externally invisible actions.
+\vfill
+\centering
+\begin{tikzpicture}[>=latex]
+% set clock and #INT time lines
+\draw[->] (0,1) -- (10,1) node[pos=0.0, above] {\#INT} node[pos=0.95, below] {t\textsubscript{sim}};
+
+\foreach \x in { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19} {
+    \coordinate (tick\x) at (\x/2, 1);
+};
+
+\draw[fill=black] (tick3) circle (0.1cm);
+
+\draw[fill=black] (tick10) circle (0.1cm);
+\draw[fill=black] (tick17) circle (0.1cm);
+\draw (tick3)  -- ++(0, 1);
+\draw (tick10) -- ++(0, 1);
+\draw (tick17) -- ++(0, 1);
+\draw[<->] ([yshift=0.8cm]tick3) -- ([yshift=0.8cm]tick10) node[midway, above] {reference};
+\draw[<->] ([yshift=0.8cm]tick10) -- ([yshift=0.8cm]tick17) node[midway, above] {reference};
+
+% The actual #INT plot (drawn along the #INT time line at y = 1)
+\draw[thick] (tick1) -- (tick10) -- ++(0, 0.5) -- ++(0, -0.5) --
+            (tick11) -- (tick17) -- ++(0, 0.5) -- ++(0, -0.5) -- (tick18) -- (tick19);
+\end{tikzpicture}
+\end{frame}
+
+\begin{frame}[fragile]{Discrete Event Simulation}
+\begin{lstlisting}
+typedef struct event {
+    time_t delta;
+    dev_t *device;
+    void (*function)(dev_t *device);
+} event_t;
+
+event_t *event_queue;
+time_t sim_time = 0;
+for (event_t *e = next_event(&event_queue); e != NULL;
+     e = next_event(&event_queue)) {
+    e->function(e->device);
+    sim_time += e->delta;
+}
+\end{lstlisting}
+\end{frame}
+
+\section{Delayed Response}
+
+\begin{frame}{Example \No2: Waiting for a Response}
+\begin{center}
+\begin{tikzpicture}[>=latex]
+\node[draw, inner ysep=1cm] (dev1) {\texttt{dev1}};
+\node[draw, inner ysep=1cm, right= 3cm of dev1] (dev2) {\texttt{dev2}};
+
+\draw[->] (dev1.55) -- (dev2.125) node[midway] {\tiny{/}} node[midway, above] {address};
+
+\draw[->] (dev2) -- (dev1) node[midway] {\tiny{/}} node[midway, above] {data};
+
+\draw[->] (dev2.240) -- (dev1.300) node[midway, above] {\#RDY};
+\end{tikzpicture}
+\end{center}
+
+\begin{enumerate}
+\item Request from \texttt{dev1}: \texttt{address}.
+\item \texttt{dev2} calculates \texttt{data}.
+\item \texttt{dev2} notifies \texttt{dev1} about data readiness
+  \textit{after some time} $\Delta T$ by \#RDY.
+\item \texttt{dev1} works independently from \texttt{address} request to \#RDY
+  response.
+\end{enumerate}
+\end{frame}
+
+\begin{frame}{Implementation}
+\texttt{
+  dev1:
+  \begin{enumerate}
+    \item dev2.read(address);
+  \end{enumerate}
+  dev2:
+  \begin{enumerate}
+    \item data = get\_data(address);
+    \item event\_queue.post($\Delta T$, dev1, rdy());
+  \end{enumerate}
+  dev1:
+  \begin{enumerate}
+    \item rdy() \{ read(data); \}
+  \end{enumerate}
+}
+\end{frame}
+
+\section{Theory}
+
+\begin{frame}{Event Queue}
+  \centering
+  \inputpicture{des}
+\end{frame}
+
+\begin{frame}{Event Content and Results}
+An event contains:
+\begin{itemize}
+\item time stamp ($\Delta T$ or absolute time),
+\item a function to be called,
+\item an object whose state is to be changed.
+\end{itemize}
+
+Event handling results:
+\begin{itemize}
+\item changes to state of the simulated system,
+\item added or destroyed events.
+\end{itemize}
+\end{frame}
+
+\begin{frame}{Questions}
+What should happen to the event queue when:
+\begin{enumerate}
+  \item \texttt{reference} written to?\pause
+  \item \#RESET happens?\pause
+  \item timer is disabled (ENABLE $\leftarrow$ 0)?\pause
+  \item \texttt{counter} is read?
+\end{enumerate}
+\end{frame}
+
+\begin{frame}[fragile]{Discrete Event Simulation Algorithm}
+\begin{lstlisting}
+typedef struct event event_t;
+
+struct event {
+    time_t delta;
+    dev_t *device;
+    void (*function)(dev_t *device, event_t *queue);
+};
+
+event_t *event_queue;
+time_t sim_time = 0;
+while (!empty(&event_queue)) {
+    sim_time += get_delta(&event_queue);
+    event_t *evt = pop_event(&event_queue);
+    evt->function(evt->device, &event_queue);
+}
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}{Event Properties}
+\begin{itemize}
+\item New event cannot be created in the past.
+\item Event handling can create new events.
+\item Event handling can cancel future (not yet handled) events.
+\item Several events may have the same time stamp.
+\end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Simics\reg~QSP example}
+\emph{Demo: qsp-clear-linux.simics}
+\footnotesize{\begin{verbatim}
+simics> peq
++--------------+------------------------+----------------------------+
+|    Cycle     |         Object         |        Description         |
++--------------+------------------------+----------------------------+
+|         51367|board.mb.sb.hpet        |tim_event                   |
+|       1174759|board.mb.sb.uhci[0]     |frame_update                |
+|       1174759|board.mb.sb.uhci[1]     |frame_update                |
+|       1174759|board.mb.sb.uhci[2]     |frame_update                |
+|       1174759|board.mb.sb.uhci[3]     |frame_update                |
+|       1174759|board.mb.sb.uhci[4]     |frame_update                |
+|       1174759|board.mb.sb.uhci[5]     |frame_update                |
+|    1011284267|board.mb.cpu0.core[0][0]|performance counter overflow|
+|    8470303804|board.mb.sb.lpc         |pm1_ovf                     |
+|37955235174759|board.mb.sb.rtc         |rtc.rtc_timer               |
++--------------+------------------------+----------------------------+
+\end{verbatim}}
+\end{frame}
+
+\section{Co-simulation}
+
+\begin{frame}{Simulation Techniques We Know}
+\begin{itemize}
+  \item Discrete event simulation: timer (non-executing device).
+  \item ``Request --- response'' models: memory (instant).
+  \item Interpretation, binary translation, direct execution:
+    processors (executing devices), to be reviewed in the next lecture.
+\end{itemize}
+\end{frame}
+
+\begin{frame}{Simulation Using DES and Executing Models}
+\centering
+\begin{tikzpicture}[>=latex, font=\scriptsize]
+  \draw[->] (-0.5,0) -- (10.5,0) node[pos=1, above, align=center] (sim-time) {Simulated\\Time};
+
+  \begin{scope}
+  \clip (0,-2) rectangle (10, 2.5);
+  \foreach \x in { 1, 2, 3, 4, 5, 6, 7, 8, 9} {
+      \draw (\x,-0.15) -- (\x,0.15) node (tick\x) {};
+  };
+
+  \node[shape=dart, draw, shape border rotate=270 ] at (1, 0.5) (event1) {};
+  \node[shape=dart, draw, shape border rotate=270 ] at (5, 0.5) (event2) {};
+  \node[shape=dart, draw, shape border rotate=270 ] at (9, 0.5) (event3) {};
+
+  \node[above of=event2] (deslabel) {Discrete Events};
+  \draw[->] (deslabel) -- (event1);
+  \draw[->] (deslabel) -- (event2);
+  \draw[->] (deslabel) -- (event3);
+
+  \draw (3,-0.5) ellipse[x radius = 2cm, y radius = 0.5cm] node {Processor Simulation};
+  \draw (7,-0.5) ellipse[x radius = 2cm, y radius = 0.5cm] node {Processor Simulation};
+
+  \draw (-1,-0.5) ellipse[x radius = 2cm, y radius = 0.5cm] node {} ;
+  \draw (11,-0.5) ellipse[x radius = 2cm, y radius = 0.5cm] node {} ;
+  \end{scope}
+\end{tikzpicture}
+\end{frame}
+
+\begin{frame}{Co-Simulation}
+\centering
+\begin{tikzpicture}[>=latex]
+  \node[draw, circle, text width = 3cm, text badly centered] (dessim) {Discrete event simulator};
+  \node[draw, circle, text width = 3cm, text badly centered, right = 2.5cm of dessim] (execsim) {Executing model simulator};
+
+  \draw (dessim.45)   edge[bend left = 45, ->] (execsim.135);
+  \node[above=1cm of execsim.135] {\small Time to the next event};
+  \draw (execsim.225) edge[->, bend left = 45] (dessim.315);
+  \node[below=1cm of dessim.315] {\small Number of steps/cycles};
+\end{tikzpicture}
+\end{frame}
+
+\section{Multi-Processor Simulation}
+
+\begin{frame}{Simulation of a Multi-Processor System}
+\centering
+\begin{tikzpicture}[>=latex]
+  \node[draw, circle] (core1) {Core 1};
+  \node[draw, circle, right = of core1] (core2) {Core 2};
+  \node[draw, circle, right = of core2] (core3) {Core 3};
+  \node[right = of core3] (dots) {\dots};
+  \node[draw, circle, right = of dots] (coren) {Core $N$};
+
+  \coordinate[below = 2.3cm of core1] (c3);
+  \coordinate[below = 1.5cm of coren] (c4);
+
+  \node[draw, fit = (c3) (c4), inner ysep=1pt] (shmem) {Shared memory};
+
+  \draw[<->] (core1.south) -- (shmem);
+  \draw[<->] (core2.south) -- (shmem);
+  \draw[<->] (core3.south) -- (shmem);
+  \draw[<->] (coren.south) -- (shmem);
+\end{tikzpicture}
+\end{frame}
+
+\begin{frame}{Step-by-Step}
+\begin{itemize}
+\item How to maintain simultaneous instruction simulation for all guest
+  processors?\pause~\textit{Execute no more than one instruction at a time.}
+\item It will be extremely slow! Maybe it is possible to simulate multiple guest
+  instructions without switching?\pause
+\item How many?\pause~How much time does inter-processor communication take in
+  hardware?
+\end{itemize}
+\end{frame}
+
+% TODO: Add a slide with simulation speed from time quantum dependency using
+% qsp-clear-linux.simics
+
+\begin{frame}[fragile]{Temporal Decoupling --- Real Time}
+\begin{center}
+\begin{tikzpicture}[>=latex]
+  \draw[->] (0,0) -- (8,0) node[pos=1, below, align=center] (sim-time) {Real\\Time};
+
+  \foreach \x in { 1, 2, 3, 4, 5, 6, 7} {
+      \draw (\x,-0.15) -- (\x,0.15) node (tick\x) {};
+  };
+  \matrix[anchor=south west] at (-0.5,0.5){
+    \node {CPU3}; & & & \node[shape=single arrow, draw, text width = 2cm, inner xsep = 0cm, fill=black!5] (arr3) {}; \\
+    \node {CPU2}; & & \node[shape=single arrow, draw, text width = 2cm, inner xsep = 0cm, fill=black!10] (arr2) {}; & \\
+    \node {CPU1}; & \node[shape=single arrow, draw, text width = 2cm, inner xsep = 0cm, fill=black!15] (arr1) {}; & & \\
+  };
+
+  \draw[->] (arr1.east) -- (arr2.west);
+  \draw[->] (arr2.east) -- (arr3.west);
+\end{tikzpicture}
+\end{center}
+\end{frame}
+
+\begin{frame}[fragile]{Temporal Decoupling --- Simulated Time}
+\begin{center}
+\begin{tikzpicture}[>=latex]
+  \draw[->] (0,0) -- (8,0) node[pos=1, below, align=center] (sim-time) {Simulated\\Time};
+
+  \foreach \x in { 1, 2, 3, 4, 5, 6, 7} {
+      \draw (\x,-0.15) -- (\x,0.15) node (tick\x) {};
+  };
+  \matrix[anchor=south west] at (-0.5,0.5){
+    \node {CPU3}; & \node[shape=single arrow, draw, text width = 2cm, inner xsep = 0cm, fill=black!5] (arr3) {};  \\
+    \node {CPU2}; & \node[shape=single arrow, draw, text width = 2cm, inner xsep = 0cm, fill=black!10] (arr2) {}; \\
+    \node {CPU1}; & \node[shape=single arrow, draw, text width = 2cm, inner xsep = 0cm, fill=black!15] (arr1) {}; \\
+  };
+\end{tikzpicture}
+\end{center}
+\end{frame}
+
+\begin{frame}{Quantum (Quota)}
+Quantum (Quota) --- how many instructions a guest processor can run before
+giving control back to the simulator.
+\vfill
+\begin{itemize}
+\item A processor can run fewer instructions than dictated by the quota.
+\item Too large a quota can cause unexpected behavior.
+\item In a DES-based simulator pseudo-events can be used to cause guest
+ processor switch.
+\end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Example \No3: \texttt{qsp-clear-linux.simics}}
+Quantum on 8 core system:
+\begin{lstlisting}[mathescape=true,keywordstyle=\ttfamily]
+simics> cpu-switch-time
+Current time quantum: 100.0 $\mu$s
++--------------+------------------------+
+|Cycles/quantum|         Clock          |
++--------------+------------------------+
+|     200000.00|board.mb.cpu0.core[0][0]|
+|     200000.00|board.mb.cpu0.core[0][1]|
+|     200000.00|board.mb.cpu0.core[1][0]|
+|     200000.00|board.mb.cpu0.core[1][1]|
+|     200000.00|board.mb.cpu0.core[2][0]|
+|     200000.00|board.mb.cpu0.core[2][1]|
+|     200000.00|board.mb.cpu0.core[3][0]|
+|     200000.00|board.mb.cpu0.core[3][1]|
++--------------+------------------------+
+Default time quantum not set yet
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]{Example \No4: \texttt{qsp-clear-linux.simics}}
+Simulated time on 8 core system:
+\begin{verbatim}
+running> ptime -all
++------------------------+----------+------------+--------+
+|       Processor        |  Steps   |   Cycles   |Time (s)|
++------------------------+----------+------------+--------+
+|board.mb.cpu0.core[0][0]|1376450747|107696800000|  53.848|
+|board.mb.cpu0.core[0][1]| 746604719|107696600220|  53.848|
+|board.mb.cpu0.core[1][0]| 746604647|107696600000|  53.848|
+|board.mb.cpu0.core[1][1]| 746604686|107696600000|  53.848|
+|board.mb.cpu0.core[2][0]| 746604725|107696600000|  53.848|
+|board.mb.cpu0.core[2][1]| 746604764|107696600000|  53.848|
+|board.mb.cpu0.core[3][0]| 746604803|107696600000|  53.848|
+|board.mb.cpu0.core[3][1]| 746604842|107696600000|  53.848|
++------------------------+----------+------------+--------+
+\end{verbatim}
+\end{frame}
+
+\section*{Conclusions}
+
+\begin{frame}{Conclusions}
+\begin{itemize}
+\item Software models are created before hardware availability.
+\item Software models are used for software-hardware co-development.
+\item Simulation provides unique debugging and development capabilities.
+\item Simulation with a fixed step.
+\item Event-driven simulation.
+\item Discrete Event Simulation:
+  \begin{itemize}
+  \item Event creation.
+  \item Event handling.
+  \item Event destruction.
+  \end{itemize}
+\item Co-simulation for executing and non-executing devices.
+\item Multi-processor simulation:
+  \begin{itemize}
+    \item Temporal Decoupling.
+    \item Time quantum.
+  \end{itemize}
+\end{itemize}
+\end{frame}
+
+\begin{frame}[allowframebreaks]{Bibliography}
+\begin{thebibliography}{99}
+  \bibitem{} [RUS] \textit{Речистов~Г.С, Юлюгин~Е.А и др.},
+    Программное моделирование вычислительных систем.
+    \url{https://github.com/grigory-rechistov/simbook/blob/master/metoda/main-web.pdf}
+  \bibitem{} \textit{James Smith, Ravi Nair}, Virtual machines -- Versatile
+    Platforms for Systems and Processes.
+  \bibitem{} \textit{John Wiley \& Sons, Inc., ed. by J. Banks}. Handbook of
+    Simulation. Principles, Methodology, Advances, Applications, and Practice.
+  \bibitem{} \textit{J.~Engblom}. Temporal Decoupling - Are “Fast” and
+    “Correct” Mutually Exclusive?
+\end{thebibliography}
+\end{frame}
+
+\begin{frame}{In the Next Lecture}
+Simulation of architectural state:
+\begin{itemize}
+\item CPU instructions simulation
+\end{itemize}
+\end{frame}
+\finalslide
+
+\end{document}
diff --git a/lectures/crash-course-day2.tex b/lectures/crash-course-day2.tex
new file mode 100644
index 0000000..3cbba46
--- /dev/null
+++ b/lectures/crash-course-day2.tex
@@ -0,0 +1,651 @@
+\input{../common/config}
+
+\usepackage{tikz}
+\usetikzlibrary{shapes, calc, arrows, decorations.markings, decorations.pathreplacing, decorations.pathmorphing, decorations, patterns, chains, snakes, backgrounds, positioning, fit, shadows}
+\title{Processor instruction simulation}
+
+\begin{document}
+
+\section{Start}
+\startslides
+
+\begin{frame}{Simulated System}
+\centering
+\vfill
+\inputpicture{cpu-mem}
+\vfill
+\end{frame}
+
+\section{Interpretation Pipeline}
+
+% TODO: This is not a pipeline! Add a proper pipeline picture.
+% s/pipeline/execution stages/g
+
+\begin{frame}{Basic 5-Stage Pipeline}
+\centering
+\inputpicture{interpreter-cycle}
+\end{frame}
+
+\begin{frame}[fragile]{Switched interpreter}
+\begin{lstlisting}
+while (run) {
+    raw_code = fetch(PC);
+    (opcode, operands) = decode(raw_code);
+    switch (opcode) {
+
+    case opcode1:
+        func1(operands); PC++; break;
+
+    case opcode2:
+        func2(operands); PC++; break;
+
+    /*...*/
+    }
+}
+\end{lstlisting}
+\end{frame}
+
+\subsection{Fetch}
+
+\begin{frame}[fragile]{Fetch}
+\texttt{data = mem[pc];}\pause
+\vfill
+Do not forget about address translation:
+\begin{lstlisting}
+paddr = v2p(pc); // pc is a virtual address
+data = mem[paddr];
+\end{lstlisting}
+\end{frame}
+
+% TODO: Add a slide about paging. People don't know about it at the moment.
+
+\begin{frame}{Fetch}
+``Simple'' memory read?
+\pause\bigskip
+\begin{itemize}
+\item Non-execute page.
+\pause\bigskip
+\item Unaligned accesses cause effects on some architectures.
+\pause\bigskip
+\item Cross-page accesses. \\
+The pages may have different access rights.
+\end{itemize}
+\end{frame}
+
+\subsection{Decode}
+
+\begin{frame}{Decode}
+Decoding --- translation of instruction data from machine code to internal
+(high-level) representation suitable for further analysis.
+\end{frame}
+
+\begin{frame}{Example 1: RISC-V}
+\centering
+\includegraphics[width=.9\textwidth]{risc-v-formats}
+
+\tiny{Source: The RISC-V Instruction Set Manual, Volume I: Unprivileged ISA,
+      Document Version 20191213, page 16}
+\end{frame}
+
+\begin{frame}[fragile]{Example 1: RISC-V decoder (1/3)}
+\begin{lstlisting}
+#define BIT_FIELD(v, e, s) \
+    (v >> s) & ((1 << (e - s + 1)) - 1)
+
+static inline int32_t
+sign_extend(uint32_t v, int width) {/* ... */};
+
+typedef struct decode {
+    uint32_t opcode;
+    uint32_t rd;
+    uint32_t rs1;
+    uint32_t rs2;
+    int32_t  imm;
+    uint32_t funct3;
+    uint32_t funct7;
+} decode_t;
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]{Example 1: RISC-V decoder (2/3)}
+\begin{lstlisting}
+decode_t
+decode(uint32_t raw) {
+    uint32_t op = BIT_FIELD(raw, 6, 0);
+    switch (type(op)) {
+    case I_type:
+         return decode_i_type(raw);
+    case R_type:
+         return decode_r_type(raw);
+    /* ... */
+    }
+}
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]{Example 1: RISC-V decoder (3/3)}
+\begin{lstlisting}
+decode_t
+decode_i_type(uint32_t raw) {
+    uint32_t op = BIT_FIELD(raw, 6, 0);
+    uint32_t rd = BIT_FIELD(raw, 11, 7);
+    uint32_t funct3 = BIT_FIELD(raw, 14, 12);
+    uint32_t rs1 = BIT_FIELD(raw, 19, 15);
+    int32_t imm = sign_extend(
+        BIT_FIELD(raw, 31, 20), 12);
+
+    return (decode_t){.opcode = op, .rd = rd,
+                      .funct3 = funct3, .rs1 = rs1,
+                      .imm = imm};
+}
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}{Example 2: Intel\reg~IA-32}
+\centering
+\includegraphics[width=.9\textwidth]{ia32-evex}
+
+\tiny{J.C.S. Adrian et al. Systems, Apparatuses, and Methods for Blending Two
+      Source Operands into a Single Destination Using a Writemask. US Patent
+      Application Publication. \No~2012/0254588 A1}
+\end{frame}
+
+\begin{frame}{What to Fetch From Machine Code?}
+\begin{center}
+\inputpicture{instruction-anatomy}
+\end{center}
+\vfill
+Input: machine code.
+
+Output:
+\begin{itemize}
+\item Success, failure, not enough data.
+\item In case of success: instruction length.
+\item In case of success: information about operands.
+\item In case of success: simulation routine.
+\end{itemize}
+\end{frame}
+
+\begin{frame}{Decode}
+\begin{itemize}
+\item Decoders are usually generated from ISA description.
+\item In general: classical problem of parser/syntax analyzer construction.
+\item In practice: special tools and languages.
+\item Example: Intel\reg~XED (x86 encoder-decoder). \url{https://github.com/intelxed/xed}
+\end{itemize}
+\end{frame}
+
+\begin{frame}{Decode: harsh reality}
+\begin{itemize}
+\item Variable instruction length. Intel\reg~IA-32: from 1 to 15 bytes. How many bytes to decode at once?
+\item Decoding results depend on prefixes and execution mode. Example: 0x40--0x4f in Intel\reg~IA-32/Intel\reg~64/AMD64.
+\end{itemize}
+\end{frame}
+
+\begin{frame}{Disassemble}
+\begin{itemize}
+\item Disassemble --- translate from machine code into human readable
+  representation (mnemonic, assembly).
+\item Encode (assemble) --- translate from mnemonic to machine code.
+\end{itemize}
+\end{frame}
+
+\subsection{Execute}
+
+\begin{frame}{Execute}
+\begin{itemize}
+\item Basic block --- simulation function for one instruction (a.k.a.~service routine).
+\item Service routines are typically written in high-level programming
+  languages: portable solution.
+\item Generators are often used.
+\item Example: SimGen --- a single description is used to generate decoder,
+  disassembler and service routines.
+\end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Simulated state}
+\begin{lstlisting}
+typedef struct {
+    uint32_t pc;
+
+    uint32_t regs[16];
+
+    bool z_flag;
+    bool n_flag;
+    bool o_flag;
+    bool c_flag;
+} cpu_t;
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]{Example: ADD reg reg reg}
+\begin{lstlisting}
+
+void add32_rrr(cpu_t *cpu, int src1, int src2, int dst) {
+    cpu->regs[dst] = cpu->regs[src1]
+                   + cpu->regs[src2];
+\end{lstlisting}
+\pause
+
+\begin{lstlisting}
+    cpu->z_flag = cpu->regs[dst] == 0;
+    cpu->n_flag = cpu->regs[dst] & (1 << 31);
+    cpu->o_flag = cpu->regs[dst] < 
+            MAX(cpu->regs[src1], cpu->regs[src2]);
+    cpu->c_flag = calc_c_flag(cpu->regs[src1],
+                              cpu->regs[src2]);
+}
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}{Intel\reg~IA-32 CALL}
+\centering
+\includegraphics[width=\textwidth]{ia32-call}
+
+\tiny{Source: Intel\reg~64 and IA-32 Architectures Software Developer’s Manual,
+      Order Number: 325462-073US, pages 716-732.}
+\end{frame}
+
+\subsection{Memory}
+
+\begin{frame}[fragile]{Memory}
+``Ordinary'' memory access:
+\vfill
+\begin{lstlisting}
+write_mem(cpu, dst_addr, data, size);
+data = read_mem(cpu, dst_addr, size);
+\end{lstlisting}
+\pause\vfill
+\begin{itemize}
+\item Attempt to change read-only memory,
+\item Unaligned address,
+\item Cross-page access.
+\end{itemize}
+\end{frame}
+
+\subsection{Exceptions}
+
+\begin{frame}{Accurate Pipeline}
+\centering
+\resizebox{9cm}{7cm}{\inputpicture{interpreter-cycle-exception}}
+\end{frame}
+
+\begin{frame}{Classification}
+\begin{itemize}
+\item Exception --- synchronous, without repeating of current instruction.
+\item Fault --- synchronous, with repeating of current instruction.
+\item Trap --- synchronous, without repeating of current instruction, intentional.
+\item Interrupt --- external, asynchronous.
+\item Abort --- external, asynchronous, no return point.
+\end{itemize}
+\end{frame}
+
+\subsection{Write-Back}
+
+\begin{frame}{Write-Back}
+\begin{itemize}
+  \item Processor state should be updated after all exception checks to avoid
+    partially changed state.
+  \bigskip
+  \item Advance \texttt{\$PC}:
+  \pause\bigskip
+  \begin{itemize}
+    \item For most instructions: \texttt{\$PC += instruction\_length}. \\
+    Exception: \texttt{REP MOVS}.
+    \pause\bigskip
+    \item Explicit \texttt{\$PC} update --- control-flow instructions:
+    \begin{itemize}
+      \item (Un)conditional (In)direct Jump/Branch,
+      \item Call/Return (subroutine).
+      \item System call/return.
+      \item \dots
+    \end{itemize}
+  \end{itemize}
+\end{itemize}
+\end{frame}
+
+\section{Improved Interpretation}
+
+\begin{frame}{Interpretation Pros and Cons}
+\begin{itemize}
+\item Implemented in high-level language --- portable.
+\item Simple structure: reliable, extensible, reusable.
+\item (Extremely) low simulation speed.
+\end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Where Is the Time Spent?}
+\begin{lstlisting}
+start: interruption = false;
+while (!interruption) {
+    raw_code = fetch(PC);
+    (opcode, operands) = decode(raw_code); // <-- here
+    switch (opcode) { // <-- and here
+    case opcode1:
+        func1(operands); PC++; break;
+    case opcode2:
+        func2(operands); PC++; break;
+    /*...*/
+    }
+}
+handle_interruption();
+goto start;
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]{Threaded Interpretation}
+Jump right to the next instruction instead of the start of the loop:
+\bigskip
+\begin{lstlisting}
+func0: /* simulate instr0 */; PC++;
+  next_opcode = decode(fetch(PC));
+  goto func_ptr[next_opcode];
+func1: /* simulate instr1 */; PC++;
+  next_opcode = decode(fetch(PC));
+  goto func_ptr[next_opcode];
+func2: /* simulate instr2 */; PC++;
+  next_opcode = decode(fetch(PC));
+  goto func_ptr[next_opcode];
+\end{lstlisting}
+
+\tiny\url{http://stackoverflow.com/questions/11227809/why-is-processing-a-sorted-array-faster-than-an-unsorted-array}
+\end{frame}
+
+\begin{frame}{Cached Interpretation}
+\begin{itemize}
+\item Usually guest code is static.
+\item It's highly probable that an instruction with some \texttt{\$PC} will be
+  executed many times.
+\item Why decode every time?
+\item Solution: create a cache mapping instruction address to decode data.
+\end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Cached Interpretation}
+\begin{lstlisting}
+while (!interruption) {
+  if (operation = cache[PC]); // shortcut
+  else { // not cached, full path
+    operation = decode(fetch(PC));
+    cache[PC] = operation; // cache the result
+  }
+  switch (operation) {
+     /* ... */
+  }
+}
+\end{lstlisting}
+\end{frame}
+
+% TODO: slide with diagram.
+
+\begin{frame}{Cached Interpretation}
+\begin{itemize}
+\item Cache size is limited.
+\item Old data needs to be removed from the cache.
+\item Code modifications need to be tracked. Otherwise cache will have invalid
+  data.
+\end{itemize}
+\end{frame}
+
+\begin{frame}{What Was Optimized in Interpreter}
+\begin{itemize}
+\item \textbf<1>{Fetch} {$\leftarrow$ optimized}
+\item \textbf<1>{Decode} {$\leftarrow$ optimized}
+\item \textbf<2>{Execute} {$\rightarrow$ to be optimized}
+\item \textbf<2>{Memory} {$\rightarrow$ to be optimized}
+\item \textbf<2>{Write-Back} {$\rightarrow$ to be optimized}
+\end{itemize}
+\end{frame}
+
+\section{Binary Translation}
+
+\begin{frame}{Translation, Compilation, Decompilation}
+\begin{itemize}
+\item \textbf{Translation} --- \textit{generic term} describing a process of
+  code conversion from one programming language into another.
+\item \textbf{Compilation} --- \textit{translation} from high-level programming
+  language into low-level programming language.
+\item \textbf{Decompilation} --- \textit{translation} from low-level programming
+  language into a high-level programming language.
+\end{itemize}
+\end{frame}
+
+\begin{frame}{Binary Translation}
+\begin{itemize}
+\item Input: guest machine code.
+\item Output: host machine code.
+\item \textbf{Binary translation, BT} --- translation of guest software written in
+  guest ISA into equivalent code in host ISA.
+\item What for? \pause Repetitive execution of translation result. Optimizations.
+\end{itemize}
+\end{frame}
+
+\begin{frame}{Static and Dynamic Binary Translation}
+\begin{itemize}
+\item \textit{Static} binary translator converts a target executable file
+  without running it.
+\item Result of static BT is saved on disk.
+\item It is very difficult to do correctly.
+\vfill
+\item \textit{Dynamic} binary translation happens during simulation.
+\item Result of dynamic BT is saved in memory.
+\item Dynamic BT can adapt to program's run-time environment.
+\item Dynamic BT alternates with execution of the generated code.
+\end{itemize}
+\end{frame}
+
+\begin{frame}{Stages of Dynamic Binary Translation}
+\centering
+\inputpicture{dynamic-bt}
+\end{frame}
+
+\subsection{Template-Based Translation}
+
+\begin{frame}{Algorithm 1: Template-Based Translation}
+\begin{tikzpicture}[font=\scriptsize, >=latex, node distance=2.cm]
+
+\node[draw, double copy shadow={shadow xshift=3pt,shadow yshift=-3pt}, fill=white] (decode) {decode\_t};
+\node[rectangle split, rectangle split parts=4, draw, right=of decode, anchor=text west, minimum width=1.8cm] (templates-raw) {Template 1\nodepart{two} Template 2\nodepart{three} Template 3\nodepart{four} Template 4};
+
+\node[rectangle split, rectangle split parts=4, draw, right=of templates-raw, minimum width=1.8cm] (templates) {Capsule 1\nodepart{two} Capsule 2\nodepart{three} Capsule 3\nodepart{four} Capsule 4};
+
+\node[draw, above=1cm of templates-raw, align=left] (md) {Encodings and Offsets\\for Host Instruction\\Operands};
+
+\draw[->] (decode) -- (templates-raw.text west) node[midway, above]{\tiny Template Selection};
+\draw[->] (templates-raw) -- (templates);
+\coordinate[above=0.5cm of templates] (junction);
+\draw[->] (decode) |- (junction) -- (templates) node[pos=0, above]{\tiny Argument Substitution};
+\draw[] (md) |- (junction);
+\end{tikzpicture}
+\end{frame}
+
+\begin{frame}[fragile]{Algorithm 1: Template-Based Translation}
+\begin{tiny}
+\begin{itemize}
+    \item start\_addr --- guest code's start address,
+    \item start\_buf --- host buffer.
+\end{itemize}
+\end{tiny}
+
+\begin{lstlisting}
+translate(start_addr, start_buf) {
+    PC = start_addr; bufptr = start_buf;
+    while (!enough) {
+        instr = fetch(PC);
+        (opcode, operands) = decode(instr);
+        (template, length) = templates[opcode];
+        memcpy(bufptr, template, length);
+        patch_operands(bufptr, operands);
+        PC += instr_length;
+        bufptr += length;
+    }
+    memcpy(bufptr, glue_capsule, glue_length);
+}
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]{Algorithm 1: Execution}
+
+\begin{lstlisting}
+execute(start_buf) {
+    load_simulated_state();
+    goto start_buf;
+}
+\end{lstlisting}
+\pause
+or
+\begin{lstlisting}
+typedef void (*fblock)(void);
+execute(start_buf) {
+    load_simulated_state();
+    ((fblock)start_buf)();
+}
+\end{lstlisting}
+\end{frame}
+
+% TODO: Add JIT-template for the same ADDQ instruction
+
+\begin{frame}{Capsule}
+\begin{small}
+\begin{tabular}{p{0.45\textwidth}p{0.45\textwidth}}
+Guest code, Intel~64 (64-bit) & Host code, Intel~IA-32 (32-bit)
+\end{tabular}
+\end{small}
+\vfill
+\centering
+\inputpicture{capsule}
+\pause
+Question: What part of \texttt{ADDQ} semantics is missing?
+\end{frame}
+
+\begin{frame}{Argument Substitution}
+
+{\ttfamily\small
+{\sffamily Registers:}
+
+\begin{tabular}{ll}  
+c5 f4 58 c\textcolor{red}{8}  &    vaddps \textcolor{red}{\%ymm0},\%ymm1,\%ymm1 \\
+c5 f4 58 c\textcolor{red}{9}  &    vaddps \textcolor{red}{\%ymm1},\%ymm1,\%ymm1 \\
+c5 f4 58 c\textcolor{red}{f}  &    vaddps \textcolor{red}{\%ymm7},\%ymm1,\%ymm1 \\\pause
+c4 c1 74 58 c\textcolor{red}{8} &  vaddps \textcolor{red}{\%ymm8},\%ymm1,\%ymm1 \\
+c4 c1 74 58 c\textcolor{red}{f} &  vaddps \textcolor{red}{\%ymm15},\%ymm1,\%ymm1 \\
+c5 f4 58 c8  &    vaddps \%ymm0,\%ymm1,\%ymm1 \\
+c5 ec 58 d0  &    vaddps \%ymm0,\%ymm2,\%ymm2 \\
+c5 c4 58 f8  &    vaddps \%ymm0,\%ymm7,\%ymm7 \\\pause
+c4 e1 74 58 c8 &  vaddps \%ymm0,\%ymm1,\%ymm1 \# Mnemonic is the same!\\
+\end{tabular}
+\pause
+{\sffamily Literals:}
+\begin{tabular}{ll}
+67 c7 85 \textcolor{blue}{00 01 00 00} \textcolor{green}{dd cc bb aa}   & movl \textcolor{green}{\$0xaabbccdd},\textcolor{blue}{0x100}(\%ebp)
+\end{tabular}
+}
+
+\end{frame}
+
+\subsection{Translation with Intermediate Representation}
+
+\begin{frame}{Algorithm 2: JIT. IR generation}
+\centering
+\begin{tikzpicture}[font=\small, >=latex]
+
+\node[rectangle split, rectangle split parts=2, draw, double copy shadow={shadow xshift=3pt,shadow yshift=-3pt}, fill=white] (sr) {Simulation routine\nodepart{two} C (subset)};
+\node[rectangle split, rectangle split parts=2, draw, below=1cm of sr, double copy shadow={shadow xshift=3pt,shadow yshift=-3pt}, fill=white] (template) {Template\nodepart{two} IR: bytecode+SSA};
+\draw[->] (sr) -- (template) node[midway, right] {SR-compiler};
+
+\end{tikzpicture}
+\end{frame}
+
+\begin{frame}{Algorithm 2: JIT. Simulation Stage}
+\begin{tikzpicture}[font=\scriptsize, >=latex, node distance=1.cm]
+
+\node[draw, double copy shadow={shadow xshift=3pt,shadow yshift=-3pt}, fill=white] (decode) {decode\_t};
+\node[rectangle split, rectangle split parts=4, draw, right=of decode, anchor=text west, minimum width=1.8cm] (templates) {Template 1\nodepart{two} Template 2\nodepart{three} Template 3\nodepart{four} Template 4};
+\draw[->] (decode) -- (templates.text west);
+
+\node[draw, below=0.5cm of templates, minimum height=2cm, align=left, minimum width=1.8cm] (opt-template) {Block\\template};
+\draw[->] (templates) -- (opt-template) node[midway, right] {Optimization};
+
+\node[rectangle split, rectangle split parts=3, align=left, draw, below=0.5cm of opt-template, double copy shadow={shadow xshift=3pt,shadow yshift=-3pt}, fill=white] (md) {Machine Description (md)\nodepart{two} Bytecode $\rightarrow$ machine code\nodepart{three} Host Register Definition};
+
+\coordinate[right=2.5cm of opt-template] (junction);
+
+\node[draw, right=1cm of junction, rectangle split, rectangle split parts=2,] (host-code) {Translation Block\nodepart{two} Host machine code};
+
+\draw[->] (md) -| node[pos=0.5, right, align=left] {Register Allocation\\Code Generation} (junction) -- (host-code);
+\draw[] (opt-template) -- (junction);
+
+\end{tikzpicture}
+\end{frame}
+
+\begin{frame}{Optimizations}
+\centering
+\inputpicture{bt-optimization}
+\end{frame}
+
+\begin{frame}{Connection between Translation Blocks}
+\centering
+\inputpicture{bb-translation}
+\end{frame}
+
+\begin{frame}{Why Are Optimizations During BT Complicated?}
+
+\begin{itemize}
+\item Machine code has less information about the algorithm compared to code in
+  high-level programming languages.
+\item Many compiler optimizations cannot be used.
+% TODO: Examples with clarification.
+\item BT optimizations are limited in time.
+\end{itemize}
+\pause
+\begin{itemize}
+\item Variable addresses --- not available.
+\item Function boundaries --- not available.
+\item Branch addresses --- partially known.
+\end{itemize}
+\end{frame}
+
+\section*{Conclusions}
+
+\begin{frame}{Conclusions}
+\begin{itemize}
+\item Basic 5-stage pipeline.
+\item Decoder, disassembler, encoder.
+\item Switched interpreter.
+\item Threaded interpreter.
+\item Cached interpreter.
+\item Exception, Interrupt, Trap, Fault\dots
+\item Interpretation, Compilation, Translation.
+\item Binary Translation.
+\item Static and Dynamic Binary Translation.
+\item Template, Capsule.
+\item Intermediate Representation.
+\end{itemize}
+\end{frame}
+
+\begin{frame}[allowframebreaks]{Bibliography}
+\begin{thebibliography}{99}
+  \bibitem{} \textit{D. Mihocka, S. Shwartsman}. Virtualization Without Direct
+    Execution or Jitting: Designing a Portable Virtual Machine Infrastructure.
+  \bibitem{} \textit{Y. Lifshitz, R. Cohn, I. Livni, O. Tabach, M. Charney, K.
+    Hazelwood}. Zsim: A Fast Architectural Simulator for ISA Design-Space
+    Exploration.
+  \bibitem{} \textit{F. Larsson, P. Magnusson, B. Werner}. SimGen: Development of
+    Efficient Instruction Set Simulators.
+  \bibitem{} \textit{A. Sepp, J. Kranz, A. Simon}. GDSL: A Generic Decoder
+    Specification Language for Interpreting Machine Language.
+  \bibitem{} \textit{Jim Smith and Ravi Nair}. Virtual Machines: Versatile
+  Platforms for Systems and Processes.
+  \bibitem{} \textit{Fabrice Bellard}. QEMU, a Fast and Portable Dynamic
+  Translator.
+  \bibitem{} \textit{Anton Chernoff and Ray Hookway.} {DIGITAL FX!32} Running
+  32-Bit x86 Applications on {Alpha} {NT}.
+  \bibitem{} \textit{Leonid Baraz [et al.]} IA-32 Execution Layer: a Two-Phase
+  Dynamic Translator Designed to Support IA-32 Applications on
+  Itanium\reg-Based Systems.
+\end{thebibliography}
+\end{frame}
+
+\finalslide
+
+\end{document}