Skip to content

Commit d01549c

Browse files
committed
show context window usage
1 parent a536f2a commit d01549c

File tree

3 files changed

+36
-7
lines changed

3 files changed

+36
-7
lines changed

src/App.jsx

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@ function setStoredModel(modelId) {
4545
}
4646
}
4747

48+
/**
 * Look up the context window size for a model.
 *
 * @param {string} modelUrl - The model repository id/url, matched against the
 *   `url` field of entries in AVAILABLE_MODELS.
 * @returns {number} The model's context window size in tokens, or 8192 (8k)
 *   when the model is not found or declares no contextSize.
 */
function getContextWindowSize(modelUrl) {
  const model = AVAILABLE_MODELS.find((m) => m.url === modelUrl);
  // Use ?? rather than ||: only null/undefined fall back to the default,
  // so an explicit contextSize of 0 (however unlikely) would not be masked.
  return model?.contextSize ?? 8192; // Default to 8k if unknown
}
53+
4854
function App() {
4955
// Create a reference to the worker object.
5056
const worker = useRef(null);
@@ -68,6 +74,7 @@ function App() {
6874
const [queuedMessage, setQueuedMessage] = useState(null); // For storing message when model is loading
6975
const [tps, setTps] = useState(null);
7076
const [numTokens, setNumTokens] = useState(null);
77+
const [contextTokens, setContextTokens] = useState(null); // Current context window usage
7178

7279
// Mobile detection
7380
const [isMobile, setIsMobile] = useState(false);
@@ -249,9 +256,10 @@ function App() {
249256
{
250257
// Generation update: update the output text.
251258
// Parse messages
252-
const { output, tps, numTokens } = e.data;
259+
const { output, tps, numTokens, contextTokens } = e.data;
253260
setTps(tps);
254261
setNumTokens(numTokens);
262+
setContextTokens(contextTokens);
255263
setMessages((prev) => {
256264
const cloned = [...prev];
257265
const last = cloned.at(-1);
@@ -489,6 +497,17 @@ function App() {
489497
{!isRunning && (
490498
<span className="mr-1">&#41;.</span>
491499
)}
500+
{contextTokens && (
501+
<>
502+
<span className="mx-2"></span>
503+
<span className="text-gray-500 dark:text-gray-300">
504+
Context: {contextTokens.toLocaleString()}/{getContextWindowSize(selectedModel).toLocaleString()} tokens
505+
</span>
506+
<span className="text-gray-400 dark:text-gray-500 text-xs ml-1">
507+
({((contextTokens / getContextWindowSize(selectedModel)) * 100).toFixed(1)}%)
508+
</span>
509+
</>
510+
)}
492511
</>
493512
)}
494513
</p>

src/components/ModelSelector.jsx

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,42 +6,48 @@ const AVAILABLE_MODELS = [
66
name: 'Llama 3.2 1B',
77
description: 'Meta model, 1.2 GB',
88
url: 'onnx-community/Llama-3.2-1B-Instruct-q4f16',
9-
hasReasoningBlocks: false
9+
hasReasoningBlocks: false,
10+
contextSize: 131072 // 128k tokens
1011
},
1112
{
1213
id: 'llama-3.2-3b',
1314
name: 'Llama 3.2 3B',
1415
description: 'Meta model, 2.4 GB',
1516
url: 'onnx-community/Llama-3.2-3B-Instruct-onnx-web-gqa',
16-
hasReasoningBlocks: false
17+
hasReasoningBlocks: false,
18+
contextSize: 131072 // 128k tokens
1719
},
1820
{
1921
id: 'phi-3.5-mini',
2022
name: 'Phi-3.5 Mini 3.8B',
2123
description: 'Microsoft model, 2.1 GB',
2224
url: 'onnx-community/Phi-3.5-mini-instruct-onnx-web',
23-
hasReasoningBlocks: false
25+
hasReasoningBlocks: false,
26+
contextSize: 131072 // 128k tokens
2427
},
2528
{
2629
id: 'smollm2-1.7b',
2730
name: 'SmolLM2 1.7B',
2831
description: 'HuggingFace model, 1.1 GB',
2932
url: 'HuggingFaceTB/SmolLM2-1.7B-Instruct',
30-
hasReasoningBlocks: false
33+
hasReasoningBlocks: false,
34+
contextSize: 8192 // 8k tokens
3135
},
3236
{
3337
id: 'qwen3-0.6b',
3438
name: 'Qwen3 0.6B',
3539
description: 'Alibaba model, 0.5 GB',
3640
url: 'onnx-community/Qwen3-0.6B-ONNX',
37-
hasReasoningBlocks: true
41+
hasReasoningBlocks: true,
42+
contextSize: 32768 // 32k tokens
3843
},
3944
{
4045
id: 'deepseek-r1-distill-qwen-1.5b',
4146
name: 'DeepSeek-R1-Distill-Qwen 1.5B',
4247
description: 'DeepSeek model, 1.3 GB',
4348
url: 'onnx-community/DeepSeek-R1-Distill-Qwen-1.5B-ONNX',
44-
hasReasoningBlocks: true
49+
hasReasoningBlocks: true,
50+
contextSize: 131072 // 128k tokens
4551
}
4652
];
4753

src/worker.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ async function generate(messages, model_id) {
8484
return_dict: true,
8585
});
8686

87+
// Calculate input token count for context tracking
88+
const inputTokenCount = inputs.input_ids.dims[1]; // Get the sequence length
89+
8790
let startTime;
8891
let numTokens = 0;
8992
let tps;
@@ -100,6 +103,7 @@ async function generate(messages, model_id) {
100103
output,
101104
tps,
102105
numTokens,
106+
contextTokens: inputTokenCount + numTokens, // Total tokens used in context
103107
});
104108
};
105109

0 commit comments

Comments (0)