Skip to content

Commit dd7c467

Browse files
LLM performance metric tracking (#2825)
* WIP performance metric tracking * fix: patch UI trying to .toFixed() null metric Anthropic tracking migration cleanup logs * Apipie implementation, not tested * Cleanup Anthropic notes, Add support for AzureOpenAI tracking * bedrock token metric tracking * Cohere support * feat: improve default stream handler to track for providers that are actually OpenAI compliant in usage reporting add deepseek support * feat: Add FireworksAI tracking reporting fix: improve handler when usage:null is reported (why?) * Add token reporting for GenericOpenAI * token reporting for koboldcpp + lmstudio * lint * support Groq token tracking * HF token tracking * token tracking for togetherai * LiteLLM token tracking * linting + Mistral token tracking support * XAI token metric reporting * native provider runner * LocalAI token tracking * Novita token tracking * OpenRouter token tracking * Apipie stream metrics * textwebgenui token tracking * perplexity token reporting * ollama token reporting * lint * put back comment * Rip out LC ollama wrapper and use official library * patch images with new ollama lib * improve ollama offline message * fix image handling in ollama llm provider * lint * NVIDIA NIM token tracking * update openai compatibility responses * UI/UX show/hide metrics on click for user preference * update bedrock client --------- Co-authored-by: shatfield4 <[email protected]>
1 parent 15abc3f commit dd7c467

File tree

42 files changed

+1770
-566
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+1770
-566
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
import { numberWithCommas } from "@/utils/numbers";
2+
import React, { useEffect, useState, useContext } from "react";
3+
// React context carrying `{ showMetricsAutomatically, setShowMetricsAutomatically }`.
// Populated by MetricsProvider and read by RenderMetrics via useContext.
const MetricsContext = React.createContext();
4+
// localStorage key under which the show-metrics preference is stored ("true" means show).
const SHOW_METRICS_KEY = "anythingllm_show_chat_metrics";
5+
// Name of the CustomEvent dispatched on `window` whenever the preference is toggled.
const SHOW_METRICS_EVENT = "anythingllm_show_metrics_change";
6+
7+
/**
 * Format a response duration for display.
 *
 * The value is interpreted in SECONDS: anything that rounds to less than one
 * second is rendered as whole milliseconds (e.g. `0.5` -> `"500ms"`), everything
 * else as seconds with three decimals (e.g. `1.5` -> `"1.500s"`).
 *
 * @param {number} duration - duration in seconds
 * @returns {string} formatted duration, or an empty string when `duration` is
 * not a finite number (null, undefined, NaN, Infinity, non-number).
 */
function formatDuration(duration) {
  // Reject anything `.toFixed()` cannot format meaningfully instead of
  // rendering strings such as "NaNs" or "0ms" for a missing metric.
  if (typeof duration !== "number" || !Number.isFinite(duration)) return "";

  if (duration < 1) {
    const millis = (duration * 1000).toFixed(0);
    // Values just under one second round up to 1000; show those as seconds
    // rather than the awkward "1000ms".
    if (Number(millis) < 1000) return `${millis}ms`;
  }

  return `${duration.toFixed(3)}s`;
}
20+
21+
/**
 * Format the output tokens-per-second (TPS) value for display.
 *
 * Values that round to less than 1000 keep two decimals (e.g. `12.5` ->
 * `"12.50"`); larger values are rounded to an integer and passed through
 * `numberWithCommas` for thousands grouping.
 *
 * @param {number} outputTps - output TPS
 * @returns {string} formatted TPS, or an empty string when `outputTps` is not
 * a finite number or the grouping helper throws.
 */
function formatTps(outputTps) {
  // A missing or non-finite metric has no sensible rendering.
  if (typeof outputTps !== "number" || !Number.isFinite(outputTps)) return "";

  // Decide on the rounded value so 999.999 does not render as "1000.00"
  // while 1000 renders as "1,000".
  const withDecimals = outputTps.toFixed(2);
  if (Number(withDecimals) < 1000) return withDecimals;

  try {
    return numberWithCommas(outputTps.toFixed(0));
  } catch {
    // Best-effort display value: never let a formatting helper break rendering.
    return "";
  }
}
35+
36+
/**
 * Get the show metrics setting from localStorage `anythingllm_show_chat_metrics` key.
 *
 * Only the exact stored string `"true"` enables the setting. Any failure to
 * reach storage (no `window`, no `localStorage`, or storage access throwing)
 * is treated as "not enabled".
 *
 * @returns {boolean} true when the stored preference is `"true"`, otherwise false
 */
function getAutoShowMetrics() {
  // Optional chaining does not protect against an undeclared global, so
  // probe with `typeof` before touching `window`.
  if (typeof window === "undefined") return false;

  try {
    return window.localStorage?.getItem(SHOW_METRICS_KEY) === "true";
  } catch {
    // Reading localStorage can throw when storage access is denied; fall back
    // to the default rather than crashing the caller.
    return false;
  }
}
43+
44+
/**
 * Toggle the show metrics setting in localStorage `anythingllm_show_chat_metrics` key
 * and broadcast the new value on `window` via the `anythingllm_show_metrics_change`
 * CustomEvent (`detail.showMetricsAutomatically`).
 *
 * @returns {boolean} the new value of the setting after toggling
 */
function toggleAutoShowMetrics() {
  const nextValue = !getAutoShowMetrics();

  try {
    // Storage only holds strings; be explicit about the stored form.
    window?.localStorage?.setItem(SHOW_METRICS_KEY, String(nextValue));
  } catch (error) {
    // Persisting is best-effort: a storage failure must not stop listeners
    // from being told about the new value for the current session.
    console.warn("Could not persist metrics visibility preference", error);
  }

  window.dispatchEvent(
    new CustomEvent(SHOW_METRICS_EVENT, {
      detail: { showMetricsAutomatically: nextValue },
    })
  );
  return nextValue;
}
58+
59+
/**
60+
* Provider for the metrics context that controls the visibility of the metrics
61+
* per-chat based on the user's preference.
62+
* @param {React.ReactNode} children
63+
* @returns {React.ReactNode}
64+
*/
65+
export function MetricsProvider({ children }) {
66+
const [showMetricsAutomatically, setShowMetricsAutomatically] =
67+
useState(getAutoShowMetrics());
68+
69+
useEffect(() => {
70+
function handleShowingMetricsEvent(e) {
71+
if (!e?.detail?.hasOwnProperty("showMetricsAutomatically")) return;
72+
setShowMetricsAutomatically(e.detail.showMetricsAutomatically);
73+
}
74+
console.log("Adding event listener for metrics visibility");
75+
window.addEventListener(SHOW_METRICS_EVENT, handleShowingMetricsEvent);
76+
return () =>
77+
window.removeEventListener(SHOW_METRICS_EVENT, handleShowingMetricsEvent);
78+
}, []);
79+
80+
return (
81+
<MetricsContext.Provider
82+
value={{ showMetricsAutomatically, setShowMetricsAutomatically }}
83+
>
84+
{children}
85+
</MetricsContext.Provider>
86+
);
87+
}
88+
89+
/**
90+
* Render the metrics for a given chat, if available
91+
* @param {metrics: {duration:number, outputTps: number}} props
92+
* @returns
93+
*/
94+
export default function RenderMetrics({ metrics = {} }) {
95+
// Inherit the showMetricsAutomatically state from the MetricsProvider so the state is shared across all chats
96+
const { showMetricsAutomatically, setShowMetricsAutomatically } =
97+
useContext(MetricsContext);
98+
if (!metrics?.duration || !metrics?.outputTps) return null;
99+
100+
return (
101+
<button
102+
type="button"
103+
onClick={() => setShowMetricsAutomatically(toggleAutoShowMetrics())}
104+
data-tooltip-id="metrics-visibility"
105+
data-tooltip-content={
106+
showMetricsAutomatically
107+
? "Click to only show metrics when hovering"
108+
: "Click to show metrics as soon as they are available"
109+
}
110+
className={`border-none flex justify-end items-center gap-x-[8px] ${showMetricsAutomatically ? "opacity-100" : "opacity-0"} md:group-hover:opacity-100 transition-all duration-300`}
111+
>
112+
<p className="cursor-pointer text-xs font-mono text-theme-text-secondary opacity-50">
113+
{formatDuration(metrics.duration)} ({formatTps(metrics.outputTps)}{" "}
114+
tok/s)
115+
</p>
116+
</button>
117+
);
118+
}

frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/index.jsx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import useCopyText from "@/hooks/useCopyText";
33
import { Check, ThumbsUp, ArrowsClockwise, Copy } from "@phosphor-icons/react";
44
import Workspace from "@/models/workspace";
55
import { EditMessageAction } from "./EditMessage";
6+
import RenderMetrics from "./RenderMetrics";
67
import ActionMenu from "./ActionMenu";
78

89
const Actions = ({
@@ -15,6 +16,7 @@ const Actions = ({
1516
forkThread,
1617
isEditing,
1718
role,
19+
metrics = {},
1820
}) => {
1921
const [selectedFeedback, setSelectedFeedback] = useState(feedbackScore);
2022
const handleFeedback = async (newFeedback) => {
@@ -58,6 +60,7 @@ const Actions = ({
5860
/>
5961
</div>
6062
</div>
63+
<RenderMetrics metrics={metrics} />
6164
</div>
6265
);
6366
};

frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/index.jsx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ const HistoricalMessage = ({
2626
regenerateMessage,
2727
saveEditedMessage,
2828
forkThread,
29+
metrics = {},
2930
}) => {
3031
const { isEditing } = useEditMessage({ chatId, role });
3132
const { isDeleted, completeDelete, onEndAnimation } = useWatchDeleteMessage({
@@ -117,6 +118,7 @@ const HistoricalMessage = ({
117118
isEditing={isEditing}
118119
role={role}
119120
forkThread={forkThread}
121+
metrics={metrics}
120122
/>
121123
</div>
122124
{role === "assistant" && <Citations sources={sources} />}

frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/index.jsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ export default function ChatHistory({
227227
isLastMessage={isLastBotReply}
228228
saveEditedMessage={saveEditedMessage}
229229
forkThread={forkThread}
230+
metrics={props.metrics}
230231
/>
231232
);
232233
})}

frontend/src/components/WorkspaceChat/ChatContainer/ChatTooltips/index.jsx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,12 @@ export function ChatTooltips() {
6161
// as the citation modal is z-indexed above the chat history
6262
className="tooltip !text-xs z-[100]"
6363
/>
64+
<Tooltip
65+
id="metrics-visibility"
66+
place="bottom"
67+
delayShow={300}
68+
className="tooltip !text-xs"
69+
/>
6470
</>
6571
);
6672
}

frontend/src/components/WorkspaceChat/ChatContainer/index.jsx

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import SpeechRecognition, {
1818
useSpeechRecognition,
1919
} from "react-speech-recognition";
2020
import { ChatTooltips } from "./ChatTooltips";
21+
import { MetricsProvider } from "./ChatHistory/HistoricalMessage/Actions/RenderMetrics";
2122

2223
export default function ChatContainer({ workspace, knownHistory = [] }) {
2324
const { threadSlug = null } = useParams();
@@ -268,14 +269,16 @@ export default function ChatContainer({ workspace, knownHistory = [] }) {
268269
>
269270
{isMobile && <SidebarMobileHeader />}
270271
<DnDFileUploaderWrapper>
271-
<ChatHistory
272-
history={chatHistory}
273-
workspace={workspace}
274-
sendCommand={sendCommand}
275-
updateHistory={setChatHistory}
276-
regenerateAssistantMessage={regenerateAssistantMessage}
277-
hasAttachments={files.length > 0}
278-
/>
272+
<MetricsProvider>
273+
<ChatHistory
274+
history={chatHistory}
275+
workspace={workspace}
276+
sendCommand={sendCommand}
277+
updateHistory={setChatHistory}
278+
regenerateAssistantMessage={regenerateAssistantMessage}
279+
hasAttachments={files.length > 0}
280+
/>
281+
</MetricsProvider>
279282
<PromptInput
280283
submit={handleSubmit}
281284
onChange={handleMessageChange}

frontend/src/utils/chat/index.js

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ export default function handleChat(
1919
close,
2020
chatId = null,
2121
action = null,
22+
metrics = {},
2223
} = chatResult;
2324

2425
if (type === "abort" || type === "statusResponse") {
@@ -35,6 +36,7 @@ export default function handleChat(
3536
error,
3637
animate: false,
3738
pending: false,
39+
metrics,
3840
},
3941
]);
4042
_chatHistory.push({
@@ -47,6 +49,7 @@ export default function handleChat(
4749
error,
4850
animate: false,
4951
pending: false,
52+
metrics,
5053
});
5154
} else if (type === "textResponse") {
5255
setLoadingResponse(false);
@@ -62,6 +65,7 @@ export default function handleChat(
6265
animate: !close,
6366
pending: false,
6467
chatId,
68+
metrics,
6569
},
6670
]);
6771
_chatHistory.push({
@@ -74,21 +78,42 @@ export default function handleChat(
7478
animate: !close,
7579
pending: false,
7680
chatId,
81+
metrics,
7782
});
78-
} else if (type === "textResponseChunk") {
83+
} else if (
84+
type === "textResponseChunk" ||
85+
type === "finalizeResponseStream"
86+
) {
7987
const chatIdx = _chatHistory.findIndex((chat) => chat.uuid === uuid);
8088
if (chatIdx !== -1) {
8189
const existingHistory = { ..._chatHistory[chatIdx] };
82-
const updatedHistory = {
83-
...existingHistory,
84-
content: existingHistory.content + textResponse,
85-
sources,
86-
error,
87-
closed: close,
88-
animate: !close,
89-
pending: false,
90-
chatId,
91-
};
90+
let updatedHistory;
91+
92+
// If the response is finalized, we can set the loading state to false.
93+
// and append the metrics to the history.
94+
if (type === "finalizeResponseStream") {
95+
updatedHistory = {
96+
...existingHistory,
97+
closed: close,
98+
animate: !close,
99+
pending: false,
100+
chatId,
101+
metrics,
102+
};
103+
setLoadingResponse(false);
104+
} else {
105+
updatedHistory = {
106+
...existingHistory,
107+
content: existingHistory.content + textResponse,
108+
sources,
109+
error,
110+
closed: close,
111+
animate: !close,
112+
pending: false,
113+
chatId,
114+
metrics,
115+
};
116+
}
92117
_chatHistory[chatIdx] = updatedHistory;
93118
} else {
94119
_chatHistory.push({
@@ -101,6 +126,7 @@ export default function handleChat(
101126
animate: !close,
102127
pending: false,
103128
chatId,
129+
metrics,
104130
});
105131
}
106132
setChatHistory([..._chatHistory]);
@@ -125,6 +151,7 @@ export default function handleChat(
125151
error: null,
126152
animate: false,
127153
pending: false,
154+
metrics,
128155
};
129156
_chatHistory[chatIdx] = updatedHistory;
130157

server/.env.example

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
5252
# PERPLEXITY_API_KEY='my-perplexity-key'
5353
# PERPLEXITY_MODEL_PREF='codellama-34b-instruct'
5454

55+
# LLM_PROVIDER='deepseek'
56+
# DEEPSEEK_API_KEY=YOUR_API_KEY
57+
# DEEPSEEK_MODEL_PREF='deepseek-chat'
58+
5559
# LLM_PROVIDER='openrouter'
5660
# OPENROUTER_API_KEY='my-openrouter-key'
5761
# OPENROUTER_MODEL_PREF='openrouter/auto'

server/endpoints/api/admin/index.js

Lines changed: 20 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -610,24 +610,20 @@ function apiAdminEndpoints(app) {
610610
const workspaceUsers = await Workspace.workspaceUsers(workspace.id);
611611

612612
if (!workspace) {
613-
response
614-
.status(404)
615-
.json({
616-
success: false,
617-
error: `Workspace ${workspaceSlug} not found`,
618-
users: workspaceUsers,
619-
});
613+
response.status(404).json({
614+
success: false,
615+
error: `Workspace ${workspaceSlug} not found`,
616+
users: workspaceUsers,
617+
});
620618
return;
621619
}
622620

623621
if (userIds.length === 0) {
624-
response
625-
.status(404)
626-
.json({
627-
success: false,
628-
error: `No valid user IDs provided.`,
629-
users: workspaceUsers,
630-
});
622+
response.status(404).json({
623+
success: false,
624+
error: `No valid user IDs provided.`,
625+
users: workspaceUsers,
626+
});
631627
return;
632628
}
633629

@@ -637,13 +633,11 @@ function apiAdminEndpoints(app) {
637633
workspace.id,
638634
userIds
639635
);
640-
return response
641-
.status(200)
642-
.json({
643-
success,
644-
error,
645-
users: await Workspace.workspaceUsers(workspace.id),
646-
});
636+
return response.status(200).json({
637+
success,
638+
error,
639+
users: await Workspace.workspaceUsers(workspace.id),
640+
});
647641
}
648642

649643
// Add new users to the workspace if they are not already in the workspace
@@ -653,13 +647,11 @@ function apiAdminEndpoints(app) {
653647
);
654648
if (usersToAdd.length > 0)
655649
await WorkspaceUser.createManyUsers(usersToAdd, workspace.id);
656-
response
657-
.status(200)
658-
.json({
659-
success: true,
660-
error: null,
661-
users: await Workspace.workspaceUsers(workspace.id),
662-
});
650+
response.status(200).json({
651+
success: true,
652+
error: null,
653+
users: await Workspace.workspaceUsers(workspace.id),
654+
});
663655
} catch (e) {
664656
console.error(e);
665657
response.sendStatus(500).end();

0 commit comments

Comments
 (0)