#!/usr/bin/env -S deno run --allow-env --allow-net

import readline from 'node:readline';
import process from 'node:process'; // explicit import so `process` does not rely on a runtime-provided global

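// Configuration comes from environment variables; the base URL, model, and streaming
// mode all have sensible defaults, and the API key is only sent when provided.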
const LLM_API_BASE_URL = process.env.LLM_API_BASE_URL || 'https://api.openai.com/v1';
const LLM_API_KEY = process.env.LLM_API_KEY || process.env.OPENAI_API_KEY;
const LLM_CHAT_MODEL = process.env.LLM_CHAT_MODEL;
const LLM_STREAMING = process.env.LLM_STREAMING !== 'no';

const LLM_DEBUG = process.env.LLM_DEBUG;

/**
 * Represents a chat message.
 *
 * @typedef {Object} Message
 * @property {'system'|'user'|'assistant'} role
 * @property {string} content
 */

/**
 * A callback function to stream the completion.
 *
 * @callback CompletionHandler
 * @param {string} text
 * @returns {void}
 */

/**
 * Generates a chat completion using a RESTful LLM API service.
 *
 * @param {Array<Message>} messages - List of chat messages.
 * @param {CompletionHandler=} handler - An optional callback to stream the completion.
 * @returns {Promise<string>} The completion generated by the LLM.
 */
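// Illustrative usage (an assumption, not part of the original flow):
//   const reply = await chat([{ role: 'user', content: 'Hello!' }], (part) => process.stdout.write(part));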
const chat = async (messages, handler) => {
    const url = `${LLM_API_BASE_URL}/chat/completions`;
    const auth = LLM_API_KEY ? { 'Authorization': `Bearer ${LLM_API_KEY}` } : {};
    const model = LLM_CHAT_MODEL || 'gpt-4o-mini';
    const stop = ['<|im_end|>', '<|end|>', '<|eot_id|>'];
    const max_tokens = 200;
    const temperature = 0;
    const stream = LLM_STREAMING && typeof handler === 'function';
    const response = await fetch(url, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json', ...auth },
        body: JSON.stringify({ messages, model, stop, max_tokens, temperature, stream })
    });
    if (!response.ok) {
        throw new Error(`HTTP error ${response.status} ${response.statusText}`);
    }

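    // Without streaming, the whole completion arrives as a single JSON payload.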
    if (!stream) {
        const data = await response.json();
        const { choices } = data;
        const first = choices[0];
        const { message } = first;
        const { content } = message;
        const answer = content.trim();
        handler && handler(answer);
        return answer;
    }

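    // Parses one server-sent event line ("data: {...}") and returns its incremental
    // content, or null when the line is not (or not yet) a complete data payload.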
    const parse = (line) => {
        let partial = null;
        const prefix = line.substring(0, 6);
        if (prefix === 'data: ') {
            const payload = line.substring(6);
            try {
                const { choices } = JSON.parse(payload);
                const [choice] = choices;
                const { delta } = choice;
                partial = delta?.content;
            } catch (e) {
                // ignore, e.g. a JSON fragment that was split across chunks
            }
        }
        return partial;
    };

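    // Read the SSE stream chunk by chunk; `buffer` carries over any line that was
    // split across two chunks so it can be re-parsed once the rest arrives.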
    const reader = response.body.getReader();
    const decoder = new TextDecoder();

    let answer = '';
    let buffer = '';
    while (true) {
        const { value, done } = await reader.read();
        if (done) {
            break;
        }
        const lines = decoder.decode(value, { stream: true }).split('\n'); // stream: true keeps multi-byte characters intact across chunk boundaries
        for (let i = 0; i < lines.length; ++i) {
            const line = buffer + lines[i];
            if (line[0] === ':') {
                // SSE comment (keep-alive), nothing to parse
                buffer = '';
                continue;
            }
            if (line === 'data: [DONE]') {
                break;
            }
            if (line.length > 0) {
                const partial = parse(line.trim());
                if (partial === null) {
                    // incomplete line, keep it and retry with the next chunk
                    buffer = line;
                } else if (partial && partial.length > 0) {
                    buffer = '';
                    if (answer.length < 1) {
                        // trim any leading whitespace from the very first fragment
                        const leading = partial.trim();
                        answer = leading;
                        handler && (leading.length > 0) && handler(leading);
                    } else {
                        answer += partial;
                        handler && handler(partial);
                    }
                }
            }
        }
    }
    return answer;
};

const SYSTEM_PROMPT = 'Answer the question politely and concisely.';

(async () => {
    console.log(`Using LLM at ${LLM_API_BASE_URL}.`);
    console.log('Press Ctrl+D to exit.');
    console.log();

    const messages = [];
    messages.push({ role: 'system', content: SYSTEM_PROMPT });

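    // Interactive loop: read a question, stream the answer, then prompt again until stdin is closed (Ctrl+D).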
    let loop = true;
    const io = readline.createInterface({ input: process.stdin, output: process.stdout });
    io.on('close', () => { loop = false; });

    const qa = () => {
        io.question('>> ', async (question) => {
            messages.push({ role: 'user', content: question });
            const start = Date.now();
            const answer = await chat(messages, (str) => process.stdout.write(str));
            messages.push({ role: 'assistant', content: answer.trim() });
            console.log();
            const elapsed = Date.now() - start;
            LLM_DEBUG && console.log(`[${elapsed} ms]`);
            console.log();
            loop && qa();
        });
    };

    qa();
})();