const std = @import("std");

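/// Typed view of a chat-completions response from an OpenAI-compatible
/// server (apparently a local llama.cpp instance, judging by the `timings`
/// block and the hard-coded URL below). `usage`, `timings`, and `logprobs`
/// are optional because the server may omit them.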
const LLMResponse = struct {
    id: []const u8,
    object: []const u8,
    created: u32,
    model: []const u8,
    usage: ?struct {
        prompt_tokens: u32,
        completion_tokens: u32,
        total_tokens: u32,
    } = null,
    // llama.cpp-style timing stats. `prompt_n` (prompt token count) is assumed
    // here, mirroring `predicted_n` below; the original had `prompt_p`.
    timings: ?struct {
        prompt_n: u32,
        prompt_ms: f64,
        prompt_per_token_ms: f64,
        prompt_per_second: f64,
        predicted_n: u32,
        predicted_ms: f64,
        predicted_per_token_ms: f64,
        predicted_per_second: f64,
    } = null,
    choices: []struct {
        message: struct {
            role: []const u8,
            content: []const u8,
        },
        logprobs: ?struct {
            content: []struct {
                token: []const u8,
                logprob: f64,
                bytes: []const u8,
                top_logprobs: ?[]struct {
                    token: []const u8,
                    logprob: f64,
                },
            },
        } = null,
        finish_reason: []const u8,
        index: u32,
    },
    system_fingerprint: []const u8,
};

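// Request-side types for the /v1/chat/completions JSON body.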
const Message = struct {
    role: []const u8,
    content: []const u8,
};

const RequestPayload = struct {
    model: []const u8,
    messages: []Message,
};

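/// Fills a printf-style template: each `{s}` marker in `template` is
/// replaced, in order, with the next entry of `substitutions`. If the
/// substitutions run out, any remaining `{s}` markers are simply dropped.
/// Caller owns the returned slice.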
pub fn formatTemplate(allocator: std.mem.Allocator, template: []const u8, substitutions: []const []const u8) ![]u8 {
    var result = std.ArrayList(u8).empty;
    errdefer result.deinit(allocator);

    var index: usize = 0;
    // splitScalar takes a single u8 delimiter; the original passed "", which
    // does not compile. Split on '\n' to process the template line by line.
    var line_iter = std.mem.splitScalar(u8, template, '\n');
    while (line_iter.next()) |line| {
        var parts = std.mem.splitSequence(u8, line, "{s}");
        try result.appendSlice(allocator, parts.next().?);

        while (parts.next()) |part| {
            if (index < substitutions.len) {
                try result.appendSlice(allocator, substitutions[index]);
                index += 1;
            }
            try result.appendSlice(allocator, part);
        }
        try result.append(allocator, '\n');
    }
    // Drop the extra '\n' appended after the final line.
    _ = result.pop();

    return result.toOwnedSlice(allocator);
}

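/// Sends a system + user message pair to the local OpenAI-compatible
/// chat-completions endpoint hard-coded below and parses the JSON reply into
/// `LLMResponse`. The parsed result is allocated with the caller's
/// `allocator` and must be released with `.deinit()`.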
pub fn llmCall(allocator: std.mem.Allocator, system_prompt: []const u8, user_prompt: []const u8) !std.json.Parsed(LLMResponse) {
    // Scratch arena for the request: everything allocated from it dies with this call.
    var request_arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer request_arena.deinit();

    const request_arena_allocator = request_arena.allocator();

    var client = std.http.Client{ .allocator = request_arena_allocator };
    defer client.deinit();

    // Growable sink for the response body (Zig 0.15 std.Io.Writer API).
    var body: std.Io.Writer.Allocating = .init(request_arena_allocator);
    defer body.deinit();

    const uri = try std.Uri.parse("http://127.0.0.1:1337/v1/chat/completions");

    var messages = [_]Message{
        Message{ .role = "system", .content = system_prompt },
        Message{ .role = "user", .content = user_prompt },
    };
    const request_payload = RequestPayload{
        .model = "Qwen_Qwen3-4B-Instruct-2507-IQ4_XS",
        .messages = &messages,
    };
    const payload = try std.json.Stringify.valueAlloc(request_arena_allocator, request_payload, .{});
    std.debug.print("{s}\n", .{"=" ** 50});
    std.debug.print("Payload: {s}\n", .{payload});

    // The commit is truncated mid-call; the remaining fetch options and the
    // parse step below are completed against the Zig 0.15 std.http.Client API.
    const response = try client.fetch(.{
        .method = .POST,
        .location = .{ .uri = uri },
        .headers = .{ .content_type = .{ .override = "application/json" } },
        .payload = payload,
        .response_writer = &body.writer,
    });
    if (response.status != .ok) return error.LLMRequestFailed;

    // Parse with the caller's allocator so the result outlives the arena, and
    // force string copies so nothing points into the arena-backed body buffer.
    return std.json.parseFromSlice(LLMResponse, allocator, body.written(), .{
        .ignore_unknown_fields = true,
        .allocate = .alloc_always,
    });
}
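
// Hypothetical usage sketch (not part of the original commit): format a
// prompt, call the local server, and print the first choice. Assumes a
// llama.cpp-compatible server is listening on 127.0.0.1:1337.
pub fn main() !void {
    const allocator = std.heap.page_allocator;

    const prompt = try formatTemplate(allocator, "Summarize in one line: {s}", &.{"Zig's std.http client"});
    defer allocator.free(prompt);

    const parsed = try llmCall(allocator, "You are a terse assistant.", prompt);
    defer parsed.deinit();

    std.debug.print("Response: {s}\n", .{parsed.value.choices[0].message.content});
}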