|
5 | 5 | syntax = "proto3";
|
6 | 6 | package fmaas;
|
7 | 7 |
|
| 8 | + |
8 | 9 | service GenerationService {
|
9 | 10 | // Generates text given a text prompt, for one or more inputs
|
10 | 11 | rpc Generate (BatchedGenerationRequest) returns (BatchedGenerationResponse) {}
|
|
30 | 31 | optional string prefix_id = 2;
|
31 | 32 | optional string adapter_id = 4;
|
32 | 33 | repeated GenerationRequest requests = 3;
|
33 |
| - |
| 34 | + |
34 | 35 | Parameters params = 10;
|
35 | 36 | }
|
36 | 37 |
|
|
40 | 41 | optional string prefix_id = 2;
|
41 | 42 | optional string adapter_id = 4;
|
42 | 43 | GenerationRequest request = 3;
|
43 |
| - |
| 44 | + |
44 | 45 | Parameters params = 10;
|
45 | 46 | }
|
46 | 47 |
|
|
61 | 62 | string stop_sequence = 11;
|
62 | 63 | // Random seed used, not applicable for greedy requests
|
63 | 64 | uint64 seed = 10;
|
64 |
| - |
| 65 | + |
65 | 66 | // Individual generated tokens and associated details, if requested
|
66 | 67 | repeated TokenInfo tokens = 8;
|
67 |
| - |
| 68 | + |
68 | 69 | // Input tokens and associated details, if requested
|
69 | 70 | repeated TokenInfo input_tokens = 9;
|
70 | 71 | }
|
|
98 | 99 | // Default (0.0) means no penalty (equivalent to 1.0)
|
99 | 100 | // 1.2 is a recommended value
|
100 | 101 | float repetition_penalty = 1;
|
101 |
| - |
| 102 | + |
102 | 103 | // Exponentially increases the score of the EOS token
|
103 | 104 | // once start_index tokens have been generated
|
104 | 105 | optional LengthPenalty length_penalty = 2;
|
|
139 | 140 | float top_p = 3;
|
140 | 141 | // Default (0) means disabled (equivalent to 1.0)
|
141 | 142 | float typical_p = 4;
|
142 |
| - |
| 143 | + |
143 | 144 | optional uint64 seed = 5;
|
144 | 145 | }
|
145 | 146 |
|
|
206 | 207 | float logprob = 3;
|
207 | 208 | // One-based rank relative to other tokens, if requested
|
208 | 209 | uint32 rank = 4;
|
209 |
| - |
| 210 | + |
// One candidate-token entry. It is the element type of the repeated
// top_tokens field declared right after this message, which is described
// there as the "Top N candidate tokens at this position, if requested".
210 | 211 | message TopToken {
|
// Field number 1 is currently unused: the id field is commented out (TBD).
211 | 212 | // uint32 id = 1; // TBD
|
// presumably the text of this candidate token -- TODO confirm
212 | 213 | string text = 2;
|
// presumably this candidate's log-probability, mirroring the logprob field
// declared just before this message -- TODO confirm
213 | 214 | float logprob = 3;
|
214 | 215 | }
|
215 |
| - |
| 216 | + |
216 | 217 | // Top N candidate tokens at this position, if requested
|
217 | 218 | // May or may not include this token
|
218 | 219 | repeated TopToken top_tokens = 5;
|
|
257 | 258 |
|
258 | 259 | // ============================================================================================================
|
259 | 260 | // Model Info API
|
260 |
| - |
| 261 | + |
// Request message declared under the "Model Info API" section; it carries a
// single string field.
261 | 262 | message ModelInfoRequest {
|
// presumably identifies the model whose information is being requested
// -- TODO confirm against the service implementation
262 | 263 | string model_id = 1;
|
263 | 264 | }
|
|
267 | 268 | DECODER_ONLY = 0;
|
268 | 269 | ENCODER_DECODER = 1;
|
269 | 270 | }
|
270 |
| - |
| 271 | + |
271 | 272 | ModelKind model_kind = 1;
|
272 | 273 | uint32 max_sequence_length = 2;
|
273 | 274 | uint32 max_new_tokens = 3;
|
|
0 commit comments