Skip to content

Commit c62d09b

Browse files
committed
Make multiround benchmarking logs clearer
1 parent f8d821f commit c62d09b

File tree

6 files changed

+58
-44
lines changed

6 files changed

+58
-44
lines changed

src/benchmark/framework/benchmarkingCore/executeBenchmarkingTask.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,11 +122,18 @@ export async function executeBenchmarkingTask(
122122
nextGeneratedProofId: nextGeneratedProofId,
123123
roundNumber: roundNumber,
124124
};
125+
const parentProofDesc =
126+
parentProof === undefined
127+
? "generate proofs"
128+
: `proof to fix id: ${parentProof?.benchmarkedProof.generatedProofId}`;
129+
const thisRoundLogger = itemLogger.createChildLoggerWithIdentifier(
130+
`[round: ${roundNumber}, ${parentProofDesc}]`
131+
);
125132
const result = await benchmarkSingleCompletionGeneration(
126133
thisRoundGenerationArgs,
127134
options,
128135
modelsScheduler,
129-
itemLogger,
136+
thisRoundLogger,
130137
proofsChecker,
131138
abortSignal
132139
);

src/benchmark/framework/benchmarkingCore/executeBenchmarkingTaskUtils/errorHandling.ts

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,7 @@ import { ModelParams } from "../../../../llm/llmServices/modelParams";
33

44
import { buildErrorCompleteLog } from "../../../../utils/errorsUtils";
55
import { IllegalStateError } from "../../../../utils/throwErrors";
6-
import {
7-
AsOneRecordLogsBuilder,
8-
BenchmarkingLogger,
9-
} from "../../logging/benchmarkingLogger";
6+
import { BenchmarkingLogger } from "../../logging/benchmarkingLogger";
107
import { BenchmarkingModelParams } from "../../structures/benchmarkingCore/benchmarkingModelParams";
118
import { BenchmarkingOptions } from "../../structures/benchmarkingCore/benchmarkingOptions";
129
import { FailFastAbortError } from "../../utils/asyncUtils/abortUtils";
@@ -82,13 +79,14 @@ export namespace ExecuteBenchmarkingTaskErrorHandlingUtils {
8279
}
8380

8481
export function logCommonError(
85-
e: any,
82+
e: ExpectedError,
8683
itemLogger: BenchmarkingLogger,
8784
params: BenchmarkingModelParams<ModelParams>,
8885
options: BenchmarkingOptions,
8986
abortSignal: AbortSignal
9087
) {
91-
const logConclusion = (errorRecordLogger: AsOneRecordLogsBuilder) => {
88+
const errorRecordLogger = itemLogger.asOneRecord();
89+
const logConclusion = () => {
9290
if (options.failFast) {
9391
if (abortSignal.aborted) {
9492
errorRecordLogger.info(
@@ -107,22 +105,16 @@ export namespace ExecuteBenchmarkingTaskErrorHandlingUtils {
107105
};
108106

109107
if (e instanceof BenchmarkingError) {
110-
logConclusion(itemLogger.asOneRecord().error(e.message));
108+
errorRecordLogger.error(e.message);
111109
} else if (e instanceof ConfigurationError) {
112-
logConclusion(
113-
itemLogger
114-
.asOneRecord()
115-
.error(
116-
`"${params.modelParams.modelId}" is configured incorrectly: ${e.message}`
117-
)
110+
errorRecordLogger.error(
111+
`"${params.modelParams.modelId}" is configured incorrectly: ${e.message}`
118112
);
119113
} else {
120-
logConclusion(
121-
itemLogger
122-
.asOneRecord()
123-
.error(`Error occurred:`)
124-
.error(buildErrorCompleteLog(e), "gray")
125-
);
114+
errorRecordLogger
115+
.error(`Error occurred:`)
116+
.error(buildErrorCompleteLog(e), "gray");
126117
}
118+
logConclusion();
127119
}
128120
}

src/benchmark/framework/benchmarkingCore/executeBenchmarkingTaskUtils/logging.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,14 @@ export namespace ExecuteBenchmarkingTaskLoggingUtils {
2828
.debug("First valid proof:")
2929
.debug(roundResult.thisRoundValidProofs[0].asString);
3030
} else {
31-
asOneRecordLogs.debug(
31+
asOneRecordLogs.info(
3232
`However, no valid proofs have been found ${heavyCrossMark}`
3333
);
3434
}
3535
const generatedProofsIds = roundResult.generatedProofs
3636
.map((proof) => `${proof.generatedProofId}`)
3737
.join(", ");
38-
asOneRecordLogs.debug(
38+
asOneRecordLogs.info(
3939
`Newly generated proofs id-s are: [${generatedProofsIds}]`
4040
);
4141
logElapsedTime();

src/benchmark/framework/benchmarkingCore/singleCompletionGeneration/benchmarkSingleCompletionGeneration.ts

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import {
3131
} from "../../../../utils/time";
3232
import { BenchmarkingLogger } from "../../logging/benchmarkingLogger";
3333
import { writeTeamCityStatisticsValue } from "../../logging/consoleWriteUtils";
34+
import { infinitySymbol } from "../../logging/specialSymbols";
3435
import { BenchmarkingModelParams } from "../../structures/benchmarkingCore/benchmarkingModelParams";
3536
import { BenchmarkingOptions } from "../../structures/benchmarkingCore/benchmarkingOptions";
3637
import {
@@ -129,15 +130,6 @@ export async function benchmarkSingleCompletionGeneration<
129130
logger,
130131
abortSignal
131132
);
132-
logger
133-
.asOneRecord()
134-
.info(
135-
`Successfully generated ${proofGenerationResult.generatedProofs.length} proof(s)`
136-
)
137-
.debug(
138-
`Effective elapsed time: ${proofGenerationResult.effectiveElapsedTimeMillis} ms`,
139-
"gray"
140-
);
141133
const preparedProofs: [string, GeneratedProof, number][] =
142134
proofGenerationResult.generatedProofs.map(
143135
(generatedProof: GeneratedProof, index: number) => [
@@ -266,6 +258,12 @@ export async function benchmarkSingleCompletionGeneration<
266258
return result;
267259
}
268260

261+
/**
262+
* Prevents from buggy delay estimates:
263+
* infinite cycle with zero delays might cause some troubles.
264+
*/
265+
export const minDelayMillis = 100;
266+
269267
namespace RemoteConnectionErrorDelays {
270268
export const initialDelayMillis = 10_000;
271269
export const exponentialMultiplier = 2;
@@ -341,6 +339,7 @@ async function generateProofWithRetriesExclusively<
341339
generateProof,
342340
generationArgs.llmService,
343341
options,
342+
generationArgs.roundNumber,
344343
logger,
345344
abortSignal
346345
);
@@ -353,19 +352,29 @@ async function generateProofWithRetriesMeasured(
353352
) => Promise<GeneratedProof[]>,
354353
llmService: LLMService<any, any>,
355354
options: BenchmarkingOptions,
355+
roundNumber: number,
356356
logger: BenchmarkingLogger,
357357
abortSignal: AbortSignal
358358
): Promise<ProofGenerationResult> {
359359
let delayMillis = 0;
360360
let prevFailureIsConnectionError = false;
361-
let attemptIndex = 0;
361+
let attemptIndex = 1;
362+
const maxAttemptsString = options.proofGenerationRetries ?? infinitySymbol;
362363

363364
let totalTime = new TimeMark();
364365
while (true) {
366+
const attemptLogger = logger.createChildLoggerWithIdentifier(
367+
`[proof generation attempt ${attemptIndex}/${maxAttemptsString}]`
368+
);
365369
// `options.proofGenerationRetries` might be undefined meaning the unlimited retries case
366-
if (attemptIndex === options.proofGenerationRetries) {
370+
if (attemptIndex - 1 === options.proofGenerationRetries) {
371+
attemptLogger.error(
372+
`max retries (${options.proofGenerationRetries}) has been reached`,
373+
"default"
374+
);
367375
throwBenchmarkingError(
368-
`Proof generation failed: max retries (${options.proofGenerationRetries}) has been reached`
376+
`Proof generation failed: max retries (${options.proofGenerationRetries}) `,
377+
`has been reached at round ${roundNumber}`
369378
);
370379
}
371380
throwOnAbort(abortSignal);
@@ -386,10 +395,10 @@ async function generateProofWithRetriesMeasured(
386395
};
387396

388397
const tokens = result.tokensSpentInTotal;
389-
logger
398+
attemptLogger
390399
.asOneRecord()
391-
.debug(
392-
`Attempt #${attemptIndex}, successfully generated proofs`
400+
.info(
401+
`Successfully generated ${generatedProofs.length} proof(s)`
393402
)
394403
.debug(
395404
`Tokens spent: ${tokens.tokensSpentInTotal} = ${tokens.promptTokens} (prompt) + ${tokens.generatedTokens} (generated answer)`
@@ -415,19 +424,23 @@ async function generateProofWithRetriesMeasured(
415424
const llmServiceError = e as LLMServiceError;
416425

417426
if (llmServiceError instanceof ConfigurationError) {
418-
logger.debug(
419-
`Attempt #${attemptIndex}, configuration error: ${llmServiceError.message}`
427+
attemptLogger.error(
428+
`Configuration error: ${llmServiceError.message}`,
429+
"default"
420430
);
421431
throw llmServiceError;
422432
}
423433
if (llmServiceError instanceof GenerationFailedError) {
424434
const estimatedTime =
425435
llmService.estimateTimeToBecomeAvailable();
426-
delayMillis = timeToMillis(estimatedTime);
427-
logger
436+
delayMillis = Math.max(
437+
timeToMillis(estimatedTime),
438+
minDelayMillis
439+
);
440+
attemptLogger
428441
.asOneRecord()
429442
.debug(
430-
`Attempt #${attemptIndex}, generation failed error: ${llmServiceError.message}`
443+
`Generation failed error: ${llmServiceError.message}`
431444
)
432445
.debug(
433446
`Estimated time to become available: ${timeToString(estimatedTime)}`
@@ -441,10 +454,10 @@ async function generateProofWithRetriesMeasured(
441454
RemoteConnectionErrorDelays.initialDelayMillis;
442455
prevFailureIsConnectionError = true;
443456
}
444-
logger
457+
attemptLogger
445458
.asOneRecord()
446459
.debug(
447-
`Attempt #${attemptIndex}, remote connection error: ${stringifyAnyValue(llmServiceError.message)}`
460+
`Remote connection error: ${stringifyAnyValue(llmServiceError.message)}`
448461
)
449462
.debug(`Delay to wait for: ${millisToString(delayMillis)}`);
450463
} else {
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
export const heavyCheckMark = "\u2714";
22
export const heavyCrossMark = "\u2718";
3+
export const infinitySymbol = "\u221E";

src/benchmark/framework/structures/benchmarkingResults/benchmarkedItem.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ abstract class AbstractBenchmarkedCompletionGeneration<
118118
);
119119
}
120120

121+
// TODO: can be cached and updated only on linking
121122
isSuccessfulCompletion(): boolean {
122123
return this.getAllValidProofs().length > 0;
123124
}

0 commit comments

Comments
 (0)