Skip to content

Commit 48f4839

Browse files
improve overall report
1 parent: 4834388 · commit: 48f4839

File tree

1 file changed

+63 / -53 lines changed

1 file changed

+63 / -53 lines changed

.github/scripts/starklings-evaluate.js

Lines changed: 63 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -215,7 +215,7 @@ async function testExercise(exercise, starklingsPath, runNumber = 1) {
215215
log(`Updated exercise file with generated code`);
216216

217217
// Sauvegarder les fichiers de debug SEULEMENT pour le dernier run (run 10)
218-
if (SAVE_RESPONSES && runNumber === 10) {
218+
if (SAVE_RESPONSES && runNumber === 2) {
219219
const solutionFile = path.join(__dirname, '..', '..', 'debug', `${exercise.name}_solution.cairo`);
220220
fs.mkdirSync(path.dirname(solutionFile), { recursive: true });
221221
fs.writeFileSync(solutionFile, correctedCode);
@@ -248,7 +248,7 @@ async function testExercise(exercise, starklingsPath, runNumber = 1) {
248248
};
249249

250250
// Sauvegarder les erreurs SEULEMENT pour le dernier run
251-
if (SAVE_RESPONSES && runNumber === 10) {
251+
if (SAVE_RESPONSES && runNumber === 2) {
252252
const errorFile = path.join(__dirname, '..', '..', 'debug', `${exercise.name}_error.txt`);
253253
fs.writeFileSync(errorFile, `Exit code: ${error.status}\n\nSTDOUT:\n${error.stdout}\n\nSTDERR:\n${error.stderr}`);
254254
log(`Error details saved to: ${errorFile}`);
@@ -328,55 +328,65 @@ function extractCairoCode(generatedResponse) {
328328
}
329329

330330
function generateConsolidatedReport(allResults) {
331-
if (allResults.length === 0) {
332-
return { error: 'No successful runs' };
333-
}
334-
335-
const successRates = allResults.map(r => parseFloat(r.globalSuccessRate));
336-
const averageSuccessRate = (successRates.reduce((sum, rate) => sum + rate, 0) / successRates.length).toFixed(1);
337-
338-
const bestRun = allResults.reduce((best, current) =>
339-
parseFloat(current.globalSuccessRate) > parseFloat(best.globalSuccessRate) ? current : best
340-
);
341-
342-
const worstRun = allResults.reduce((worst, current) =>
343-
parseFloat(current.globalSuccessRate) < parseFloat(worst.globalSuccessRate) ? current : worst
344-
);
345-
346-
// Analyse par catégorie
347-
const categoryStats = {};
348-
allResults.forEach(run => {
349-
run.categories.forEach(category => {
350-
if (!categoryStats[category.category]) {
351-
categoryStats[category.category] = {
352-
successRates: [],
353-
averageSuccessRate: 0,
354-
bestRate: 0,
355-
worstRate: 100
356-
};
357-
}
358-
359-
const rate = parseFloat(category.successRate);
360-
categoryStats[category.category].successRates.push(rate);
361-
categoryStats[category.category].bestRate = Math.max(categoryStats[category.category].bestRate, rate);
362-
categoryStats[category.category].worstRate = Math.min(categoryStats[category.category].worstRate, rate);
363-
});
364-
});
365-
366-
// Calculer les moyennes par catégorie
367-
Object.keys(categoryStats).forEach(category => {
368-
const rates = categoryStats[category].successRates;
369-
categoryStats[category].averageSuccessRate = (rates.reduce((sum, rate) => sum + rate, 0) / rates.length).toFixed(1);
370-
});
371-
372-
return {
373-
totalRuns: allResults.length,
374-
averageSuccessRate: averageSuccessRate,
375-
bestRun: bestRun,
376-
worstRun: worstRun,
377-
categoryStats: categoryStats,
378-
allRuns: allResults
379-
};
331+
if (allResults.length === 0) {
332+
return { error: 'No successful runs' };
333+
}
334+
335+
// Taux de réussite global
336+
const successRates = allResults.map(r => parseFloat(r.globalSuccessRate));
337+
const averageSuccessRate = (successRates.reduce((sum, rate) => sum + rate, 0) / successRates.length).toFixed(1);
338+
339+
// Taux de réussite par catégorie
340+
const categoryStats = {};
341+
allResults.forEach(run => {
342+
run.categories.forEach(category => {
343+
if (!categoryStats[category.category]) {
344+
categoryStats[category.category] = {
345+
successRates: []
346+
};
347+
}
348+
categoryStats[category.category].successRates.push(parseFloat(category.successRate));
349+
});
350+
});
351+
352+
// Calculer les moyennes par catégorie
353+
const categoryAverages = {};
354+
Object.keys(categoryStats).forEach(category => {
355+
const rates = categoryStats[category].successRates;
356+
categoryAverages[category] = (rates.reduce((sum, rate) => sum + rate, 0) / rates.length).toFixed(1) + '%';
357+
});
358+
359+
// Collecter les erreurs par exercice et par run
360+
const exerciseErrors = {};
361+
allResults.forEach(run => {
362+
run.categories.forEach(category => {
363+
category.exercises.forEach(exercise => {
364+
if (!exercise.success && exercise.error) {
365+
if (!exerciseErrors[exercise.name]) {
366+
exerciseErrors[exercise.name] = [];
367+
}
368+
369+
// Ajouter l'erreur avec le numéro de run
370+
exerciseErrors[exercise.name].push({
371+
run: run.runNumber,
372+
type: exercise.error.type || 'COMPILATION_ERROR',
373+
message: exercise.error.message || 'Compilation failed',
374+
stdout: exercise.error.stdout ? exercise.error.stdout.substring(0, 500) : null,
375+
stderr: exercise.error.stderr ? exercise.error.stderr.substring(0, 500) : null
376+
});
377+
}
378+
});
379+
});
380+
});
381+
382+
return {
383+
summary: {
384+
totalRuns: allResults.length,
385+
globalSuccessRate: averageSuccessRate + '%'
386+
},
387+
categorySuccessRates: categoryAverages,
388+
exerciseErrors: exerciseErrors
389+
};
380390
}
381391

382392
async function runSingleTest(runNumber) {
@@ -431,7 +441,7 @@ async function runSingleTest(runNumber) {
431441

432442
// Calculer le total d'exercices
433443
const totalExercises = Object.values(categoriesToTest).reduce((sum, exercises) => sum + exercises.length, 0);
434-
console.log(`\n🧪 [RUN ${runNumber}/10] Starting evaluation of ${totalExercises} exercises across ${Object.keys(categoriesToTest).length} categories...`);
444+
console.log(`\n🧪 [RUN ${runNumber}/2] Starting evaluation of ${totalExercises} exercises across ${Object.keys(categoriesToTest).length} categories...`);
435445

436446
// Traiter les catégories en parallèle
437447
const startTime = Date.now();
@@ -464,7 +474,7 @@ async function runSingleTest(runNumber) {
464474
}
465475

466476
async function main() {
467-
const NUM_RUNS = 10;
477+
const NUM_RUNS = 2;
468478
const allResults = [];
469479

470480
console.log(`🚀 Starting ${NUM_RUNS} successive test runs...`);

0 commit comments

Comments
 (0)