@@ -215,7 +215,7 @@ async function testExercise(exercise, starklingsPath, runNumber = 1) {
215215 log ( `Updated exercise file with generated code` ) ;
216216
217217 // Sauvegarder les fichiers de debug SEULEMENT pour le dernier run (run 10)
218- if ( SAVE_RESPONSES && runNumber === 10 ) {
218+ if ( SAVE_RESPONSES && runNumber === 2 ) {
219219 const solutionFile = path . join ( __dirname , '..' , '..' , 'debug' , `${ exercise . name } _solution.cairo` ) ;
220220 fs . mkdirSync ( path . dirname ( solutionFile ) , { recursive : true } ) ;
221221 fs . writeFileSync ( solutionFile , correctedCode ) ;
@@ -248,7 +248,7 @@ async function testExercise(exercise, starklingsPath, runNumber = 1) {
248248 } ;
249249
250250 // Sauvegarder les erreurs SEULEMENT pour le dernier run
251- if ( SAVE_RESPONSES && runNumber === 10 ) {
251+ if ( SAVE_RESPONSES && runNumber === 2 ) {
252252 const errorFile = path . join ( __dirname , '..' , '..' , 'debug' , `${ exercise . name } _error.txt` ) ;
253253 fs . writeFileSync ( errorFile , `Exit code: ${ error . status } \n\nSTDOUT:\n${ error . stdout } \n\nSTDERR:\n${ error . stderr } ` ) ;
254254 log ( `Error details saved to: ${ errorFile } ` ) ;
@@ -328,55 +328,65 @@ function extractCairoCode(generatedResponse) {
328328}
329329
/**
 * Build a consolidated report from the results of several test runs.
 *
 * Each entry of `allResults` is read for `runNumber`, `globalSuccessRate` and
 * `categories`; each category is read for `category` (its name),
 * `successRate` and `exercises`; each exercise is read for `name`, `success`
 * and `error`. Rates are parsed with `parseFloat`.
 *
 * @param {Array<object>} allResults - One result object per completed run.
 * @returns {{error: string} | {summary: {totalRuns: number, globalSuccessRate: string}, categorySuccessRates: Object<string, string>, exerciseErrors: Object<string, Array<object>>}}
 *   `{ error: 'No successful runs' }` when there is nothing to aggregate;
 *   otherwise the averaged global rate, the averaged rate per category (both
 *   formatted with one decimal and a trailing `%`), and, per failed exercise
 *   name, the list of errors tagged with the run they occurred in.
 */
function generateConsolidatedReport(allResults) {
  if (!Array.isArray(allResults) || allResults.length === 0) {
    return { error: 'No successful runs' };
  }

  // Captured process output can be large; only the head is kept in the report.
  const MAX_OUTPUT_CHARS = 500;

  const asArray = (value) => (Array.isArray(value) ? value : []);
  const averageOf = (rates) =>
    (rates.reduce((sum, rate) => sum + rate, 0) / rates.length).toFixed(1);
  // String() keeps truncation safe if the captured output is not a plain string.
  const truncate = (text) => (text ? String(text).substring(0, MAX_OUTPUT_CHARS) : null);

  // Global success rate, averaged over all runs.
  const globalRates = allResults.map((run) => parseFloat(run.globalSuccessRate));

  // Maps (not plain objects) so that names such as "constructor", "toString"
  // or "__proto__" cannot collide with members inherited from Object.prototype.
  const ratesByCategory = new Map();
  const errorsByExercise = new Map();

  for (const run of allResults) {
    for (const category of asArray(run.categories)) {
      // Per-category success rates, one sample per run.
      if (!ratesByCategory.has(category.category)) {
        ratesByCategory.set(category.category, []);
      }
      ratesByCategory.get(category.category).push(parseFloat(category.successRate));

      // Errors per exercise, each tagged with the run number.
      for (const exercise of asArray(category.exercises)) {
        if (exercise.success || !exercise.error) {
          continue;
        }
        if (!errorsByExercise.has(exercise.name)) {
          errorsByExercise.set(exercise.name, []);
        }
        errorsByExercise.get(exercise.name).push({
          run: run.runNumber,
          type: exercise.error.type || 'COMPILATION_ERROR',
          message: exercise.error.message || 'Compilation failed',
          stdout: truncate(exercise.error.stdout),
          stderr: truncate(exercise.error.stderr),
        });
      }
    }
  }

  // Object.fromEntries defines own data properties, so every key survives.
  const categorySuccessRates = Object.fromEntries(
    [...ratesByCategory].map(([name, rates]) => [name, `${averageOf(rates)}%`])
  );

  return {
    summary: {
      totalRuns: allResults.length,
      globalSuccessRate: `${averageOf(globalRates)}%`,
    },
    categorySuccessRates,
    exerciseErrors: Object.fromEntries(errorsByExercise),
  };
}
381391
382392async function runSingleTest ( runNumber ) {
@@ -431,7 +441,7 @@ async function runSingleTest(runNumber) {
431441
432442 // Calculer le total d'exercices
433443 const totalExercises = Object . values ( categoriesToTest ) . reduce ( ( sum , exercises ) => sum + exercises . length , 0 ) ;
434- console . log ( `\n🧪 [RUN ${ runNumber } /10 ] Starting evaluation of ${ totalExercises } exercises across ${ Object . keys ( categoriesToTest ) . length } categories...` ) ;
444+ console . log ( `\n🧪 [RUN ${ runNumber } /2 ] Starting evaluation of ${ totalExercises } exercises across ${ Object . keys ( categoriesToTest ) . length } categories...` ) ;
435445
436446 // Traiter les catégories en parallèle
437447 const startTime = Date . now ( ) ;
@@ -464,7 +474,7 @@ async function runSingleTest(runNumber) {
464474}
465475
466476async function main ( ) {
467- const NUM_RUNS = 10 ;
477+ const NUM_RUNS = 2 ;
468478 const allResults = [ ] ;
469479
470480 console . log ( `🚀 Starting ${ NUM_RUNS } successive test runs...` ) ;
0 commit comments