|
314 | 314 | }, |
315 | 315 | { |
316 | 316 | "cell_type": "code", |
317 | | - "execution_count": 4, |
| 317 | + "execution_count": null, |
318 | 318 | "metadata": { |
319 | 319 | "id": "LOUHK7PtQfq4" |
320 | 320 | }, |
321 | | - "outputs": [ |
322 | | - { |
323 | | - "name": "stdout", |
324 | | - "output_type": "stream", |
325 | | - "text": [ |
326 | | - "2023-11-29:11:54:55,156 INFO [utils.py:160] NumExpr defaulting to 2 threads.\n", |
327 | | - "2023-11-29 11:54:55.942051: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", |
328 | | - "2023-11-29 11:54:55.942108: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", |
329 | | - "2023-11-29 11:54:55.942142: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", |
330 | | - "2023-11-29 11:54:57.066802: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", |
331 | | - "2023-11-29:11:55:00,954 INFO [__main__.py:132] Verbosity set to INFO\n", |
332 | | - "2023-11-29:11:55:11,038 WARNING [__main__.py:138] --limit SHOULD ONLY BE USED FOR TESTING.REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.\n", |
333 | | - "2023-11-29:11:55:11,038 INFO [__main__.py:143] Including path: ./\n", |
334 | | - "2023-11-29:11:55:11,046 INFO [__main__.py:205] Selected Tasks: ['demo_boolq']\n", |
335 | | - "2023-11-29:11:55:11,047 WARNING [evaluator.py:93] generation_kwargs specified through cli, these settings will be used over set parameters in yaml tasks.\n", |
336 | | - "2023-11-29:11:55:11,110 INFO [huggingface.py:120] Using device 'cuda'\n", |
337 | | - "config.json: 100% 571/571 [00:00<00:00, 2.87MB/s]\n", |
338 | | - "model.safetensors: 100% 5.68G/5.68G [00:32<00:00, 173MB/s]\n", |
339 | | - "tokenizer_config.json: 100% 396/396 [00:00<00:00, 2.06MB/s]\n", |
340 | | - "tokenizer.json: 100% 2.11M/2.11M [00:00<00:00, 11.6MB/s]\n", |
341 | | - "special_tokens_map.json: 100% 99.0/99.0 [00:00<00:00, 555kB/s]\n", |
342 | | - "2023-11-29:11:56:18,658 WARNING [task.py:614] [Task: demo_boolq] metric acc is defined, but aggregation is not. using default aggregation=mean\n", |
343 | | - "2023-11-29:11:56:18,658 WARNING [task.py:626] [Task: demo_boolq] metric acc is defined, but higher_is_better is not. using default higher_is_better=True\n", |
344 | | - "Downloading builder script: 100% 30.7k/30.7k [00:00<00:00, 59.0MB/s]\n", |
345 | | - "Downloading metadata: 100% 38.7k/38.7k [00:00<00:00, 651kB/s]\n", |
346 | | - "Downloading readme: 100% 14.8k/14.8k [00:00<00:00, 37.3MB/s]\n", |
347 | | - "Downloading data: 100% 4.12M/4.12M [00:00<00:00, 55.1MB/s]\n", |
348 | | - "Generating train split: 100% 9427/9427 [00:00<00:00, 15630.89 examples/s]\n", |
349 | | - "Generating validation split: 100% 3270/3270 [00:00<00:00, 20002.56 examples/s]\n", |
350 | | - "Generating test split: 100% 3245/3245 [00:00<00:00, 20866.19 examples/s]\n", |
351 | | - "2023-11-29:11:56:22,315 INFO [task.py:355] Building contexts for task on rank 0...\n", |
352 | | - "2023-11-29:11:56:22,322 INFO [evaluator.py:319] Running loglikelihood requests\n", |
353 | | - "100% 20/20 [00:04<00:00, 4.37it/s]\n", |
354 | | - "fatal: not a git repository (or any of the parent directories): .git\n", |
355 | | - "hf (pretrained=EleutherAI/pythia-2.8b), gen_kwargs: (), limit: 10.0, num_fewshot: None, batch_size: 1\n", |
356 | | - "| Tasks |Version|Filter|n-shot|Metric|Value| |Stderr|\n", |
357 | | - "|----------|-------|------|-----:|------|----:|---|-----:|\n", |
358 | | - "|demo_boolq|Yaml |none | 0|acc | 1|± | 0|\n", |
359 | | - "\n" |
360 | | - ] |
361 | | - } |
362 | | - ], |
363 | | - "source": [ |
364 | | - "%env LOGLEVEL=DEBUG\n", |
365 | | - "!lm_eval \\\n", |
366 | | - " --model hf \\\n", |
367 | | - " --model_args pretrained=EleutherAI/pythia-2.8b \\\n", |
368 | | - " --include_path ./ \\\n", |
369 | | - " --tasks demo_boolq \\\n", |
370 | | - " --limit 10" |
371 | | - ] |
| 321 | + "outputs": [], |
| 322 | + "source": "%env LMEVAL_LOG_LEVEL=DEBUG\n!lm_eval \\\n --model hf \\\n --model_args pretrained=EleutherAI/pythia-2.8b \\\n --include_path ./ \\\n --tasks demo_boolq \\\n --limit 10" |
372 | 323 | }, |
373 | 324 | { |
374 | 325 | "cell_type": "markdown", |
|
415 | 366 | }, |
416 | 367 | { |
417 | 368 | "cell_type": "code", |
418 | | - "execution_count": 6, |
| 369 | + "execution_count": null, |
419 | 370 | "metadata": { |
420 | 371 | "id": "XceRKCuuDtbn" |
421 | 372 | }, |
422 | | - "outputs": [ |
423 | | - { |
424 | | - "name": "stdout", |
425 | | - "output_type": "stream", |
426 | | - "text": [ |
427 | | - "2023-11-29:11:56:33,016 INFO [utils.py:160] NumExpr defaulting to 2 threads.\n", |
428 | | - "2023-11-29 11:56:33.852995: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", |
429 | | - "2023-11-29 11:56:33.853050: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", |
430 | | - "2023-11-29 11:56:33.853087: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", |
431 | | - "2023-11-29 11:56:35.129047: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", |
432 | | - "2023-11-29:11:56:38,546 INFO [__main__.py:132] Verbosity set to INFO\n", |
433 | | - "2023-11-29:11:56:47,509 WARNING [__main__.py:138] --limit SHOULD ONLY BE USED FOR TESTING.REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.\n", |
434 | | - "2023-11-29:11:56:47,509 INFO [__main__.py:143] Including path: ./\n", |
435 | | - "2023-11-29:11:56:47,517 INFO [__main__.py:205] Selected Tasks: ['yes_or_no_tasks']\n", |
436 | | - "2023-11-29:11:56:47,520 WARNING [evaluator.py:93] generation_kwargs specified through cli, these settings will be used over set parameters in yaml tasks.\n", |
437 | | - "2023-11-29:11:56:47,550 INFO [huggingface.py:120] Using device 'cuda'\n", |
438 | | - "2023-11-29:11:57:08,743 WARNING [task.py:614] [Task: demo_cola] metric acc is defined, but aggregation is not. using default aggregation=mean\n", |
439 | | - "2023-11-29:11:57:08,743 WARNING [task.py:626] [Task: demo_cola] metric acc is defined, but higher_is_better is not. using default higher_is_better=True\n", |
440 | | - "Downloading builder script: 100% 28.8k/28.8k [00:00<00:00, 52.7MB/s]\n", |
441 | | - "Downloading metadata: 100% 28.7k/28.7k [00:00<00:00, 51.9MB/s]\n", |
442 | | - "Downloading readme: 100% 27.9k/27.9k [00:00<00:00, 48.0MB/s]\n", |
443 | | - "Downloading data: 100% 377k/377k [00:00<00:00, 12.0MB/s]\n", |
444 | | - "Generating train split: 100% 8551/8551 [00:00<00:00, 19744.58 examples/s]\n", |
445 | | - "Generating validation split: 100% 1043/1043 [00:00<00:00, 27057.01 examples/s]\n", |
446 | | - "Generating test split: 100% 1063/1063 [00:00<00:00, 22705.17 examples/s]\n", |
447 | | - "2023-11-29:11:57:11,698 INFO [task.py:355] Building contexts for task on rank 0...\n", |
448 | | - "2023-11-29:11:57:11,704 INFO [evaluator.py:319] Running loglikelihood requests\n", |
449 | | - "100% 20/20 [00:03<00:00, 5.15it/s]\n", |
450 | | - "fatal: not a git repository (or any of the parent directories): .git\n", |
451 | | - "hf (pretrained=EleutherAI/pythia-2.8b), gen_kwargs: (), limit: 10.0, num_fewshot: None, batch_size: 1\n", |
452 | | - "| Tasks |Version|Filter|n-shot|Metric|Value| |Stderr|\n", |
453 | | - "|---------------|-------|------|-----:|------|----:|---|-----:|\n", |
454 | | - "|yes_or_no_tasks|N/A |none | 0|acc | 0.7|± |0.1528|\n", |
455 | | - "| - demo_cola |Yaml |none | 0|acc | 0.7|± |0.1528|\n", |
456 | | - "\n", |
457 | | - "| Groups |Version|Filter|n-shot|Metric|Value| |Stderr|\n", |
458 | | - "|---------------|-------|------|-----:|------|----:|---|-----:|\n", |
459 | | - "|yes_or_no_tasks|N/A |none | 0|acc | 0.7|± |0.1528|\n", |
460 | | - "\n" |
461 | | - ] |
462 | | - } |
463 | | - ], |
464 | | - "source": [ |
465 | | - "# !accelerate launch --no_python\n", |
466 | | - "%env LOGLEVEL=DEBUG\n", |
467 | | - "!lm_eval \\\n", |
468 | | - " --model hf \\\n", |
469 | | - " --model_args pretrained=EleutherAI/pythia-2.8b \\\n", |
470 | | - " --include_path ./ \\\n", |
471 | | - " --tasks yes_or_no_tasks \\\n", |
472 | | - " --limit 10 \\\n", |
473 | | - " --output output/yes_or_no_tasks/ \\\n", |
474 | | - " --log_samples" |
475 | | - ] |
| 373 | + "outputs": [], |
| 374 | + "source": "# !accelerate launch --no_python\n%env LMEVAL_LOG_LEVEL=DEBUG\n!lm_eval \\\n --model hf \\\n --model_args pretrained=EleutherAI/pythia-2.8b \\\n --include_path ./ \\\n --tasks yes_or_no_tasks \\\n --limit 10 \\\n --output output/yes_or_no_tasks/ \\\n --log_samples" |
476 | 375 | }, |
477 | 376 | { |
478 | 377 | "cell_type": "markdown", |
|
520 | 419 | }, |
521 | 420 | { |
522 | 421 | "cell_type": "code", |
523 | | - "execution_count": 8, |
| 422 | + "execution_count": null, |
524 | 423 | "metadata": { |
525 | 424 | "id": "jyKOfCsKb-xy" |
526 | 425 | }, |
527 | | - "outputs": [ |
528 | | - { |
529 | | - "name": "stdout", |
530 | | - "output_type": "stream", |
531 | | - "text": [ |
532 | | - "2023-11-29:11:57:23,598 INFO [utils.py:160] NumExpr defaulting to 2 threads.\n", |
533 | | - "2023-11-29 11:57:24.719750: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", |
534 | | - "2023-11-29 11:57:24.719806: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", |
535 | | - "2023-11-29 11:57:24.719847: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", |
536 | | - "2023-11-29 11:57:26.656125: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", |
537 | | - "2023-11-29:11:57:31,563 INFO [__main__.py:132] Verbosity set to INFO\n", |
538 | | - "2023-11-29:11:57:40,541 WARNING [__main__.py:138] --limit SHOULD ONLY BE USED FOR TESTING.REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.\n", |
539 | | - "2023-11-29:11:57:40,541 INFO [__main__.py:143] Including path: ./\n", |
540 | | - "2023-11-29:11:57:40,558 INFO [__main__.py:205] Selected Tasks: ['demo_mmlu_high_school_geography']\n", |
541 | | - "2023-11-29:11:57:40,559 WARNING [evaluator.py:93] generation_kwargs specified through cli, these settings will be used over set parameters in yaml tasks.\n", |
542 | | - "2023-11-29:11:57:40,589 INFO [huggingface.py:120] Using device 'cuda'\n", |
543 | | - "Downloading builder script: 100% 5.84k/5.84k [00:00<00:00, 17.7MB/s]\n", |
544 | | - "Downloading metadata: 100% 106k/106k [00:00<00:00, 892kB/s] \n", |
545 | | - "Downloading readme: 100% 39.7k/39.7k [00:00<00:00, 631kB/s]\n", |
546 | | - "Downloading data: 100% 166M/166M [00:01<00:00, 89.0MB/s]\n", |
547 | | - "Generating auxiliary_train split: 100% 99842/99842 [00:07<00:00, 12536.83 examples/s]\n", |
548 | | - "Generating test split: 100% 198/198 [00:00<00:00, 1439.20 examples/s]\n", |
549 | | - "Generating validation split: 100% 22/22 [00:00<00:00, 4181.76 examples/s]\n", |
550 | | - "Generating dev split: 100% 5/5 [00:00<00:00, 36.25 examples/s]\n", |
551 | | - "2023-11-29:11:58:09,798 INFO [task.py:355] Building contexts for task on rank 0...\n", |
552 | | - "2023-11-29:11:58:09,822 INFO [evaluator.py:319] Running loglikelihood requests\n", |
553 | | - "100% 40/40 [00:05<00:00, 7.86it/s]\n", |
554 | | - "fatal: not a git repository (or any of the parent directories): .git\n", |
555 | | - "hf (pretrained=EleutherAI/pythia-2.8b), gen_kwargs: (), limit: 10.0, num_fewshot: None, batch_size: 1\n", |
556 | | - "| Tasks |Version|Filter|n-shot| Metric |Value| |Stderr|\n", |
557 | | - "|-------------------------------|-------|------|-----:|--------|----:|---|-----:|\n", |
558 | | - "|demo_mmlu_high_school_geography|Yaml |none | 0|acc | 0.3|± |0.1528|\n", |
559 | | - "| | |none | 0|acc_norm| 0.3|± |0.1528|\n", |
560 | | - "\n" |
561 | | - ] |
562 | | - } |
563 | | - ], |
564 | | - "source": [ |
565 | | - "# !accelerate launch --no_python\n", |
566 | | - "%env LOGLEVEL=DEBUG\n", |
567 | | - "!lm_eval \\\n", |
568 | | - " --model hf \\\n", |
569 | | - " --model_args pretrained=EleutherAI/pythia-2.8b \\\n", |
570 | | - " --include_path ./ \\\n", |
571 | | - " --tasks demo_mmlu_high_school_geography \\\n", |
572 | | - " --limit 10 \\\n", |
573 | | - " --output output/mmlu_high_school_geography/ \\\n", |
574 | | - " --log_samples" |
575 | | - ] |
| 426 | + "outputs": [], |
| 427 | + "source": "# !accelerate launch --no_python\n%env LMEVAL_LOG_LEVEL=DEBUG\n!lm_eval \\\n --model hf \\\n --model_args pretrained=EleutherAI/pythia-2.8b \\\n --include_path ./ \\\n --tasks demo_mmlu_high_school_geography \\\n --limit 10 \\\n --output output/mmlu_high_school_geography/ \\\n --log_samples" |
576 | 428 | }, |
577 | 429 | { |
578 | 430 | "cell_type": "markdown", |
|
605 | 457 | }, |
606 | 458 | { |
607 | 459 | "cell_type": "code", |
608 | | - "execution_count": 10, |
| 460 | + "execution_count": null, |
609 | 461 | "metadata": { |
610 | 462 | "id": "-_CVnDirdy7j" |
611 | 463 | }, |
612 | | - "outputs": [ |
613 | | - { |
614 | | - "name": "stdout", |
615 | | - "output_type": "stream", |
616 | | - "text": [ |
617 | | - "2023-11-29:11:58:21,284 INFO [utils.py:160] NumExpr defaulting to 2 threads.\n", |
618 | | - "2023-11-29 11:58:22.850159: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", |
619 | | - "2023-11-29 11:58:22.850219: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", |
620 | | - "2023-11-29 11:58:22.850254: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", |
621 | | - "2023-11-29 11:58:24.948103: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", |
622 | | - "2023-11-29:11:58:28,460 INFO [__main__.py:132] Verbosity set to INFO\n", |
623 | | - "2023-11-29:11:58:37,935 WARNING [__main__.py:138] --limit SHOULD ONLY BE USED FOR TESTING.REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.\n", |
624 | | - "2023-11-29:11:58:37,935 INFO [__main__.py:143] Including path: ./\n", |
625 | | - "2023-11-29:11:58:37,969 INFO [__main__.py:205] Selected Tasks: ['demo_mmlu_high_school_geography_continuation']\n", |
626 | | - "2023-11-29:11:58:37,972 WARNING [evaluator.py:93] generation_kwargs specified through cli, these settings will be used over set parameters in yaml tasks.\n", |
627 | | - "2023-11-29:11:58:38,008 INFO [huggingface.py:120] Using device 'cuda'\n", |
628 | | - "2023-11-29:11:58:59,758 INFO [task.py:355] Building contexts for task on rank 0...\n", |
629 | | - "2023-11-29:11:58:59,777 INFO [evaluator.py:319] Running loglikelihood requests\n", |
630 | | - "100% 40/40 [00:02<00:00, 16.23it/s]\n", |
631 | | - "fatal: not a git repository (or any of the parent directories): .git\n", |
632 | | - "hf (pretrained=EleutherAI/pythia-2.8b), gen_kwargs: (), limit: 10.0, num_fewshot: None, batch_size: 1\n", |
633 | | - "| Tasks |Version|Filter|n-shot| Metric |Value| |Stderr|\n", |
634 | | - "|--------------------------------------------|-------|------|-----:|--------|----:|---|-----:|\n", |
635 | | - "|demo_mmlu_high_school_geography_continuation|Yaml |none | 0|acc | 0.1|± |0.1000|\n", |
636 | | - "| | |none | 0|acc_norm| 0.2|± |0.1333|\n", |
637 | | - "\n" |
638 | | - ] |
639 | | - } |
640 | | - ], |
641 | | - "source": [ |
642 | | - "# !accelerate launch --no_python\n", |
643 | | - "%env LOGLEVEL=DEBUG\n", |
644 | | - "!lm_eval \\\n", |
645 | | - " --model hf \\\n", |
646 | | - " --model_args pretrained=EleutherAI/pythia-2.8b \\\n", |
647 | | - " --include_path ./ \\\n", |
648 | | - " --tasks demo_mmlu_high_school_geography_continuation \\\n", |
649 | | - " --limit 10 \\\n", |
650 | | - " --output output/mmlu_high_school_geography_continuation/ \\\n", |
651 | | - " --log_samples" |
652 | | - ] |
| 464 | + "outputs": [], |
| 465 | + "source": "# !accelerate launch --no_python\n%env LMEVAL_LOG_LEVEL=DEBUG\n!lm_eval \\\n --model hf \\\n --model_args pretrained=EleutherAI/pythia-2.8b \\\n --include_path ./ \\\n --tasks demo_mmlu_high_school_geography_continuation \\\n --limit 10 \\\n --output output/mmlu_high_school_geography_continuation/ \\\n --log_samples" |
653 | 466 | }, |
654 | 467 | { |
655 | 468 | "cell_type": "markdown", |
|
0 commit comments