|
50 | 50 | "name": "AWQ TinyChat", |
51 | 51 | "tags": "api" |
52 | 52 | }, |
| 53 | + "vllm": { |
| 54 | + "name": "vLLM", |
| 55 | + "tags": "api", |
| 56 | + "links": { |
| 57 | + "vllm": { |
| 58 | + "name": "vLLM", |
| 59 | + "url": "https://github.com/vllm-project/vllm" |
| 60 | + } |
| 61 | + } |
| 62 | + }, |
| 63 | + "vllm:jp6": { |
| 64 | + "name": "dustynv/vllm:0.7.4-r36.4.0-cu128-24.04", |
| 65 | + "docker_image": "dustynv/vllm:0.7.4-r36.4.0-cu128-24.04", |
| 66 | + "docker_cmd": "vllm serve ${MODEL}", |
| 67 | + "docker_args": "--host=${SERVER_ADDR} --port=${SERVER_PORT} --dtype=auto --max-num-seqs=${MAX_BATCH_SIZE} --max-model-len=${MAX_CONTEXT_LEN} --gpu-memory-utilization=0.75", |
| 68 | + "docker_options": "-it --rm", |
| 69 | + "server_host": "0.0.0.0:9000", |
| 70 | + "tags": [ |
| 71 | + "container", |
| 72 | + "vllm", |
| 73 | + "l4t-r36" |
| 74 | + ] |
| 75 | + }, |
53 | 76 | "sudonim": { |
54 | 77 | "docker_cmd": "sudonim serve", |
55 | 78 | "docker_options": "-it --rm", |
|
65 | 88 | ], |
66 | 89 | "help": "The inference API and type of quantization used." |
67 | 90 | }, |
| 91 | + "fp16": { |
| 92 | + "name": "fp16 (vLLM)", |
| 93 | + "tags": [ |
| 94 | + "quantization", |
| 95 | + "vllm" |
| 96 | + ] |
| 97 | + }, |
68 | 98 | "q4f16_ft": { |
69 | 99 | "name": "q4f16_ft (MLC)", |
70 | 100 | "tags": [ |
|
2648 | 2678 | "max_context_len": 8192, |
2649 | 2679 | "prefill_chunk": 8192 |
2650 | 2680 | }, |
| 2681 | + "gemma-3": { |
| 2682 | + "name": "Google Gemma 3", |
| 2683 | + "header": "gemma-3-header", |
| 2684 | + "max_context_len": { |
| 2685 | + "placeholder": 4096 |
| 2686 | + }, |
| 2687 | + "prefill_chunk": { |
| 2688 | + "placeholder": 4096 |
| 2689 | + }, |
| 2690 | + "blacklist": [ |
| 2691 | + "mlc", |
| 2692 | + "llama_cpp" |
| 2693 | + ], |
| 2694 | + "tags": [ |
| 2695 | + "llm" |
| 2696 | + ], |
| 2697 | + "links": { |
| 2698 | + "google": { |
| 2699 | + "name": "Google", |
| 2700 | + "url": "https://ai.google.dev/gemma" |
| 2701 | + }, |
| 2702 | + "hf": { |
| 2703 | + "name": "Hugging Face", |
| 2704 | + "color": "yellow", |
| 2705 | + "url": "https://huggingface.co/collections/google/gemma-3-release-67c6c6f89c4f76621268bb6d" |
| 2706 | + } |
| 2707 | + }, |
| 2708 | + "url": "Google-Gemma-3" |
| 2709 | + }, |
| 2710 | + "gemma-3-1b-it": { |
| 2711 | + "name": "Gemma 3 1B", |
| 2712 | + "url": "hf.co/google/gemma-3-1b-it", |
| 2713 | + "blacklist": [ |
| 2714 | + "mlc", |
| 2715 | + "llama_cpp" |
| 2716 | + ], |
| 2717 | + "tags": [ |
| 2718 | + "gemma-3", |
| 2719 | + "orin" |
| 2720 | + ], |
| 2721 | + "links": { |
| 2722 | + "hf": { |
| 2723 | + "name": "Hugging Face", |
| 2724 | + "color": "yellow", |
| 2725 | + "url": "hf.co/google/gemma-3-1b-it" |
| 2726 | + } |
| 2727 | + }, |
| 2728 | + "created_at": "2025-03-10 12:09:00+00:00", |
| 2729 | + "last_modified": "2025-03-12 14:50:25+00:00" |
| 2730 | + }, |
| 2731 | + "gemma-3-4b-it": { |
| 2732 | + "name": "Gemma 3 4B", |
| 2733 | + "url": "hf.co/google/gemma-3-4b-it", |
| 2734 | + "blacklist": [ |
| 2735 | + "mlc", |
| 2736 | + "llama_cpp" |
| 2737 | + ], |
| 2738 | + "tags": [ |
| 2739 | + "gemma-3", |
| 2740 | + "orin" |
| 2741 | + ], |
| 2742 | + "links": { |
| 2743 | + "hf": { |
| 2744 | + "name": "Hugging Face", |
| 2745 | + "color": "yellow", |
| 2746 | + "url": "hf.co/google/gemma-3-4b-it" |
| 2747 | + } |
| 2748 | + }, |
| 2749 | + "created_at": "2025-02-20 21:20:07+00:00", |
| 2750 | + "last_modified": "2025-03-12 08:30:08+00:00" |
| 2751 | + }, |
| 2752 | + "gemma-3-12b-it": { |
| 2753 | + "name": "Gemma 3 12B", |
| 2754 | + "url": "hf.co/google/gemma-3-12b-it", |
| 2755 | + "blacklist": [ |
| 2756 | + "mlc", |
| 2757 | + "llama_cpp" |
| 2758 | + ], |
| 2759 | + "tags": [ |
| 2760 | + "gemma-3", |
| 2761 | + "orin-nx", |
| 2762 | + "agx-orin" |
| 2763 | + ], |
| 2764 | + "links": { |
| 2765 | + "hf": { |
| 2766 | + "name": "Hugging Face", |
| 2767 | + "color": "yellow", |
| 2768 | + "url": "hf.co/google/gemma-3-12b-it" |
| 2769 | + } |
| 2770 | + }, |
| 2771 | + "created_at": "2025-03-01 19:11:34+00:00", |
| 2772 | + "last_modified": "2025-03-12 08:30:33+00:00" |
| 2773 | + }, |
| 2774 | + "gemma-3-27b-it": { |
| 2775 | + "name": "Gemma 3 27B", |
| 2776 | + "url": "hf.co/google/gemma-3-27b-it", |
| 2777 | + "blacklist": [ |
| 2778 | + "mlc", |
| 2779 | + "llama_cpp" |
| 2780 | + ], |
| 2781 | + "tags": [ |
| 2782 | + "gemma-3", |
| 2783 | + "agx-orin" |
| 2784 | + ], |
| 2785 | + "links": { |
| 2786 | + "hf": { |
| 2787 | + "name": "Hugging Face", |
| 2788 | + "color": "yellow", |
| 2789 | + "url": "hf.co/google/gemma-3-27b-it" |
| 2790 | + } |
| 2791 | + }, |
| 2792 | + "created_at": "2025-03-01 19:10:19+00:00", |
| 2793 | + "last_modified": "2025-03-12 08:30:59+00:00" |
| 2794 | + }, |
| 2795 | + "gemma-3-1b-it-fp16-vllm-jp6": { |
| 2796 | + "title": "Gemma 3 1B \u276f vLLM fp16 \u276f JetPack 6.1+", |
| 2797 | + "quantization": "fp16", |
| 2798 | + "tags": [ |
| 2799 | + "gemma-3-1b-it", |
| 2800 | + "fp16", |
| 2801 | + "vllm:jp6" |
| 2802 | + ] |
| 2803 | + }, |
| 2804 | + "gemma-3-1b-it-q4_0-ollama-jp6": { |
| 2805 | + "title": "Gemma 3 1B \u276f ollama q4_0 \u276f JetPack 6.1+", |
| 2806 | + "quantization": "q4_0", |
| 2807 | + "tags": [ |
| 2808 | + "gemma-3-1b-it", |
| 2809 | + "q4_0", |
| 2810 | + "ollama:jp6" |
| 2811 | + ] |
| 2812 | + }, |
| 2813 | + "gemma-3-4b-it-fp16-vllm-jp6": { |
| 2814 | + "title": "Gemma 3 4B \u276f vLLM fp16 \u276f JetPack 6.1+", |
| 2815 | + "quantization": "fp16", |
| 2816 | + "tags": [ |
| 2817 | + "gemma-3-4b-it", |
| 2818 | + "fp16", |
| 2819 | + "vllm:jp6" |
| 2820 | + ] |
| 2821 | + }, |
| 2822 | + "gemma-3-4b-it-q4_0-ollama-jp6": { |
| 2823 | + "title": "Gemma 3 4B \u276f ollama q4_0 \u276f JetPack 6.1+", |
| 2824 | + "quantization": "q4_0", |
| 2825 | + "tags": [ |
| 2826 | + "gemma-3-4b-it", |
| 2827 | + "q4_0", |
| 2828 | + "ollama:jp6" |
| 2829 | + ] |
| 2830 | + }, |
| 2831 | + "gemma-3-12b-it-fp16-vllm-jp6": { |
| 2832 | + "title": "Gemma 3 12B \u276f vLLM fp16 \u276f JetPack 6.1+", |
| 2833 | + "quantization": "fp16", |
| 2834 | + "tags": [ |
| 2835 | + "gemma-3-12b-it", |
| 2836 | + "fp16", |
| 2837 | + "vllm:jp6" |
| 2838 | + ] |
| 2839 | + }, |
| 2840 | + "gemma-3-12b-it-q4_0-ollama-jp6": { |
| 2841 | + "title": "Gemma 3 12B \u276f ollama q4_0 \u276f JetPack 6.1+", |
| 2842 | + "quantization": "q4_0", |
| 2843 | + "tags": [ |
| 2844 | + "gemma-3-12b-it", |
| 2845 | + "q4_0", |
| 2846 | + "ollama:jp6" |
| 2847 | + ] |
| 2848 | + }, |
| 2849 | + "gemma-3-27b-it-fp16-vllm-jp6": { |
| 2850 | + "title": "Gemma 3 27B \u276f vLLM fp16 \u276f JetPack 6.1+", |
| 2851 | + "quantization": "fp16", |
| 2852 | + "tags": [ |
| 2853 | + "gemma-3-27b-it", |
| 2854 | + "fp16", |
| 2855 | + "vllm:jp6" |
| 2856 | + ] |
| 2857 | + }, |
| 2858 | + "gemma-3-27b-it-q4_0-ollama-jp6": { |
| 2859 | + "title": "Gemma 3 27B \u276f ollama q4_0 \u276f JetPack 6.1+", |
| 2860 | + "quantization": "q4_0", |
| 2861 | + "tags": [ |
| 2862 | + "gemma-3-27b-it", |
| 2863 | + "q4_0", |
| 2864 | + "ollama:jp6" |
| 2865 | + ] |
| 2866 | + }, |
2651 | 2867 | "deepseek-r1-distill": { |
2652 | 2868 | "name": "DeepSeek R1", |
2653 | 2869 | "header": "deepseek-header", |
|
4830 | 5046 | "open_webui" |
4831 | 5047 | ], |
4832 | 5048 | "child_order": [ |
| 5049 | + "gemma-3", |
4833 | 5050 | "deepseek-r1-distill", |
4834 | 5051 | "qwen-2.5", |
4835 | 5052 | "llama-3", |
|
0 commit comments