FEAT: add gemma-2-it (#1774)

xorbitsai · Jul 5, 2024 · 007408c · 007408c
1 parent aada9b4
commit 007408c
Show file tree

Hide file tree

Showing 2 changed files with 140 additions and 0 deletions.
diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json
@@ -6143,6 +6143,99 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "gemma-2-it",
+    "model_lang": [
+      "en"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "none",
+          "4-bit",
+          "8-bit"
+        ],
+        "model_id": "google/gemma-2-9b-it"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "none",
+          "4-bit",
+          "8-bit"
+        ],
+        "model_id": "google/gemma-2-27b-it"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/gemma-2-9b-it-4bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/gemma-2-9b-it-8bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "None"
+        ],
+        "model_id": "mlx-community/gemma-2-9b-it-fp16"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/gemma-2-27b-it-4bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/gemma-2-27b-it-8bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "None"
+        ],
+        "model_id": "mlx-community/gemma-2-27b-it-fp16"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "gemma",
+      "roles": [
+        "user",
+        "model"
+      ],
+      "stop": [
+        "<end_of_turn>",
+        "<start_of_turn>"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 4096,

diff --git a/xinference/model/llm/llm_family_modelscope.json b/xinference/model/llm/llm_family_modelscope.json
@@ -3738,6 +3738,53 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "gemma-2-it",
+    "model_lang": [
+      "en"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "none",
+          "4-bit",
+          "8-bit"
+        ],
+        "model_id": "AI-ModelScope/gemma-2-9b-it",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "none",
+          "4-bit",
+          "8-bit"
+        ],
+        "model_id": "AI-ModelScope/gemma-2-27b-it",
+        "model_hub": "modelscope"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "gemma",
+      "roles": [
+        "user",
+        "model"
+      ],
+      "stop": [
+        "<end_of_turn>",
+        "<start_of_turn>"
+      ]
+    }
+  },
   {
     "version":1,
     "context_length":2048,