
Commit e2f80e3

Merge pull request #331 from kbenkhaled/functiongemma
FunctionGemma
2 parents 58e9b98 + cb36ea4

4 files changed: +119 −14 lines

src/components/RunCommand.astro

Lines changed: 2 additions & 2 deletions

```diff
@@ -72,7 +72,7 @@ const engineUi = engines.map((e) => ({ id: e.id, label: e.label }));
 const engineImagesOrin: Record<string, string> = {
   ollama: 'ollama/ollama:latest',
   vllm: 'ghcr.io/nvidia-ai-iot/vllm:latest-jetson-orin',
-  llamacpp: 'ghcr.io/ggerganov/llama.cpp:server',
+  llamacpp: 'ghcr.io/nvidia-ai-iot/llama_cpp:latest-jetson-orin',
   tensorrtllm: 'nvcr.io/nvidia/tensorrt-llm:latest',
   diffusers: 'pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime',
   comfyui: 'ghcr.io/comfyanonymous/comfyui:latest',
@@ -83,7 +83,7 @@ const engineImagesOrin: Record<string, string> = {
 const engineImagesThor: Record<string, string> = {
   ollama: 'ollama/ollama:latest',
   vllm: 'ghcr.io/nvidia-ai-iot/vllm:latest-jetson-thor',
-  llamacpp: 'ghcr.io/ggerganov/llama.cpp:server',
+  llamacpp: 'ghcr.io/nvidia-ai-iot/llama_cpp:latest-jetson-thor',
   tensorrtllm: 'nvcr.io/nvidia/tensorrt-llm:latest',
   diffusers: 'pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime',
   comfyui: 'ghcr.io/comfyanonymous/comfyui:latest',
```
Lines changed: 100 additions & 0 deletions

@@ -0,0 +1,100 @@
---
title: "FunctionGemma"
model_id: "functiongemma"
short_description: "Google's specialized function calling model built on Gemma 3 270M, optimized for tool use"
family: "Google Gemma3"
icon: "💎"
is_new: true
order: 0.5
type: "Text"
memory_requirements: "1GB RAM"
precision: "FP8"
model_size: "0.5GB"
hf_checkpoint: "ggml-org/functiongemma-270m-it-GGUF"
minimum_jetson: "Orin Nano"
supported_inference_engines:
  - engine: "llama.cpp"
    type: "Container"
    run_command_orin: "sudo docker run -it --rm --runtime=nvidia --network host ghcr.io/nvidia-ai-iot/llama_cpp:latest-jetson-orin llama-server --jinja -fa on -hf ggml-org/functiongemma-270m-it-GGUF --alias functiongemma"
    run_command_thor: "sudo docker run -it --rm --runtime=nvidia --network host ghcr.io/nvidia-ai-iot/llama_cpp:latest-jetson-thor llama-server --jinja -fa on -hf ggml-org/functiongemma-270m-it-GGUF --alias functiongemma"
---

FunctionGemma is a lightweight, open model from Google, designed as a foundation for creating your own specialized function calling models. Built on the Gemma 3 270M model with the same research and technology used to create the Gemini models, FunctionGemma has been trained specifically for function calling. It has the same architecture as Gemma 3 but uses a different chat format optimized for tool use.

**Note:** FunctionGemma is not intended for use as a direct dialogue model. It is designed to be highly performant after further fine-tuning, as is typical of models this size. The model is well suited for text-only function calling scenarios.

This model is a strong fit for applications like home assistants: a voice command is first transcribed with speech-to-text (STT), and the resulting text is passed to the model, which calls the appropriate tool. Commands like "turn off the lights," "open the garage," "set the thermostat to 72 degrees," or "turn on the coffee maker" can be processed efficiently, as the sketch below shows. The model can also call tools in parallel, making it efficient for multiple commands or complex multi-step actions.
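As a concrete sketch of that flow (the `set_thermostat` tool name and schema here are illustrative, not something the model or the server provides), the transcribed command becomes the user message and the smart-home action becomes a tool definition:

```bash
curl http://localhost:8080/v1/chat/completions -d '{
  "model": "functiongemma",
  "messages": [
    {"role": "user", "content": "Set the thermostat to 72 degrees"}
  ],
  "tools": [{
    "type": "function",
    "function": {
      "name": "set_thermostat",
      "description": "Set the target temperature of the home thermostat",
      "parameters": {
        "type": "object",
        "properties": {
          "temperature": {
            "type": "number",
            "description": "Target temperature in degrees Fahrenheit, e.g. 72"
          }
        },
        "required": ["temperature"]
      }
    }
  }]
}'
```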
## Supported Platforms

- ✅ Jetson Orin (Orin Nano, Orin NX, AGX Orin)
- ✅ Jetson Thor

You can use FunctionGemma with your favorite orchestration framework or any library/software that supports OpenAI-compatible API backends.
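For instance, once the container from the run command above is up (llama-server listens on port 8080 by default, which the examples below assume), a quick sanity check of the OpenAI-compatible endpoint might look like this; the returned model list should include the `functiongemma` alias set in the run command:

```bash
# List models served by llama-server via the OpenAI-compatible API
curl http://localhost:8080/v1/models
```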
## Getting Started

### Quick Hello World Example

Here's a simple CLI example to get you started with function calling:
```bash
curl http://localhost:8080/v1/chat/completions -d '{
  "model": "functiongemma",
  "messages": [
    {"role": "system", "content": "You are a chatbot that uses tools/functions. Dont overthink things."},
    {"role": "user", "content": "What is the weather in Istanbul?"}
  ],
  "tools": [{
    "type": "function",
    "function": {
      "name": "get_current_weather",
      "description": "Get the current weather in a given location",
      "parameters": {
        "type": "object",
        "properties": {
          "location": {
            "type": "string",
            "description": "The city and country/state, e.g. `San Francisco, CA`, or `Paris, France`"
          }
        },
        "required": ["location"]
      }
    }
  }]
}'
```
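If the call succeeds, the server answers in the OpenAI chat completions format. An abridged sketch of what the response typically looks like (exact fields and ids vary by llama.cpp version):

```json
{
  "choices": [{
    "message": {
      "role": "assistant",
      "tool_calls": [{
        "type": "function",
        "function": {
          "name": "get_current_weather",
          "arguments": "{\"location\": \"Istanbul, Turkey\"}"
        }
      }]
    },
    "finish_reason": "tool_calls"
  }]
}
```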
### Parallel Tool Calling

To enable parallel tool calling, simply add `"parallel_tool_calls": true` to your request payload:
```bash
curl http://localhost:8080/v1/chat/completions -d '{
  "model": "functiongemma",
  "parallel_tool_calls": true,
  "messages": [
    {"role": "user", "content": "Turn on the living room lights and set the temperature to 70"}
  ],
  "tools": [...]
}'
```
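With two matching tools defined (say, hypothetical `set_lights` and `set_thermostat` functions), a parallel response carries one entry per call in the same `tool_calls` array, roughly:

```json
"tool_calls": [
  {"type": "function", "function": {"name": "set_lights", "arguments": "{\"room\": \"living room\", \"state\": \"on\"}"}},
  {"type": "function", "function": {"name": "set_thermostat", "arguments": "{\"temperature\": 70}"}}
]
```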
## Key Features

- 🎯 **Specialized for Function Calling**: Purpose-built for tool use and API calling
- **Lightweight**: Only 270M parameters, runs efficiently on edge devices
- 🔄 **Parallel Execution**: Call multiple tools simultaneously
## Inputs and Outputs

**Input:**
- Text string with system and user messages
- Tool/function definitions in OpenAI format
- An optional flag to enable parallel tool calling

**Output:**
- Structured function calls with appropriate parameters
- Compatible with OpenAI chat completions format
- JSON-formatted tool invocations

src/layouts/Layout.astro

Lines changed: 16 additions & 11 deletions

```diff
@@ -361,20 +361,25 @@ const { title, description = "Experience the latest generative AI models optimiz
 function buildShell(meta, st) {
   var engineLower = (st.engine || '').toLowerCase();
 
-  // Check for full custom command from supportedEngines (for vLLM)
-  if (engineLower === 'vllm' && meta.supportedEngines && meta.supportedEngines.length > 0) {
-    var vllmEngine = meta.supportedEngines.find(function(e) {
-      return e.engine && e.engine.toLowerCase() === 'vllm';
+  // Check for full custom command from supportedEngines (for vLLM and llama.cpp)
+  if (meta.supportedEngines && meta.supportedEngines.length > 0) {
+    var customEngine = meta.supportedEngines.find(function(e) {
+      if (!e.engine) return false;
+      var engineName = e.engine.toLowerCase();
+      // Match by exact name or by removing dots/special chars (llama.cpp -> llamacpp)
+      var normalizedName = engineName.replace(/[.\-_]/g, '');
+      var normalizedEngineLower = engineLower.replace(/[.\-_]/g, '');
+      return engineName === engineLower || normalizedName === normalizedEngineLower;
     });
-    if (vllmEngine) {
+    if (customEngine) {
       var customCmd = null;
       var device = st.device || 'Jetson Orin';
-      if (device === 'Jetson Thor' && vllmEngine.run_command_thor) {
-        customCmd = vllmEngine.run_command_thor;
-      } else if (device === 'Jetson Orin' && vllmEngine.run_command_orin) {
-        customCmd = vllmEngine.run_command_orin;
-      } else if (vllmEngine.run_command) {
-        customCmd = vllmEngine.run_command;
+      if (device === 'Jetson Thor' && customEngine.run_command_thor) {
+        customCmd = customEngine.run_command_thor;
+      } else if (device === 'Jetson Orin' && customEngine.run_command_orin) {
+        customCmd = customEngine.run_command_orin;
+      } else if (customEngine.run_command) {
+        customCmd = customEngine.run_command;
       }
       if (customCmd) {
         return customCmd;
```
src/pages/models/index.astro

Lines changed: 1 addition & 1 deletion

```diff
@@ -66,7 +66,7 @@ const families = Array.from(new Set(models.map(m => m.family)));
           {model.description}
         </p>
 
-        <div class="mt-auto pt-4 border-t border-gray-100 relative z-20">
+        <div class="mt-auto pt-4 border-t border-gray-100">
           <div class="flex items-center gap-2">
             <RunCommand modelId={model.id} modelName={model.name} category={model.category} forceModal={model.category === 'Image'} supportedEngines={model.supported_inference_engines} hfCheckpoint={model.hf_checkpoint} />
             <a href={`/models/${model.id}`} class="px-3 py-1.5 rounded-md bg-nvidia-gray-100 text-nvidia-gray-900 hover:bg-nvidia-green hover:text-nvidia-black text-xs font-semibold transition-colors">
```