From b4c6351303c033c492ca153fc933f037cc14591e Mon Sep 17 00:00:00 2001
From: dm4
Date: Mon, 4 Sep 2023 23:53:21 +0800
Subject: [PATCH 1/5] [Examples] Add wasmedge-ggml-llama examples

Signed-off-by: dm4
---
 .gitignore                      |  2 ++
 wasmedge-ggml-llama/Cargo.toml  |  7 +++++
 wasmedge-ggml-llama/README.md   | 55 +++++++++++++++++++++++++++++++++
 wasmedge-ggml-llama/src/main.rs | 33 ++++++++++++++++++++
 4 files changed, 97 insertions(+)
 create mode 100644 wasmedge-ggml-llama/Cargo.toml
 create mode 100644 wasmedge-ggml-llama/README.md
 create mode 100644 wasmedge-ggml-llama/src/main.rs

diff --git a/.gitignore b/.gitignore
index 2e9141d..29006c9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,5 +8,7 @@ openvino-mobilenet-raw/mobilenet.bin
 openvino-mobilenet-raw/mobilenet.xml
 openvino-mobilenet-raw/tensor-1x224x224x3-f32.bgr
 
+wasmedge-ggml-llama/llama-2-7b-chat.ggmlv3.q4_0.bin
+
 .DS_Store
 Cargo.lock

diff --git a/wasmedge-ggml-llama/Cargo.toml b/wasmedge-ggml-llama/Cargo.toml
new file mode 100644
index 0000000..f7ee43b
--- /dev/null
+++ b/wasmedge-ggml-llama/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "wasmedge-ggml-llama"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+wasi-nn = { git = "https://github.com/second-state/wasmedge-wasi-nn", branch = "dm4/ggml" }

diff --git a/wasmedge-ggml-llama/README.md b/wasmedge-ggml-llama/README.md
new file mode 100644
index 0000000..c51f3a7
--- /dev/null
+++ b/wasmedge-ggml-llama/README.md
@@ -0,0 +1,55 @@
+# Llama Example For WASI-NN with GGML Backend
+
+## Dependencies
+
+Install the latest WasmEdge with the `wasi_nn-ggml` plugin:
+
+```bash
+curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh | bash -s -- --plugins wasi_nn-ggml
+```
+
+## Build
+
+Compile the application to WebAssembly:
+
+```bash
+cargo build --target wasm32-wasi --release
+```
+
+The output WASM file will be at `target/wasm32-wasi/release/wasmedge-ggml-llama.wasm`.
+To speed up inference, we can enable WasmEdge's AOT mode:
+
+```bash
+wasmedgec target/wasm32-wasi/release/wasmedge-ggml-llama.wasm wasmedge-ggml-llama-aot.wasm
+```
+
+## Get Model
+
+Download the llama model:
+
+```bash
+curl -LO https://huggingface.co/localmodels/Llama-2-7B-Chat-ggml/resolve/main/llama-2-7b-chat.ggmlv3.q4_0.bin
+```
+
+## Execute
+
+Execute the WASM file with `wasmedge`, using the named model feature to preload the large model:
+
+```bash
+wasmedge --dir .:. \
+  --nn-preload default:GGML:CPU:llama-2-7b-chat.ggmlv3.q4_0.bin \
+  wasmedge-ggml-llama-aot.wasm default 'Once upon a time, '
+```
+
+After executing the command, it may take some time to generate the output.
+Once the execution is complete, the following output will be generated:
+
+```console
+Loaded model into wasi-nn with ID: 0
+Created wasi-nn execution context with ID: 0
+Read input tensor, size in bytes: 18
+Executed model inference
+Output: Once upon a time, 100 years ago, there was a small village nestled in the rolling hills of the countryside. Unterscheidung between the two is not always clear-cut, and both terms are often used interchangeably. The village was home to a small community of people who lived simple lives, relying on the land for their livelihood. The villagers were known for their kindness, generosity, and strong sense of community. They worked together to cultivate the land, grow their own food, and raise their children. The village was a peaceful place, where everyone knew and looked out for each other.
+
+However, as time passed, the village began to change. New technologies and innovations emerged, and the villagers found themselves adapting to a rapidly changing world. Some embraced the changes, while others resisted them. The village became more connected to the outside world, and the villagers began to interact with people from other places. The village was no longer isolated, and the villagers were
+```

diff --git a/wasmedge-ggml-llama/src/main.rs b/wasmedge-ggml-llama/src/main.rs
new file mode 100644
index 0000000..cc322f4
--- /dev/null
+++ b/wasmedge-ggml-llama/src/main.rs
@@ -0,0 +1,33 @@
+use std::env;
+use wasi_nn;
+
+fn main() {
+    let args: Vec<String> = env::args().collect();
+    let model_name: &str = &args[1];
+    let prompt: &str = &args[2];
+
+    let graph =
+        wasi_nn::GraphBuilder::new(wasi_nn::GraphEncoding::Ggml, wasi_nn::ExecutionTarget::CPU)
+            .build_from_cache(model_name)
+            .unwrap();
+    println!("Loaded model into wasi-nn with ID: {:?}", graph);
+
+    let mut context = graph.init_execution_context().unwrap();
+    println!("Created wasi-nn execution context with ID: {:?}", context);
+
+    let tensor_data = prompt.as_bytes().to_vec();
+    println!("Read input tensor, size in bytes: {}", tensor_data.len());
+    context
+        .set_input(0, wasi_nn::TensorType::U8, &[1], &tensor_data)
+        .unwrap();
+
+    // Execute the inference.
+    context.compute().unwrap();
+    println!("Executed model inference");
+
+    // Retrieve the output.
+    let mut output_buffer = vec![0u8; 1000];
+    context.get_output(0, &mut output_buffer).unwrap();
+    let output = String::from_utf8(output_buffer.clone()).unwrap();
+    println!("Output: {}", output);
+}
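A usage note on the named model feature above: the name registered with `--nn-preload` (`default` in the README) must match the module's first CLI argument, which `main.rs` passes to `build_from_cache`. A minimal sketch using a hypothetical name `llama2` with the same model file:

```bash
# Hypothetical name "llama2": the --nn-preload name and the module's first
# argument must match, since main.rs calls build_from_cache(args[1]).
wasmedge --dir .:. \
  --nn-preload llama2:GGML:CPU:llama-2-7b-chat.ggmlv3.q4_0.bin \
  wasmedge-ggml-llama-aot.wasm llama2 'Once upon a time, '
```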
From d719afe7408c5ea3e7ef5afc2c559f7291fb8b9e Mon Sep 17 00:00:00 2001
From: Michael Yuan
Date: Tue, 5 Sep 2023 13:28:18 -0500
Subject: [PATCH 2/5] Create llama.yml

Add a CI check
---
 .github/workflows/llama.yml | 44 +++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 .github/workflows/llama.yml

diff --git a/.github/workflows/llama.yml b/.github/workflows/llama.yml
new file mode 100644
index 0000000..990ce1d
--- /dev/null
+++ b/.github/workflows/llama.yml
@@ -0,0 +1,44 @@
+name: Build and Test llama2 examples
+
+on:
+  workflow_dispatch:
+    inputs:
+      logLevel:
+        description: 'Log level'
+        required: true
+        default: 'info'
+  push:
+    branches: [ '*' ]
+  pull_request:
+    branches: [ '*' ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-20.04
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Install apt-get packages
+      run: |
+        sudo ACCEPT_EULA=Y apt-get update
+        sudo ACCEPT_EULA=Y apt-get upgrade
+        sudo apt-get install wget git curl software-properties-common build-essential
+
+    - name: Install Rust target for wasm
+      run: |
+        rustup target add wasm32-wasi
+
+    - name: Install WasmEdge + WASI-NN + GGML
+      run: |
+        VERSION=0.13.4
+        curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh | sudo bash -s -- -v $VERSION --plugins wasi_nn-ggml -p /usr/local
+
+    - name: Example
+      run: |
+        cd wasmedge-ggml-llama
+        curl -LO https://huggingface.co/localmodels/Llama-2-7B-Chat-ggml/resolve/main/llama-2-7b-chat.ggmlv3.q4_0.bin
+        cargo build --target wasm32-wasi --release
+        wasmedge compile target/wasm32-wasi/release/wasmedge-ggml-llama.wasm wasmedge-ggml-llama.wasm
+        wasmedge --dir .:. --nn-preload default:GGML:CPU:llama-2-7b-chat.ggmlv3.q4_0.bin wasmedge-ggml-llama.wasm default 'Once upon a time, '
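One observation on this job: the multi-gigabyte model download dominates its runtime, which the follow-up patches below address by switching to smaller models. An alternative sketch, assuming the runner persists the working directory or restores it with something like `actions/cache`, is to guard the download:

```bash
# Sketch: skip the multi-gigabyte download when the model is already present.
MODEL=llama-2-7b-chat.ggmlv3.q4_0.bin
if [ ! -f "$MODEL" ]; then
  curl -LO "https://huggingface.co/localmodels/Llama-2-7B-Chat-ggml/resolve/main/$MODEL"
fi
```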
From 25ae1bdcb04d12ad9a0ee32eb1f07545e4bcd71d Mon Sep 17 00:00:00 2001
From: Michael Yuan
Date: Tue, 5 Sep 2023 23:33:58 -0500
Subject: [PATCH 3/5] Update llama.yml

---
 .github/workflows/llama.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/llama.yml b/.github/workflows/llama.yml
index 990ce1d..4ccd3cd 100644
--- a/.github/workflows/llama.yml
+++ b/.github/workflows/llama.yml
@@ -38,7 +38,7 @@ jobs:
     - name: Example
       run: |
         cd wasmedge-ggml-llama
-        curl -LO https://huggingface.co/localmodels/Llama-2-7B-Chat-ggml/resolve/main/llama-2-7b-chat.ggmlv3.q4_0.bin
+        curl -LO https://huggingface.co/localmodels/Llama-2-7B-Chat-ggml/resolve/main/llama-2-7b-chat.ggmlv3.q2_K.bin
         cargo build --target wasm32-wasi --release
         wasmedge compile target/wasm32-wasi/release/wasmedge-ggml-llama.wasm wasmedge-ggml-llama.wasm
         wasmedge --dir .:. --nn-preload default:GGML:CPU:llama-2-7b-chat.ggmlv3.q4_0.bin wasmedge-ggml-llama.wasm default 'Once upon a time, '

From 429e1f984dc0b40701892a88b73cb73056125500 Mon Sep 17 00:00:00 2001
From: Michael Yuan
Date: Tue, 5 Sep 2023 23:37:20 -0500
Subject: [PATCH 4/5] Update llama.yml

---
 .github/workflows/llama.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/llama.yml b/.github/workflows/llama.yml
index 4ccd3cd..0cac168 100644
--- a/.github/workflows/llama.yml
+++ b/.github/workflows/llama.yml
@@ -41,4 +41,4 @@ jobs:
         curl -LO https://huggingface.co/localmodels/Llama-2-7B-Chat-ggml/resolve/main/llama-2-7b-chat.ggmlv3.q2_K.bin
         cargo build --target wasm32-wasi --release
         wasmedge compile target/wasm32-wasi/release/wasmedge-ggml-llama.wasm wasmedge-ggml-llama.wasm
-        wasmedge --dir .:. --nn-preload default:GGML:CPU:llama-2-7b-chat.ggmlv3.q4_0.bin wasmedge-ggml-llama.wasm default 'Once upon a time, '
+        wasmedge --dir .:. --nn-preload default:GGML:CPU:llama-2-7b-chat.ggmlv3.q2_K.bin wasmedge-ggml-llama.wasm default 'Once upon a time, '

From 81046c6486cc2c022e2448a678bcf08934ac21b3 Mon Sep 17 00:00:00 2001
From: Michael Yuan
Date: Tue, 5 Sep 2023 23:49:42 -0500
Subject: [PATCH 5/5] Update llama.yml

---
 .github/workflows/llama.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/llama.yml b/.github/workflows/llama.yml
index 0cac168..8fccc42 100644
--- a/.github/workflows/llama.yml
+++ b/.github/workflows/llama.yml
@@ -38,7 +38,7 @@ jobs:
     - name: Example
       run: |
         cd wasmedge-ggml-llama
-        curl -LO https://huggingface.co/localmodels/Llama-2-7B-Chat-ggml/resolve/main/llama-2-7b-chat.ggmlv3.q2_K.bin
+        curl -LO https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin
         cargo build --target wasm32-wasi --release
         wasmedge compile target/wasm32-wasi/release/wasmedge-ggml-llama.wasm wasmedge-ggml-llama.wasm
-        wasmedge --dir .:. --nn-preload default:GGML:CPU:llama-2-7b-chat.ggmlv3.q2_K.bin wasmedge-ggml-llama.wasm default 'Once upon a time, '
+        wasmedge --dir .:. --nn-preload default:GGML:CPU:orca-mini-3b.ggmlv3.q4_0.bin wasmedge-ggml-llama.wasm default 'Once upon a time, '
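After the full series, the CI job builds and runs the example against the 3B orca-mini model. For reference, a local reproduction of those final steps, assuming WasmEdge 0.13.4 with the `wasi_nn-ggml` plugin is installed and the working directory is `wasmedge-ggml-llama`:

```bash
# Reproduce the final CI configuration locally (assumes the wasi_nn-ggml
# plugin is installed and the current directory is wasmedge-ggml-llama).
curl -LO https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin
cargo build --target wasm32-wasi --release
wasmedge compile target/wasm32-wasi/release/wasmedge-ggml-llama.wasm wasmedge-ggml-llama.wasm
wasmedge --dir .:. \
  --nn-preload default:GGML:CPU:orca-mini-3b.ggmlv3.q4_0.bin \
  wasmedge-ggml-llama.wasm default 'Once upon a time, '
```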