diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/.gitignore b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/.gitignore similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/.gitignore rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/.gitignore diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/README.md b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/README.md similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/README.md rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/README.md diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/.gitignore b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/.gitignore similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/.gitignore rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/.gitignore diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/Chart.yaml b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/Chart.yaml similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/Chart.yaml rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/Chart.yaml diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/gpt2_values.yaml b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/gpt2_values.yaml similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/gpt2_values.yaml rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/gpt2_values.yaml diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-70b_values.yaml b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-70b_values.yaml similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-70b_values.yaml rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-70b_values.yaml diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-7b-chat_values.yaml b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-7b-chat_values.yaml similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-7b-chat_values.yaml rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-7b-chat_values.yaml diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-7b_values.yaml b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-7b_values.yaml similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-7b_values.yaml rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-7b_values.yaml diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-70b-instruct_values.yaml b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-70b-instruct_values.yaml similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-70b-instruct_values.yaml rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-70b-instruct_values.yaml diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-8b-instruct_values.yaml b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-8b-instruct_values.yaml similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-8b-instruct_values.yaml rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-8b-instruct_values.yaml diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-8b_values.yaml b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-8b_values.yaml similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-8b_values.yaml rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-8b_values.yaml diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/opt125m_values.yaml b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/opt125m_values.yaml similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/opt125m_values.yaml rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/opt125m_values.yaml diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/NOTES.txt b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/NOTES.txt similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/NOTES.txt rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/NOTES.txt diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/deployment.yaml b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/deployment.yaml similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/deployment.yaml rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/deployment.yaml diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/job.yaml b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/job.yaml similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/job.yaml rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/job.yaml diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/pod-monitor.yaml b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/pod-monitor.yaml similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/pod-monitor.yaml rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/pod-monitor.yaml diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/rbac.yaml b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/rbac.yaml similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/rbac.yaml rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/rbac.yaml diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/service.yaml b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/service.yaml similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/service.yaml rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/service.yaml diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/values.schema.json b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/values.schema.json similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/values.schema.json rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/values.schema.json diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/values.yaml b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/values.yaml similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/values.yaml rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/chart/values.yaml diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/containers/README.md b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/containers/README.md similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/containers/README.md rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/containers/README.md diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/containers/kubessh b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/containers/kubessh similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/containers/kubessh rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/containers/kubessh diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/containers/server.py b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/containers/server.py similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/containers/server.py rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/containers/server.py diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/containers/triton_trt-llm.containerfile b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/containers/triton_trt-llm.containerfile similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/containers/triton_trt-llm.containerfile rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/containers/triton_trt-llm.containerfile diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/nvidia_dcgm-exporter_values.yaml b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/nvidia_dcgm-exporter_values.yaml similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/nvidia_dcgm-exporter_values.yaml rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/nvidia_dcgm-exporter_values.yaml diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/nvidia_gpu-feature-discovery_daemonset.yaml b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/nvidia_gpu-feature-discovery_daemonset.yaml similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/nvidia_gpu-feature-discovery_daemonset.yaml rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/nvidia_gpu-feature-discovery_daemonset.yaml diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/pvc.yaml b/Archives/TensorRT-LLM_Multi-Node_Distributed_Models/pvc.yaml similarity index 100% rename from Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/pvc.yaml rename to Archives/TensorRT-LLM_Multi-Node_Distributed_Models/pvc.yaml diff --git a/Deployment/Kubernetes/README.md b/Deployment/Kubernetes/README.md index aecee447..fabeea9f 100644 --- a/Deployment/Kubernetes/README.md +++ b/Deployment/Kubernetes/README.md @@ -1,4 +1,4 @@ # Kubernetes Deployment of Triton Server Guides * [TensorRT-LLM Gen. AI Autoscaling & Load Balancing](./TensorRT-LLM_Autoscaling_and_Load_Balancing/README.md) -* [Multi-Node Generative AI w/ Triton Server and TensorRT-LLM](./TensorRT-LLM_Multi-Node_Distributed_Models/README.md) +* [Multi-Node Generative AI w/ Triton Server and TensorRT-LLM](./EKS_Multinode_Triton_TRTLLM/README.md)