14 changes: 8 additions & 6 deletions .github/workflows/publish_sub_package.yml
@@ -4,14 +4,19 @@ on:
   push:
     branches:
       - main
+  pull_request:
+    branches: [main, ipex-llm-llm-gpu]
+    paths:
+      - ".github/workflows/publish_sub_package.yml"
+      - "llama-index-integrations/**"
 
 env:
   POETRY_VERSION: "1.6.1"
   PYTHON_VERSION: "3.10"
 
 jobs:
   publish_subpackage_if_needed:
-    if: github.repository == 'run-llama/llama_index'
+    # if: github.repository == 'run-llama/llama_index'
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
@@ -30,14 +35,11 @@ jobs:
         run: |
          echo "changed_files=$(git diff --name-only ${{ github.event.before }} ${{ github.event.after }} | grep -v llama-index-core | grep llama-index | grep pyproject | xargs)" >> $GITHUB_OUTPUT
       - name: Publish changed packages
-        env:
-          PYPI_TOKEN: ${{ secrets.LLAMA_INDEX_PYPI_TOKEN }}
         run: |
-          for file in ${{ steps.changed-files.outputs.changed_files }}; do
+          for file in llama-index-integrations/llms/llama-index-llms-ipex-llm/pyproject.toml; do
            cd `echo $file | sed 's/\/pyproject.toml//g'`
            poetry lock
            pip install -e .
-           poetry config pypi-token.pypi $PYPI_TOKEN
-           poetry publish --build
+           poetry publish --build --dry-run
            cd -
           done
@@ -33,11 +33,20 @@ def completion_to_prompt(completion):
     choices=["sym_int4", "asym_int4", "sym_int5", "asym_int5", "sym_int8"],

Review comment: Update the choices to add the GPU-related data types. For a full list of data types we can support, refer to the load_in_low_bit param in the AutoModelForCausalLM API doc: https://ipex-llm.readthedocs.io/en/latest/doc/PythonAPI/LLM/transformers.html#automodelforcausallm
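A possible sketch of what the updated argument could look like; the GPU-related values (nf3, nf4, fp4, fp8, fp16, bf16) and the -l short flag are assumptions here and should be verified against the linked API doc:

```python
# Sketch only: extends the existing --low-bit choices with GPU-oriented
# low-bit types. The added values are assumptions taken from the ipex-llm
# load_in_low_bit documentation; verify against the linked API reference.
parser.add_argument(
    "--low-bit",
    "-l",  # short flag is an assumption, mirroring the -d flag below
    type=str,
    default="sym_int4",
    choices=[
        "sym_int4", "asym_int4", "sym_int5", "asym_int5", "sym_int8",
        "nf3", "nf4", "fp4", "fp8", "fp16", "bf16",  # GPU-related additions
    ],
    help="The quantization type the model will convert to.",
)
```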

help="The quantization type the model will convert to.",
)
parser.add_argument(
"--device",
"-d",
type=str,
default="xpu",
choices=["cpu", "xpu", "auto"],
help="The device the model will run on.",
)

 args = parser.parse_args()
 model_name = args.model_name
 tokenizer_name = args.tokenizer_name
 low_bit = args.low_bit
+device = args.device
 
 # load the model using low-bit format specified
 llm = IpexLLM.from_model_id(
@@ -48,6 +57,7 @@ def completion_to_prompt(completion):
     load_in_low_bit=low_bit,
     completion_to_prompt=completion_to_prompt,
     generate_kwargs={"do_sample": False},
+    device_map=device,
 )
 
 print(
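For context, a minimal sketch of how the example script would invoke the LLM after this change; the model and tokenizer names are placeholders, defaults are assumed for parameters not shown, and the parameter names mirror the diff above:

```python
# Sketch only: parameters mirror the diff above; model/tokenizer names are
# placeholders, and unlisted parameters are assumed to have sane defaults.
from llama_index.llms.ipex_llm import IpexLLM

llm = IpexLLM.from_model_id(
    model_name="HuggingFaceH4/zephyr-7b-alpha",      # placeholder
    tokenizer_name="HuggingFaceH4/zephyr-7b-alpha",  # placeholder
    load_in_low_bit="sym_int4",  # any value from the --low-bit choices
    generate_kwargs={"do_sample": False},
    device_map="xpu",  # "cpu", "xpu", or "auto", matching the new --device flag
)

print(llm.complete("What is IPEX-LLM?"))
```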
@@ -35,8 +35,15 @@ version = "0.1.2"
 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
 llama-index-core = "^0.10.0"
-torch = "<2.2.0"
-ipex-llm = {allow-prereleases = true, extras = ["all"], version = "*"}
+ipex-llm = {allow-prereleases = true, extras = ["llama-index"], version = ">=2.1.0b20240514"}
+torch = {optional = true, source = "ipex-xpu-src-us", version = "2.1.0a0"}
+torchvision = {optional = true, source = "ipex-xpu-src-us", version = "0.16.0a0"}
+intel_extension_for_pytorch = {optional = true, source = "ipex-xpu-src-us", version = "2.1.10+xpu"}
+bigdl-core-xe-21 = {optional = true, version = "*"}
+bigdl-core-xe-esimd-21 = {optional = true, version = "*"}
 
+[tool.poetry.extras]
+xpu = ["bigdl-core-xe-21", "bigdl-core-xe-esimd-21", "intel_extension_for_pytorch", "torch", "torchvision"]
+
 [tool.poetry.group.dev.dependencies]
 black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"}
@@ -56,3 +63,13 @@ types-protobuf = "^4.24.0.4"
 types-redis = "4.5.5.0"
 types-requests = "2.28.11.8"  # TODO: unpin when mypy>0.991
 types-setuptools = "67.1.0.0"
+
+[[tool.poetry.source]]
+name = "ipex-xpu-src-us"
+priority = "explicit"
+url = "https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
+
+[[tool.poetry.source]]
+name = "ipex-xpu-src-cn"
+priority = "supplemental"
+url = "https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/"