Skip to content

[BUG] RuntimeError: Numpy is not available #1403

@davidray222

Description

@davidray222

Describe the bug

INFO  Packing model...
INFO  Packing Kernel: Auto-selection: adding candidate `TorchQuantLinear`
INFO  Kernel: candidates -> `[TorchQuantLinear]`
INFO  Kernel: selected -> `TorchQuantLinear`.
Packing model.layers.0.mlp.gate_proj    [5 of 224] █---------------------------------------------------------------| 0:00:00 / 0:00:00 [5/224] 2.2%Traceback (most recent call last):
  File "/mnt/8tb_raid/david_model/GPTQModel/examples/quantization/quant_deepseek_autoround.py", line 79, in <module>
    main()
  File "/mnt/8tb_raid/david_model/GPTQModel/examples/quantization/quant_deepseek_autoround.py", line 43, in main
    model.quantize(examples)
  File "/home/david/miniconda3/envs/gptqmodel/lib/python3.10/site-packages/gptqmodel/models/base.py", line 421, in quantize
    return module_looper.loop(
  File "/home/david/miniconda3/envs/gptqmodel/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/home/david/miniconda3/envs/gptqmodel/lib/python3.10/site-packages/gptqmodel/looper/module_looper.py", line 441, in loop
    reverse_p.finalize(model=self.gptq_model, **kwargs)
  File "/home/david/miniconda3/envs/gptqmodel/lib/python3.10/site-packages/gptqmodel/looper/gptq_processor.py", line 200, in finalize
    model.qlinear_kernel = pack_model(
  File "/home/david/miniconda3/envs/gptqmodel/lib/python3.10/site-packages/gptqmodel/utils/model.py", line 592, in pack_model
    for _ in executor.map(wrapper, names):
  File "/home/david/miniconda3/envs/gptqmodel/lib/python3.10/concurrent/futures/_base.py", line 621, in result_iterator
    yield _result_or_cancel(fs.pop())
  File "/home/david/miniconda3/envs/gptqmodel/lib/python3.10/concurrent/futures/_base.py", line 319, in _result_or_cancel
    return fut.result(timeout)
  File "/home/david/miniconda3/envs/gptqmodel/lib/python3.10/concurrent/futures/_base.py", line 458, in result
    return self.__get_result()
  File "/home/david/miniconda3/envs/gptqmodel/lib/python3.10/concurrent/futures/_base.py", line 403, in __get_result
    raise self._exception
  File "/home/david/miniconda3/envs/gptqmodel/lib/python3.10/concurrent/futures/thread.py", line 58, in run
    result = self.fn(*self.args, **self.kwargs)
  File "/home/david/miniconda3/envs/gptqmodel/lib/python3.10/site-packages/gptqmodel/utils/model.py", line 590, in wrapper
    pack_module(name, qModules, quant_result, modules)
  File "/home/david/miniconda3/envs/gptqmodel/lib/python3.10/site-packages/gptqmodel/utils/model.py", line 529, in pack_module
    qModules[name].pack(linear=layers[name], scales=scale, zeros=zero, g_idx=g_idx)
  File "/home/david/miniconda3/envs/gptqmodel/lib/python3.10/site-packages/gptqmodel/nn_modules/qlinear/__init__.py", line 469, in pack
    int_weight = int_weight.numpy().astype(self.pack_np_math_dtype)
RuntimeError: Numpy is not available

I used the code from "/GPTQModel/examples/quantization/basic_usage_autoround.py" to quantize deepseek-ai/DeepSeek-R1-Distill-Llama-8B and Qwen/QwQ-32B, but I encountered the same issue in both cases.

GPU Info

GPU: NVIDIA A6000 (the `nvidia-smi` output was not provided)

Software Info

CUDA Version: 12.8

Show output of:

Name: gptqmodel
Version: 2.0.1.dev0
---
Name: torch
Version: 2.2.0
---
Name: transformers
Version: 4.49.0
---
Name: accelerate
Version: 1.3.0
---
Name: triton
Version: 2.2.0
---
Name: numpy
Version: 2.2.3
(gptqmodel) david@asus-ESC4000-E11:/mnt/8tb_raid/david_model/GPTQModel$ pip list
Package                  Version
------------------------ -----------
accelerate               1.3.0
aiohappyeyeballs         2.5.0
aiohttp                  3.11.13
aiosignal                1.3.2
async-timeout            5.0.1
attrs                    25.1.0
certifi                  2025.1.31
charset-normalizer       3.4.1
datasets                 3.3.2
device-smi               0.4.1
dill                     0.3.8
filelock                 3.17.0
frozenlist               1.5.0
fsspec                   2024.12.0
gptqmodel                2.0.1.dev0
hf_transfer              0.1.9
huggingface-hub          0.29.2
idna                     3.10
Jinja2                   3.1.6
logbar                   0.0.3
MarkupSafe               3.0.2
mpmath                   1.3.0
multidict                6.1.0
multiprocess             0.70.16
networkx                 3.4.2
numpy                    2.2.3
nvidia-cublas-cu12       12.1.3.1
nvidia-cuda-cupti-cu12   12.1.105
nvidia-cuda-nvrtc-cu12   12.1.105
nvidia-cuda-runtime-cu12 12.1.105
nvidia-cudnn-cu12        8.9.2.26
nvidia-cufft-cu12        11.0.2.54
nvidia-curand-cu12       10.3.2.106
nvidia-cusolver-cu12     11.4.5.107
nvidia-cusparse-cu12     12.1.0.106
nvidia-cusparselt-cu12   0.6.2
nvidia-nccl-cu12         2.19.3
nvidia-nvjitlink-cu12    12.8.93
nvidia-nvtx-cu12         12.1.105
packaging                24.2
pandas                   2.2.3
pillow                   11.1.0
pip                      25.0
propcache                0.3.0
protobuf                 6.30.0
psutil                   7.0.0
pyarrow                  19.0.1
python-dateutil          2.9.0.post0
pytz                     2025.1
PyYAML                   6.0.2
regex                    2024.11.6
requests                 2.32.3
safetensors              0.5.3
setuptools               75.8.0
six                      1.17.0
sympy                    1.13.1
threadpoolctl            3.5.0
tokenicer                0.0.4
tokenizers               0.21.0
torch                    2.2.0
tqdm                     4.67.1
transformers             4.49.0
triton                   2.2.0
typing_extensions        4.12.2
tzdata                   2025.1
urllib3                  2.3.0
wheel                    0.45.1
xxhash                   3.5.0
yarl                     1.18.3

my code:

# Copyright 2024-2025 ModelCloud.ai
# Copyright 2024-2025 [email protected]
# Contact: [email protected], x.com/qubitium
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
from gptqmodel import GPTQModel
from gptqmodel.quantization.config import AutoRoundQuantizeConfig  # noqa: E402
from transformers import AutoTokenizer

# Model id handed to AutoTokenizer.from_pretrained and GPTQModel.load in main().
# Alternative id noted by the author: "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
pretrained_model_id = "Qwen/QwQ-32B"
# Path where main() saves the quantized model and tokenizer and from which it
# reloads the model; the "4bit-32g" suffix mirrors bits=4 / group_size=32 there.
quantized_model_id = "./autoround/Qwen-QwQ-32B-4bit-32g"

def main():
    """Quantize the pretrained model with AutoRound, save it, reload it, and run it.

    Steps, in order:
      1. Load the tokenizer and tokenize one example sentence for ``quantize``.
      2. Load the model with a 4-bit, group-size-32 ``AutoRoundQuantizeConfig``
         and quantize it with that example.
      3. Save the quantized model and the tokenizer to ``quantized_model_id``.
      4. Reload the saved model on ``cuda:0`` when CUDA is available (CPU
         otherwise), run one forward pass and one ``generate`` call, and print
         the results.
    """
    tokenizer = AutoTokenizer.from_pretrained(pretrained_model_id, use_fast=True)

    example_text = (
        "gptqmodel is an easy-to-use model quantization library with user-friendly apis, based on GPTQ algorithm."
    )
    examples = [tokenizer(example_text)]

    model = GPTQModel.load(
        pretrained_model_id,
        quantize_config=AutoRoundQuantizeConfig(bits=4, group_size=32),
    )
    model.quantize(examples)

    # Persist the quantized weights and the tokenizer side by side.
    model.save(quantized_model_id)
    tokenizer.save_pretrained(quantized_model_id)

    # Drop the reference to the in-memory model before loading the saved copy.
    del model

    if torch.cuda.is_available():
        device = "cuda:0"
    else:
        device = "cpu"
    model = GPTQModel.from_quantized(quantized_model_id, device=device)

    # Forward pass on a (1, 1) tensor of ones, just to show the logits.
    input_ids = torch.ones((1, 1), dtype=torch.long, device=device)
    outputs = model(input_ids=input_ids)
    print(f"output logits {outputs.logits.shape}: \n", outputs.logits)

    # inference with model.generate
    prompt = tokenizer("gptqmodel is", return_tensors="pt").to(model.device)
    generated = model.generate(**prompt)
    print(tokenizer.decode(generated[0]))


if __name__ == "__main__":
    # Script entry point: configure root logging, then run the example.
    import logging

    # INFO-level records formatted as "<timestamp> <level> [<logger>] <message>".
    logging.basicConfig(
        level=logging.INFO,
        datefmt="%Y-%m-%d %H:%M:%S",
        format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
    )

    main()

Thank you!!!!!!!!!!!!!!!!

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions