🚀 The feature, motivation and pitch
The upcoming Phi-4 is already available: https://huggingface.co/matteogeniaccio/phi-4/tree/main/phi-4
The benchmarks are great.
Alternatives
No response
Additional context
Currently, it fails with the latest vLLM:
[rank0]: Traceback (most recent call last):
[rank0]: File "/root/mcts_methods/entropy-evals/eval_vanila.py", line 211, in <module>
[rank0]: fire.Fire(AnswerPredictor)
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/fire/core.py", line 135, in Fire
[rank0]: component_trace = _Fire(component, args, parsed_flag_args, context, name)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/fire/core.py", line 468, in _Fire
[rank0]: component, remaining_args = _CallAndUpdateTrace(
[rank0]: ^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/fire/core.py", line 684, in _CallAndUpdateTrace
[rank0]: component = fn(*varargs, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/mcts_methods/entropy-evals/eval_vanila.py", line 51, in __init__
[rank0]: model = PiecewiseSamplingModel(
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/mcts_methods/entropy-evals/vanila_greedy_decoding.py", line 31, in __init__
[rank0]: self.model = LLM(
[rank0]: ^^^^
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/utils.py", line 1028, in inner
[rank0]: return fn(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/entrypoints/llm.py", line 210, in __init__
[rank0]: self.llm_engine = self.engine_class.from_engine_args(
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/engine/llm_engine.py", line 585, in from_engine_args
[rank0]: engine = cls(
[rank0]: ^^^^
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/engine/llm_engine.py", line 347, in __init__
[rank0]: self.model_executor = executor_class(vllm_config=vllm_config, )
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/executor/distributed_gpu_executor.py", line 26, in __init__
[rank0]: super().__init__(*args, **kwargs)
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/executor/executor_base.py", line 36, in __init__
[rank0]: self._init_executor()
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/executor/multiproc_gpu_executor.py", line 114, in _init_executor
[rank0]: self._run_workers("load_model",
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/executor/multiproc_gpu_executor.py", line 195, in _run_workers
[rank0]: driver_worker_output = driver_worker_method(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/worker/worker.py", line 152, in load_model
[rank0]: self.model_runner.load_model()
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/worker/model_runner.py", line 1074, in load_model
[rank0]: self.model = get_model(vllm_config=self.vllm_config)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/model_executor/model_loader/__init__.py", line 12, in get_model
[rank0]: return loader.load_model(vllm_config=vllm_config)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/model_executor/model_loader/loader.py", line 332, in load_model
[rank0]: model = _initialize_model(vllm_config=vllm_config)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/model_executor/model_loader/loader.py", line 100, in _initialize_model
[rank0]: return model_class(vllm_config=vllm_config, prefix=prefix)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/model_executor/models/llama.py", line 503, in __init__
[rank0]: self.model = LlamaModel(vllm_config=vllm_config,
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/compilation/decorators.py", line 126, in __init__
[rank0]: old_init(self, vllm_config=vllm_config, prefix=prefix, **kwargs)
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/model_executor/models/llama.py", line 298, in __init__
[rank0]: self.start_layer, self.end_layer, self.layers = make_layers(
[rank0]: ^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/model_executor/models/utils.py", line 509, in make_layers
[rank0]: [PPMissingLayer() for _ in range(start_layer)] + [
[rank0]: ^
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/model_executor/models/utils.py", line 510, in <listcomp>
[rank0]: maybe_offload_to_cpu(layer_fn(prefix=f"{prefix}.{idx}"))
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/model_executor/models/llama.py", line 300, in <lambda>
[rank0]: lambda prefix: LlamaDecoderLayer(config=config,
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/model_executor/models/llama.py", line 217, in __init__
[rank0]: self.self_attn = LlamaAttention(
[rank0]: ^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/envs/blueberry/lib/python3.11/site-packages/vllm/model_executor/models/llama.py", line 125, in __init__
[rank0]: assert self.total_num_kv_heads % tp_size == 0
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: AssertionError
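The failing line is the KV-head sharding check in LlamaAttention: vLLM splits KV heads evenly across tensor-parallel ranks, so total_num_kv_heads must be divisible by tensor_parallel_size. Per the uploaded config.json, this phi-4 checkpoint appears to use num_key_value_heads = 10 (worth verifying locally), which would explain why common multi-GPU settings such as tensor_parallel_size=4 or 8 trip the assert. A minimal sketch of the arithmetic, with the head count hard-coded as an assumption rather than read from the checkpoint:

total_num_kv_heads = 10  # assumed from phi-4's config.json (num_key_value_heads)
for tp_size in (1, 2, 4, 8):
    # Mirrors the check in vllm/model_executor/models/llama.py:
    #   assert self.total_num_kv_heads % tp_size == 0
    status = "ok" if total_num_kv_heads % tp_size == 0 else "AssertionError"
    print(f"tensor_parallel_size={tp_size}: {status}")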
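If that diagnosis holds, a stopgap while proper Phi-4 support lands is to pick a tensor_parallel_size that divides the KV-head count (1 or 2 for 10 heads). A hypothetical invocation, with the model path as a placeholder for a local download of the checkpoint:

from vllm import LLM

# Hypothetical workaround: tensor_parallel_size=2 divides 10 KV heads,
# so the sharding assertion in LlamaAttention should not fire.
llm = LLM(model="/path/to/phi-4", tensor_parallel_size=2)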