vllm.config.kv_transfer ¶

KVConsumer `module-attribute` ¶

# kv_role values for which KVTransferConfig.is_kv_consumer can be true:
# a pure consumer, or an instance acting as both producer and consumer.
KVConsumer = Literal['kv_consumer', 'kv_both']

KVProducer `module-attribute` ¶

# kv_role values for which KVTransferConfig.is_kv_producer can be true:
# a pure producer, or an instance acting as both producer and consumer.
KVProducer = Literal['kv_producer', 'kv_both']

KVRole `module-attribute` ¶

# Every accepted kv_role value: the union of the producer and consumer
# literals ('kv_producer', 'kv_consumer' and 'kv_both').
KVRole = Literal[KVProducer, KVConsumer]

KVTransferConfig ¶

Configuration for distributed KV cache transfer.

Source code in vllm/config/kv_transfer.py

@config
@dataclass
class KVTransferConfig:
    """Configuration for distributed KV cache transfer.

    When `kv_connector` is set, `kv_role` must be set as well; validation
    fails otherwise. An `engine_id` left as None is replaced by a freshly
    generated UUID4 string during validation.
    """

    kv_connector: str | None = None
    """The KV connector for vLLM to transmit KV caches between vLLM instances.
    """

    engine_id: str = Field(default=None, validate_default=True)
    """The engine id for KV transfers."""

    kv_buffer_device: Literal["cuda", "cpu"] = "cuda"
    """The device used by kv connector to buffer the KV cache."""

    kv_buffer_size: float = Field(default=1e9, gt=0)
    """The buffer size for TorchDistributedConnector. Measured in number of
    bytes. Recommended value: 1e9 (about 1GB)."""

    kv_role: KVRole | None = None
    """Whether this vLLM instance produces, consumes KV cache, or both. Choices
    are 'kv_producer', 'kv_consumer', and 'kv_both'."""

    kv_rank: int | None = None
    """The rank of this vLLM instance in the KV cache transfer. Typical value:
    0 for prefill instance, 1 for decode instance.
    Currently only 1P1D is supported."""

    kv_parallel_size: int = Field(default=1, ge=1)
    """The number of parallel instances for KV cache transfer. For
    P2pNcclConnector, this should be 2."""

    kv_ip: str = "127.0.0.1"
    """The KV connector ip, used to build distributed connection."""

    kv_port: int = 14579
    """The KV connector port, used to build distributed connection."""

    kv_connector_extra_config: dict[str, Any] = Field(default_factory=dict)
    """any extra config that the connector may need."""

    kv_connector_module_path: str | None = None
    """The Python module path to dynamically load the KV connector from.
    Only supported in V1."""

    def compute_hash(self) -> str:
        """Return a hash of the settings that shape the computation graph.

        WARNING: when adding a field to this config, append it to
        `factors` below if it affects the computation graph.

        The hash is meant to uniquely identify every config value that
        influences the graph structure from input ids/embeddings through
        the final hidden states; anything before the input ids/embeddings
        or after the final hidden states is out of scope.
        """
        # Nothing in this config alters the computation graph, so the
        # factor list is intentionally empty.
        factors: list[Any] = []
        digest = hashlib.md5(usedforsecurity=False)
        digest.update(str(factors).encode())
        return digest.hexdigest()

    @field_validator("engine_id", mode="before")
    @classmethod
    def _validate_engine_id(cls, engine_id: Any | None) -> Any:
        """Substitute a freshly generated UUID4 string for a missing id.

        The default is produced here rather than through `default_factory`
        so that each `KVTransferConfig` instance gets a unique `engine_id`.
        Any value other than None is returned unchanged.
        """
        return str(uuid.uuid4()) if engine_id is None else engine_id

    @model_validator(mode="after")
    def _validate_kv_transfer_config(self) -> Self:
        """Reject a config that names a connector without a role.

        Raises:
            ValueError: If `kv_connector` is set while `kv_role` is None.
        """
        # Valid whenever no connector is configured or a role is present.
        if self.kv_connector is None or self.kv_role is not None:
            return self
        raise ValueError(
            "Please specify kv_role when kv_connector "
            f"is set, supported roles are {get_args(KVRole)}"
        )

    @property
    def is_kv_transfer_instance(self) -> bool:
        """True when a connector is set and `kv_role` is any `KVRole` value."""
        if self.kv_connector is None:
            return False
        return self.kv_role in get_args(KVRole)

    @property
    def is_kv_producer(self) -> bool:
        """True when a connector is set and `kv_role` is a `KVProducer` value."""
        if self.kv_connector is None:
            return False
        return self.kv_role in get_args(KVProducer)

    @property
    def is_kv_consumer(self) -> bool:
        """True when a connector is set and `kv_role` is a `KVConsumer` value."""
        if self.kv_connector is None:
            return False
        return self.kv_role in get_args(KVConsumer)

    def get_from_extra_config(self, key, default) -> Any:
        """Look up `key` in `kv_connector_extra_config`.

        Returns `default` when the key is absent.
        """
        extra_config = self.kv_connector_extra_config
        return extra_config.get(key, default)

engine_id `class-attribute` `instance-attribute` ¶

engine_id: str = Field(default=None, validate_default=True)

The engine id for KV transfers.

is_kv_consumer `property` ¶

is_kv_consumer: bool

is_kv_producer `property` ¶

is_kv_producer: bool

is_kv_transfer_instance `property` ¶

is_kv_transfer_instance: bool

kv_buffer_device `class-attribute` `instance-attribute` ¶

kv_buffer_device: Literal['cuda', 'cpu'] = 'cuda'

The device used by kv connector to buffer the KV cache.

kv_buffer_size `class-attribute` `instance-attribute` ¶

kv_buffer_size: float = Field(default=1000000000.0, gt=0)

The buffer size for TorchDistributedConnector. Measured in number of bytes. Recommended value: 1e9 (about 1GB).

kv_connector `class-attribute` `instance-attribute` ¶

kv_connector: str | None = None

The KV connector for vLLM to transmit KV caches between vLLM instances.

kv_connector_extra_config `class-attribute` `instance-attribute` ¶

kv_connector_extra_config: dict[str, Any] = Field(
    default_factory=dict
)

Any extra config that the connector may need.

kv_connector_module_path `class-attribute` `instance-attribute` ¶

kv_connector_module_path: str | None = None

The Python module path to dynamically load the KV connector from. Only supported in V1.

kv_ip `class-attribute` `instance-attribute` ¶

kv_ip: str = '127.0.0.1'

The KV connector ip, used to build distributed connection.

kv_parallel_size `class-attribute` `instance-attribute` ¶

kv_parallel_size: int = Field(default=1, ge=1)

The number of parallel instances for KV cache transfer. For P2pNcclConnector, this should be 2.

kv_port `class-attribute` `instance-attribute` ¶

kv_port: int = 14579

The KV connector port, used to build distributed connection.

kv_rank `class-attribute` `instance-attribute` ¶

kv_rank: int | None = None

The rank of this vLLM instance in the KV cache transfer. Typical value: 0 for the prefill instance, 1 for the decode instance. Currently only 1P1D (one prefill instance paired with one decode instance) is supported.

kv_role `class-attribute` `instance-attribute` ¶

kv_role: KVRole | None = None

Whether this vLLM instance produces, consumes KV cache, or both. Choices are 'kv_producer', 'kv_consumer', and 'kv_both'.

_validate_engine_id `classmethod` ¶

_validate_engine_id(engine_id: Any | None) -> Any

The default engine_id is generated in this validator rather than through default_factory, to ensure that each instance of KVTransferConfig gets a unique engine_id.

Source code in vllm/config/kv_transfer.py

@field_validator("engine_id", mode="before")
@classmethod
def _validate_engine_id(cls, engine_id: Any | None) -> Any:
    """Substitute a freshly generated UUID4 string for a missing id.

    The default is produced here rather than through `default_factory`
    so that each `KVTransferConfig` instance gets a unique `engine_id`.
    Any value other than None is returned unchanged.
    """
    return str(uuid.uuid4()) if engine_id is None else engine_id

_validate_kv_transfer_config ¶

_validate_kv_transfer_config() -> Self

Source code in vllm/config/kv_transfer.py

@model_validator(mode="after")
def _validate_kv_transfer_config(self) -> Self:
    """Reject a config that names a connector without a role.

    Raises:
        ValueError: If `kv_connector` is set while `kv_role` is None.
    """
    # Valid whenever no connector is configured or a role is present.
    if self.kv_connector is None or self.kv_role is not None:
        return self
    raise ValueError(
        "Please specify kv_role when kv_connector "
        f"is set, supported roles are {get_args(KVRole)}"
    )

compute_hash ¶

compute_hash() -> str

WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if it affects the computation graph.

Provide a hash that uniquely identifies all the configs that affect the structure of the computation graph from input ids/embeddings to the final hidden states, excluding anything before input ids/embeddings and after the final hidden states.

Source code in vllm/config/kv_transfer.py

def compute_hash(self) -> str:
    """Return a hash of the settings that shape the computation graph.

    WARNING: when adding a field to this config, append it to
    `factors` below if it affects the computation graph.

    The hash is meant to uniquely identify every config value that
    influences the graph structure from input ids/embeddings through
    the final hidden states; anything before the input ids/embeddings
    or after the final hidden states is out of scope.
    """
    # Nothing in this config alters the computation graph, so the
    # factor list is intentionally empty.
    factors: list[Any] = []
    digest = hashlib.md5(usedforsecurity=False)
    digest.update(str(factors).encode())
    return digest.hexdigest()

get_from_extra_config ¶

get_from_extra_config(key, default) -> Any

Source code in vllm/config/kv_transfer.py

def get_from_extra_config(self, key, default) -> Any:
    """Look up `key` in `kv_connector_extra_config`.

    Returns `default` when the key is absent.
    """
    extra_config = self.kv_connector_extra_config
    return extra_config.get(key, default)

vllm.config.kv_transfer ¶

KVConsumer module-attribute ¶

KVProducer module-attribute ¶

KVRole module-attribute ¶

KVTransferConfig ¶

engine_id class-attribute instance-attribute ¶

is_kv_consumer property ¶

is_kv_producer property ¶

is_kv_transfer_instance property ¶

kv_buffer_device class-attribute instance-attribute ¶

kv_buffer_size class-attribute instance-attribute ¶

kv_connector class-attribute instance-attribute ¶

kv_connector_extra_config class-attribute instance-attribute ¶

kv_connector_module_path class-attribute instance-attribute ¶

kv_ip class-attribute instance-attribute ¶

kv_parallel_size class-attribute instance-attribute ¶

kv_port class-attribute instance-attribute ¶

kv_rank class-attribute instance-attribute ¶

kv_role class-attribute instance-attribute ¶

_validate_engine_id classmethod ¶

_validate_kv_transfer_config ¶

compute_hash ¶

get_from_extra_config ¶

KVConsumer `module-attribute` ¶

KVProducer `module-attribute` ¶

KVRole `module-attribute` ¶

engine_id `class-attribute` `instance-attribute` ¶

is_kv_consumer `property` ¶

is_kv_producer `property` ¶

is_kv_transfer_instance `property` ¶

kv_buffer_device `class-attribute` `instance-attribute` ¶

kv_buffer_size `class-attribute` `instance-attribute` ¶

kv_connector `class-attribute` `instance-attribute` ¶

kv_connector_extra_config `class-attribute` `instance-attribute` ¶

kv_connector_module_path `class-attribute` `instance-attribute` ¶

kv_ip `class-attribute` `instance-attribute` ¶

kv_parallel_size `class-attribute` `instance-attribute` ¶

kv_port `class-attribute` `instance-attribute` ¶

kv_rank `class-attribute` `instance-attribute` ¶

kv_role `class-attribute` `instance-attribute` ¶

_validate_engine_id `classmethod` ¶