Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion invokeai/app/invocations/anima_text_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@
from invokeai.app.invocations.model import Qwen3EncoderField
from invokeai.app.invocations.primitives import AnimaConditioningOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.anima.t5_tokenizer import load_bundled_t5_tokenizer
from invokeai.backend.patches.layer_patcher import LayerPatcher
from invokeai.backend.patches.lora_conversions.anima_lora_constants import ANIMA_LORA_QWEN3_PREFIX
from invokeai.backend.patches.model_patch_raw import ModelPatchRaw
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
AnimaConditioningInfo,
ConditioningFieldData,
)
from invokeai.backend.t5.t5_tokenizer import load_bundled_t5_tokenizer
from invokeai.backend.util.devices import TorchDevice
from invokeai.backend.util.logging import InvokeAILogger

Expand Down
25 changes: 0 additions & 25 deletions invokeai/backend/anima/t5_tokenizer.py

This file was deleted.

7 changes: 6 additions & 1 deletion invokeai/backend/model_manager/configs/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,11 @@
T2IAdapter_Diffusers_SD1_Config,
T2IAdapter_Diffusers_SDXL_Config,
)
from invokeai.backend.model_manager.configs.t5_encoder import T5Encoder_BnBLLMint8_Config, T5Encoder_T5Encoder_Config
from invokeai.backend.model_manager.configs.t5_encoder import (
T5Encoder_BnBLLMint8_Config,
T5Encoder_GGUF_Config,
T5Encoder_T5Encoder_Config,
)
from invokeai.backend.model_manager.configs.text_llm import TextLLM_Diffusers_Config
from invokeai.backend.model_manager.configs.textual_inversion import (
TI_File_SD1_Config,
Expand Down Expand Up @@ -246,6 +250,7 @@
# T5 Encoder - all formats
Annotated[T5Encoder_T5Encoder_Config, T5Encoder_T5Encoder_Config.get_tag()],
Annotated[T5Encoder_BnBLLMint8_Config, T5Encoder_BnBLLMint8_Config.get_tag()],
Annotated[T5Encoder_GGUF_Config, T5Encoder_GGUF_Config.get_tag()],
# Qwen3 Encoder
Annotated[Qwen3Encoder_Qwen3Encoder_Config, Qwen3Encoder_Qwen3Encoder_Config.get_tag()],
Annotated[Qwen3Encoder_Checkpoint_Config, Qwen3Encoder_Checkpoint_Config.get_tag()],
Expand Down
22 changes: 22 additions & 0 deletions invokeai/backend/model_manager/configs/qwen3_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,20 @@ def _has_ggml_tensors(state_dict: dict[str | int, Any]) -> bool:
return any(isinstance(v, GGMLTensor) for v in state_dict.values())


def _has_t5_encoder_keys(state_dict: dict[str | int, Any]) -> bool:
"""Check if state dict looks like a llama.cpp T5 encoder.

T5 encoder GGUFs (e.g. city96/t5-v1_1-xxl-encoder-gguf) also carry a ``token_embd.weight`` tensor,
which makes them satisfy the Qwen3 GGUF key heuristic. But their transformer blocks use the ``enc.``
prefix (``enc.blk.*``, ``enc.output_norm.weight``), which a Qwen3 encoder never has. We use this to
keep the T5 and Qwen3 encoder configs mutually exclusive.
"""
for key in state_dict.keys():
if isinstance(key, str) and (key.startswith("enc.blk.") or key == "enc.output_norm.weight"):
return True
return False


def _has_qwen_vl_visual_tower(state_dict: dict[str | int, Any]) -> bool:
"""Check if state dict bundles a Qwen2.5-VL / Qwen2-VL vision tower.

Expand Down Expand Up @@ -156,6 +170,10 @@ def _validate_looks_like_qwen3_model(cls, mod: ModelOnDisk) -> None:
state_dict = mod.load_state_dict()
if not _has_qwen3_keys(state_dict):
raise NotAMatchError("state dict does not look like a Qwen3 model")
# Reject T5 encoders: they share the token_embd.weight key with Qwen3 GGUFs but use the ``enc.``
# block prefix, and must be classified as T5Encoder (Qwen3 encoders never have ``enc.blk.*`` keys).
if _has_t5_encoder_keys(state_dict):
raise NotAMatchError("state dict looks like a T5 encoder (has 'enc.blk.*' keys), not a Qwen3 encoder")
# Reject Qwen2.5-VL / Qwen2-VL encoders: they carry a visual tower and must be
# classified as QwenVLEncoder (text-only Qwen3 encoders never have one).
if _has_qwen_vl_visual_tower(state_dict):
Expand Down Expand Up @@ -297,6 +315,10 @@ def _validate_looks_like_qwen3_model(cls, mod: ModelOnDisk) -> None:
state_dict = mod.load_state_dict()
if not _has_qwen3_keys(state_dict):
raise NotAMatchError("state dict does not look like a Qwen3 model")
# Reject T5 encoders: they share the token_embd.weight key with Qwen3 GGUFs but use the ``enc.``
# block prefix, and must be classified as T5Encoder (Qwen3 encoders never have ``enc.blk.*`` keys).
if _has_t5_encoder_keys(state_dict):
raise NotAMatchError("state dict looks like a T5 encoder (has 'enc.blk.*' keys), not a Qwen3 encoder")
# Reject Qwen2.5-VL / Qwen2-VL encoders: they carry a visual tower and must be
# classified as QwenVLEncoder (text-only Qwen3 encoders never have one).
if _has_qwen_vl_visual_tower(state_dict):
Expand Down
45 changes: 44 additions & 1 deletion invokeai/backend/model_manager/configs/t5_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,19 @@

from pydantic import Field

from invokeai.backend.model_manager.configs.base import Config_Base
from invokeai.backend.model_manager.configs.base import Checkpoint_Config_Base, Config_Base
from invokeai.backend.model_manager.configs.identification_utils import (
NotAMatchError,
raise_for_class_name,
raise_for_override_fields,
raise_if_not_dir,
raise_if_not_file,
state_dict_has_any_keys_ending_with,
state_dict_has_any_keys_starting_with,
)
from invokeai.backend.model_manager.model_on_disk import ModelOnDisk
from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelFormat, ModelType
from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor


class T5Encoder_T5Encoder_Config(Config_Base):
Expand Down Expand Up @@ -80,3 +83,43 @@ def raise_if_state_dict_doesnt_look_like_bnb_quantized(cls, mod: ModelOnDisk) ->
has_scb_key_suffix = state_dict_has_any_keys_ending_with(mod.load_state_dict(), "SCB")
if not has_scb_key_suffix:
raise NotAMatchError("state dict does not look like bnb quantized llm_int8")


class T5Encoder_GGUF_Config(Checkpoint_Config_Base, Config_Base):
    """Configuration for GGUF-quantized T5 text encoder models in a single .gguf file.

    These are conversions like city96/t5-v1_1-xxl-encoder-gguf, which use llama.cpp's T5 encoder
    tensor naming (``enc.blk.N.*``, ``token_embd.weight``, ``enc.output_norm.weight``)."""

    # Discriminator fields: each is pinned to a single literal value for this config class.
    base: Literal[BaseModelType.Any] = Field(default=BaseModelType.Any)
    type: Literal[ModelType.T5Encoder] = Field(default=ModelType.T5Encoder)
    format: Literal[ModelFormat.GGUFQuantized] = Field(default=ModelFormat.GGUFQuantized)
    cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only")

    @classmethod
    def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
        """Build this config from an on-disk model, after running the identification checks.

        The checks run in a fixed order: path is a file, override fields are acceptable for this
        class, the state dict has T5 encoder keys, and the state dict holds GGML tensors. The two
        state-dict checks raise ``NotAMatchError`` on failure.

        Args:
            mod: The model on disk to probe.
            override_fields: Field values passed straight through to the constructor.

        Returns:
            A new instance constructed from ``override_fields``.
        """
        raise_if_not_file(mod)
        raise_for_override_fields(cls, override_fields)

        # Architecture check first, then quantization format.
        cls.raise_if_doesnt_look_like_t5_encoder(mod)
        cls.raise_if_doesnt_look_like_gguf_quantized(mod)

        return cls(**override_fields)

    @classmethod
    def raise_if_doesnt_look_like_t5_encoder(cls, mod: ModelOnDisk) -> None:
        """Raise ``NotAMatchError`` unless the state dict carries llama.cpp T5 encoder keys.

        llama.cpp T5 encoders put the ``enc.`` prefix on their transformer blocks and final norm,
        which sets them apart from decoder-only GGUF models (e.g. Qwen3, which uses bare ``blk.*``).
        """
        weights = mod.load_state_dict()

        # Either marker is sufficient; the suffix check only runs when no ``enc.blk.`` key exists.
        if state_dict_has_any_keys_starting_with(weights, "enc.blk."):
            return
        if state_dict_has_any_keys_ending_with(weights, "enc.output_norm.weight"):
            return

        raise NotAMatchError("state dict does not look like a T5 encoder (no 'enc.blk.*' keys)")

    @classmethod
    def raise_if_doesnt_look_like_gguf_quantized(cls, mod: ModelOnDisk) -> None:
        """Raise ``NotAMatchError`` unless at least one state dict value is a ``GGMLTensor``."""
        for tensor in mod.load_state_dict().values():
            if isinstance(tensor, GGMLTensor):
                return

        raise NotAMatchError("state dict does not look like GGUF quantized")
Loading
Loading