ZeroGPU-LLM-Inference

Runtime error

Luigi committed on Oct 9

Commit

b9efb74

verified ·

1 Parent(s): 5e03586

add qwen3 32b awq

Files changed (1) hide show

app.py CHANGED Viewed

@@ -39,6 +39,12 @@ MODELS = {
     # ~235B total parameters (MoE: 22B activated) — included for reference if added later
     # "Qwen3-235B-A22B-Thinking": { ... },
     # 14.8B total parameters
     "Qwen3-14B": {
         "repo_id": "Qwen/Qwen3-14B",

     # ~235B total parameters (MoE: 22B activated) — included for reference if added later
     # "Qwen3-235B-A22B-Thinking": { ... },
+    # 32.8B total parameters
+    "Qwen3-32B-AWQ": {
+        "repo_id": "Qwen/Qwen3-32B-AWQ",
+        "description": "4-bit AWQ quantized dense causal language model with 32.8B total parameters (31.2B non-embedding), 64 layers, 64 query heads & 8 KV heads, native 32,768-token context (extendable to 131,072 via YaRN). Features seamless switching between thinking mode (for complex reasoning, math, coding) and non-thinking mode (for efficient dialogue), strong multilingual support (100+ languages), and leading open-source agent capabilities."
+    },
     # 14.8B total parameters
     "Qwen3-14B": {
         "repo_id": "Qwen/Qwen3-14B",