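# NOTE(review): the three lines below appear to be page residue from the
# hosting site (uploader avatar text, commit message, commit hash), not part
# of the recipe. They are kept as comments so the file parses as YAML.
#   etsien's picture
#   Upload GPTQ W4A8 V2 quantized Llama-3.1-Nemotron-70B
#   3b948d8 verified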
# Quantization recipe: one stage (`default_stage`) containing one modifier
# group (`default_modifiers`) that configures a single `GPTQModifier`.
# NOTE(review): the nesting below is reconstructed from the key names; the
# indentation had been lost, which left every key at the top level and the
# three container keys null. Confirm against the tool that loads this recipe.
default_stage:
  default_modifiers:
    GPTQModifier:
      # Select `Linear` targets and exclude `lm_head`.
      targets: [Linear]
      ignore: [lm_head]
      # Named scheme; presumably 4-bit weights with 8-bit activations —
      # TODO confirm against the consumer's preset-scheme table.
      scheme: W4A8
      # NOTE(review): verify this option is still honored by the installed
      # library version.
      sequential_update: true
      # GPTQ tuning values, copied unchanged. Semantics are not visible in this
      # file; presumably columns per block and the Hessian dampening fraction.
      block_size: 64
      dampening_frac: 0.01
      # Presumably controls whether Hessians are moved off the accelerator;
      # disabled here — TODO confirm.
      offload_hessians: false