Spaces: Running on A100

MekkCyber committed
Commit · 29baea6 · Parent(s): 1c806d1

test zero gpu

app.py CHANGED
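
Context for the change: on a ZeroGPU Space, CUDA is only attached while a function decorated with @spaces.GPU is running, so model loading has to happen inside such a function rather than at import time. A minimal sketch of the pattern below (the helper name and model id are illustrative, not from this commit):

# Minimal ZeroGPU sketch: the GPU is only available inside the decorated call.
import spaces
import torch
from transformers import AutoModel

@spaces.GPU  # an optional duration can be requested, e.g. @spaces.GPU(duration=120)
def load_on_gpu(model_name):
    # Illustrative helper, not part of app.py.
    return AutoModel.from_pretrained(model_name, torch_dtype=torch.bfloat16)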
@@ -61,14 +61,22 @@ model = AutoModel.from_pretrained("{model_name}")"""
     return model_card
 
 @spaces.GPU
+def load_model_gpu(model_name, quantization_config, auth_token) :
+    return AutoModel.from_pretrained(model_name, torch_dtype=torch.bfloat16, quantization_config=quantization_config, use_auth_token=auth_token.token)
+
+def load_model_cpu(model_name, quantization_config, auth_token) :
+    return AutoModel.from_pretrained(model_name, torch_dtype=torch.bfloat16, quantization_config=quantization_config, use_auth_token=auth_token.token)
+
 def quantize_model(model_name, quantization_type, group_size=128, auth_token=None, username=None, device="cuda"):
     print(f"Quantizing model: {quantization_type}")
     if quantization_type == "int4_weight_only" :
         quantization_config = TorchAoConfig(quantization_type, group_size=group_size)
     else :
         quantization_config = TorchAoConfig(quantization_type)
-
-
+    if device == "cuda" :
+        model = load_model_gpu(model_name, quantization_config, auth_token)
+    else :
+        model = load_model_cpu(model_name, quantization_config, auth_token)
 
     return model
 
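For reference, a sketch of how the TorchAoConfig built in quantize_model is consumed by transformers; the model id below is a placeholder, not one used by this Space:

# Sketch: TorchAoConfig drives on-the-fly quantization in from_pretrained.
# "int4_weight_only" additionally takes a group_size; the other quant types here do not.
import torch
from transformers import AutoModel, TorchAoConfig

quantization_config = TorchAoConfig("int4_weight_only", group_size=128)
model = AutoModel.from_pretrained(
    "facebook/opt-125m",  # placeholder model id
    torch_dtype=torch.bfloat16,
    quantization_config=quantization_config,
)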
@@ -110,11 +118,11 @@ def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToke
         return exists_message
     if quantization_type == "int4_weight_only" and device == "cpu" :
         return "int4_weight_only not supported on cpu"
-    try :
-        quantized_model = quantize_model(model_name, quantization_type, group_size, oauth_token, profile.username, device)
-        return save_model(quantized_model, model_name, quantization_type, group_size, profile.username, oauth_token, quantized_model_name)
-    except Exception as e :
-        return e
+    # try :
+    quantized_model = quantize_model(model_name, quantization_type, group_size, oauth_token, profile.username, device)
+    return save_model(quantized_model, model_name, quantization_type, group_size, profile.username, oauth_token, quantized_model_name)
+    # except Exception as e :
+    #     return e
 
 
 with gr.Blocks(theme=gr.themes.Soft()) as app:
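Note that with the try/except commented out, failures in quantize_model or save_model now raise instead of being returned, which surfaces full tracebacks while testing ZeroGPU. If the guard is restored later, a sketch along these lines keeps the Gradio return value a string (previously the exception object itself was returned):

    # Sketch of restored error handling, not the committed code;
    # returning str(e) rather than e keeps the output a plain string.
    try:
        quantized_model = quantize_model(model_name, quantization_type, group_size, oauth_token, profile.username, device)
        return save_model(quantized_model, model_name, quantization_type, group_size, profile.username, oauth_token, quantized_model_name)
    except Exception as e:
        return f"Quantization failed: {e}"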