Spaces:

medmekk
/

TorchAO_Quantization

Running on A100

App Files Community

MekkCyber committed on Oct 17, 2024

Commit

7f64e83

1 Parent(s): 1bb9947

changing gradio version

Browse files

Files changed (2) hide show

README.md +1 -1
app.py +1 -129

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 💻
 colorFrom: blue
 colorTo: red
 sdk: gradio
-sdk_version: 4.39.0
 app_file: app.py
 pinned: false

 colorFrom: blue
 colorTo: red
 sdk: gradio
+sdk_version: 4.27.0
 app_file: app.py
 pinned: false

app.py CHANGED Viewed

@@ -196,132 +196,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
 # Launch the app
-app.launch()
-from torchao.quantization import (
-                int4_weight_only,
-                int8_dynamic_activation_int8_weight,
-                int8_weight_only,
-            )
-# import gradio as gr
-# import torch
-# from transformers import AutoModelForCausalLM, AutoTokenizer
-# import torch.ao.quantization as quant
-# import os
-# from huggingface_hub import HfApi
-# import tempfile
-# import torch.utils.data as data
-# from torchao.quantization import quantize_
-# def load_calibration_dataset(tokenizer, num_samples=100):
-#     # This is a placeholder. In a real scenario, you'd load actual data.
-#     dummy_texts = ["This is a sample text" for _ in range(num_samples)]
-#     encodings = tokenizer(dummy_texts, truncation=True, padding=True, return_tensors="pt")
-#     dataset = data.TensorDataset(encodings['input_ids'], encodings['attention_mask'])
-#     return data.DataLoader(dataset, batch_size=1)
-# def load_model(model_name):
-#     print(f"Loading model: {model_name}")
-#     model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.bfloat16, device_map="auto")
-#     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-#     return model, tokenizer
-# def quantize_model(model, quant_type, dtype):
-#     print(f"Quantizing model: {quant_type} - {dtype}")
-#     quantize_(model, _STR_TO_METHOD[dtype](group_size=128))
-# def save_model(model, model_name, quant_type, dtype):
-#     print("Saving quantized model")
-#     model.save_pretrained("medmekk/model_llama", safe_serialization=False)
-#     with tempfile.TemporaryDirectory() as tmpdirname:
-#         model.save_pretrained(tmpdirname)
-#         # Create a new repo name
-#         repo_name = f"{model_name.split('/')[-1]}-quantized-{quant_type.lower()}-{dtype}bit"
-#         # Push to Hub
-#         api = HfApi()
-#         api.create_repo(repo_name, exist_ok=True)
-#         api.upload_folder(
-#             folder_path=tmpdirname,
-#             repo_id=repo_name,
-#             repo_type="model",
-#         )
-#     return f"https://huggingface.co/{repo_name}"
-# _STR_TO_METHOD = {
-#     "int4_weight_only": int4_weight_only,
-#     "int8_weight_only": int8_weight_only,
-#     "int8_dynamic_activation_int8_weight": int8_dynamic_activation_int8_weight,
-# }
-# def quantize_and_save(model_name, quant_type, dtype):
-#     model, tokenizer = load_model(model_name)
-#     quantize_model(model, quant_type, dtype)
-#     print(model.device)
-#     return save_model(model, model_name, quant_type, dtype)
-# # Gradio interface
-# with gr.Blocks(theme=gr.themes.Soft()) as app:
-#     gr.Markdown(
-#         """
-#         # 🚀 Model Quantization App
-#         Quantize your favorite Hugging Face models and save them to your profile!
-#         """
-#     )
-#     with gr.Row():
-#         with gr.Column():
-#             model_name = gr.Textbox(
-#                 label="Model Name",
-#                 placeholder="e.g., gpt2, distilgpt2",
-#                 value="meta-llama/Meta-Llama-3-8B-Instruct"
-#             )
-#             quant_type = gr.Dropdown(
-#                 label="Quantization Type",
-#                 choices=["Dynamic", "Static"],
-#                 value="Dynamic"
-#             )
-#             dtype = gr.Dropdown(
-#                 label="Data Type",
-#                 choices=["int4_weight_only", "int8_weight_only", "int8_dynamic_activation_int8_weight"],
-#                 value="int4_weight_only"
-#             )
-#         with gr.Column():
-#             quantize_button = gr.Button("Quantize and Save Model", variant="primary")
-#             output_link = gr.Textbox(label="Output", interactive=False)
-#     gr.Markdown(
-#         """
-#         ## Instructions
-#         1. Enter the name of the Hugging Face model you want to quantize.
-#         2. Choose the quantization type.
-#         3. If using Weight Only quantization, select the number of bits.
-#         4. Click "Quantize and Save Model" to start the process.
-#         5. Once complete, you'll receive a link to the quantized model on Hugging Face.
-#         Note: This process may take some time depending on the model size and your hardware.
-#         """
-#     )
-#     quantize_button.click(
-#         fn=quantize_and_save,
-#         inputs=[model_name, quant_type, dtype],
-#         outputs=[output_link]
-#     )
-# # Launch the app
-# app.launch(share=True)


196
197
198	# Launch the app
199	+ app.launch()