# Friday-V1 / app.py
# saikrishnagorijala's picture
# Update app.py
# ff4d990 verified
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import gradio as gr
import torch
# Identifier handed to from_pretrained() for both the tokenizer and the model.
model_id = "saikrishnagorijala/friday-V1"

# Tokenizer that belongs to the checkpoint named by model_id.
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Quantization config for 8-bit (LLM.int8) inference.
#
# BitsAndBytesConfig has no ``bnb_8bit_*`` options: double quantization, the
# "nf4" quant type and the compute dtype are 4-bit-only settings spelled
# ``bnb_4bit_use_double_quant`` / ``bnb_4bit_quant_type`` /
# ``bnb_4bit_compute_dtype`` and require ``load_in_4bit=True``. The previous
# ``bnb_8bit_*`` keyword arguments were therefore ignored, so the effective
# configuration was always plain 8-bit loading, which is what is kept here.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)
# Instantiate the causal LM from model_id, applying bnb_config at load time.
# device_map="auto" leaves device placement of the weights to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)
def chat(prompt):
    """Generate a sampled reply to ``prompt`` with the loaded model.

    Args:
        prompt: Text entered by the user in the Gradio textbox.

    Returns:
        The decoded newly generated text only (the prompt is not echoed
        back), or an empty string when ``prompt`` is empty or whitespace.
    """
    # A blank prompt may tokenize to an empty sequence, which generate()
    # cannot extend; answer with nothing instead of raising in the UI.
    if not prompt or not prompt.strip():
        return ""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        temperature=1.2,
        top_p=0.9,
    )

    # The returned sequence starts with the prompt tokens; drop them so the
    # chatbot answers with the continuation instead of repeating the user.
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
# Text-in / text-out Gradio interface wired to chat(); launch() is called
# as soon as this module is executed.
demo = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
    title="Friday-V1 Chatbot",
)
demo.launch()