Spaces:

ActiveYixiao
/

automatic_coding

Sleeping

App Files Files Community

ActiveYixiao committed on Aug 29

Commit

e59d2a7

verified ·

1 Parent(s): b403571

Update app.py

Browse files

Files changed (1) hide show

app.py +146 -108

app.py CHANGED Viewed

@@ -1,13 +1,14 @@
 import logging
 import textwrap
-from typing import Literal, Optional
 import gradio as gr
 import outlines
 import pandas as pd
 import spaces
 import torch
-from outlines import Generator
 from peft import PeftConfig, PeftModel
 from pydantic import BaseModel, ConfigDict
 from transformers import (
@@ -20,9 +21,11 @@ from transformers import (
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-MODEL_ID = "rshwndsz/ft-longformer-base-4096"
 DEVICE_MAP = "auto"
-QUANTIZATION_BITS = None
 TEMPERATURE = 0.0
 AVAILABLE_MODELS = [
@@ -39,7 +42,6 @@ AVAILABLE_MODELS = [
 ]
 DEFAULT_MODEL_ID = AVAILABLE_MODELS[0]
 SYSTEM_PROMPT = textwrap.dedent("""
 You are an assistant tasked with grading answers to a mind reading ability test. You will be provided with the following information:
 1. A story that was presented to participants as context
@@ -67,48 +69,68 @@ PROMPT_TEMPLATE = textwrap.dedent("""
 </Answer>
 Score:""").strip()
 class ResponseModel(BaseModel):
     model_config = ConfigDict(extra="forbid")
     score: Literal["0", "1"]
-def get_outlines_model(
-    model_id: str, device_map: str = "auto", quantization_bits: Optional[int] = 4
-):
-    if quantization_bits == 4:
-        quantization_config = BitsAndBytesConfig(
-            load_in_4bit=True,
-            bnb_4bit_quant_type="nf4",
-            bnb_4bit_use_double_quant=True,
-            bnb_4bit_compute_dtype=torch.bfloat16,
-        )
-    elif quantization_bits == 8:
-        quantization_config = BitsAndBytesConfig(load_in_8bit=True)
-    else:
-        quantization_config = None
-    if "longformer" in model_id:
-        hf_model = AutoModelForSequenceClassification.from_pretrained(model_id)
-        hf_tokenizer = AutoTokenizer.from_pretrained(model_id)
-        return hf_model, hf_tokenizer
-    peft_config = PeftConfig.from_pretrained(model_id)
-    base_model_id = peft_config.base_model_name_or_path
-    base_model = AutoModelForCausalLM.from_pretrained(
-        base_model_id,
-        device_map=device_map,
-        quantization_config=quantization_config,
-    )
-    hf_model = PeftModel.from_pretrained(base_model, model_id)
-    hf_tokenizer = AutoTokenizer.from_pretrained(
-        base_model_id, use_fast=True, clean_up_tokenization_spaces=True
-    )
-    model = outlines.from_transformers(hf_model, hf_tokenizer)
-    return model
 def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -> str:
     prompt = PROMPT_TEMPLATE.format(
@@ -120,58 +142,80 @@ def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -
     full_prompt = SYSTEM_PROMPT + "\n\n" + prompt
     return full_prompt
 @spaces.GPU
 def label_single_response_with_model(model_id, story, question, criteria, response):
-    prompt = format_prompt(story, question, criteria, response)
-    if "longformer" in model_id:
-        model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
-        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
-        with torch.no_grad():
-            logits = model(**inputs).logits
-        predicted_class = torch.argmax(logits, dim=1).item()
-        return str(predicted_class)
-    else:
-        model = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
-        generator = Generator(model)
-        with torch.no_grad():
             result = generator(prompt)
-        return result.score
 @spaces.GPU
-def label_multi_responses_with_model(
-    model_id, story, question, criteria, response_file
-):
-    df = pd.read_csv(response_file.name)
-    assert "response" in df.columns, "CSV must contain a 'response' column."
-    prompts = [
-        format_prompt(story, question, criteria, resp) for resp in df["response"]
-    ]
-    if "longformer" in model_id:
-        model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
-        inputs = tokenizer(prompts, return_tensors="pt", truncation=True, padding=True)
-        with torch.no_grad():
-            logits = model(**inputs).logits
-        predicted_classes = torch.argmax(logits, dim=1).tolist()
-        scores = [str(cls) for cls in predicted_classes]
-    else:
-        model = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
-        generator = Generator(model)
-        with torch.no_grad():
-            results = generator(prompts)
-        scores = [r.score for r in results]
-    df["score"] = scores
-    return df
 def single_response_ui(model_id):
     return gr.Interface(
         fn=lambda story, question, criteria, response: label_single_response_with_model(
-            model_id.value, story, question, criteria, response
         ),
         inputs=[
             gr.Textbox(label="Story", lines=6),
@@ -181,51 +225,45 @@ def single_response_ui(model_id):
         ],
         outputs=gr.Textbox(label="Score"),
         live=False,
     )
 def multi_response_ui(model_id):
     return gr.Interface(
-        fn=lambda story,
-        question,
-        criteria,
-        response_file: label_multi_responses_with_model(
-            model_id.value, story, question, criteria, response_file
         ),
         inputs=[
             gr.Textbox(label="Story", lines=6),
             gr.Textbox(label="Question", lines=2),
             gr.Textbox(label="Criteria (Grading Scheme)", lines=4),
             gr.File(
-                label="Responses CSV (.csv with 'response' column)", file_types=[".csv"]
             ),
         ],
         outputs=gr.Dataframe(label="Labeled Responses", type="pandas"),
         live=False,
     )
 with gr.Blocks(title="Zero-Shot Evaluation Grader") as iface:
     model_selector = gr.Dropdown(
         label="Select Model",
         choices=AVAILABLE_MODELS,
-        value=AVAILABLE_MODELS[0],
-    )
-    selected_model_id = gr.State(value=DEFAULT_MODEL_ID)
-    def update_model_id(choice):
-        return choice
-    model_selector.change(
-        fn=update_model_id, inputs=model_selector, outputs=selected_model_id
     )
     with gr.Tabs():
         with gr.Tab("Single Response"):
-            single_response_ui(selected_model_id)
-        with gr.Tab("Batch (CSV)"):
-            multi_response_ui(selected_model_id)
 if __name__ == "__main__":
     iface.launch(share=True)

 import logging
 import textwrap
+import threading
+from typing import Literal, Optional, Tuple, Union
 import gradio as gr
 import outlines
 import pandas as pd
 import spaces
 import torch
+from outlines import generate
 from peft import PeftConfig, PeftModel
 from pydantic import BaseModel, ConfigDict
 from transformers import (
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# Configuration
+MODEL_CACHE = {}
+MODEL_LOCK = threading.Lock()
 DEVICE_MAP = "auto"
+QUANTIZATION_BITS = 4  # Changed to 4-bit by default for efficiency
 TEMPERATURE = 0.0
 AVAILABLE_MODELS = [
 ]
 DEFAULT_MODEL_ID = AVAILABLE_MODELS[0]
 SYSTEM_PROMPT = textwrap.dedent("""
 You are an assistant tasked with grading answers to a mind reading ability test. You will be provided with the following information:
 1. A story that was presented to participants as context
 </Answer>
 Score:""").strip()
 # Schema for the grader's structured output: a single binary score.
 class ResponseModel(BaseModel):
     # Reject any keys other than the fields declared on this model.
     model_config = ConfigDict(extra="forbid")
     # The grade, constrained to the string "0" or "1".
     score: Literal["0", "1"]
+def get_model_and_tokenizer(
+    model_id: str,
+    device_map: str = "auto",
+    quantization_bits: Optional[int] = 4
+) -> Tuple[Union[AutoModelForCausalLM, AutoModelForSequenceClassification], AutoTokenizer]:
+    """Load model and tokenizer with caching"""
+    with MODEL_LOCK:
+        if model_id in MODEL_CACHE:
+            return MODEL_CACHE[model_id]
+        try:
+            if quantization_bits == 4:
+                quantization_config = BitsAndBytesConfig(
+                    load_in_4bit=True,
+                    bnb_4bit_quant_type="nf4",
+                    bnb_4bit_use_double_quant=True,
+                    bnb_4bit_compute_dtype=torch.bfloat16,
+                )
+            elif quantization_bits == 8:
+                quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+            else:
+                quantization_config = None
+            if "longformer" in model_id:
+                # For sequence classification models
+                model = AutoModelForSequenceClassification.from_pretrained(
+                    model_id,
+                    device_map=device_map
+                )
+                tokenizer = AutoTokenizer.from_pretrained(model_id)
+                if tokenizer.pad_token is None:
+                    tokenizer.pad_token = tokenizer.eos_token
+            else:
+                # For causal LM models
+                peft_config = PeftConfig.from_pretrained(model_id)
+                base_model_id = peft_config.base_model_name_or_path
+                model = AutoModelForCausalLM.from_pretrained(
+                    base_model_id,
+                    device_map=device_map,
+                    quantization_config=quantization_config,
+                    torch_dtype=torch.bfloat16,
+                )
+                model = PeftModel.from_pretrained(model, model_id)
+                tokenizer = AutoTokenizer.from_pretrained(
+                    base_model_id,
+                    use_fast=True,
+                    clean_up_tokenization_spaces=True
+                )
+                if tokenizer.pad_token is None:
+                    tokenizer.pad_token = tokenizer.eos_token
+            MODEL_CACHE[model_id] = (model, tokenizer)
+            return model, tokenizer
+        except Exception as e:
+            logger.error(f"Error loading model {model_id}: {str(e)}")
+            raise
 def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -> str:
     prompt = PROMPT_TEMPLATE.format(
     full_prompt = SYSTEM_PROMPT + "\n\n" + prompt
     return full_prompt
 @spaces.GPU
 def label_single_response_with_model(model_id, story, question, criteria, response):
+    try:
+        prompt = format_prompt(story, question, criteria, response)
+        model, tokenizer = get_model_and_tokenizer(model_id, DEVICE_MAP, QUANTIZATION_BITS)
+        if "longformer" in model_id:
+            # Sequence classification approach
+            inputs = tokenizer(
+                prompt,
+                return_tensors="pt",
+                truncation=True,
+                padding=True,
+                max_length=4096
+            )
+            with torch.no_grad():
+                logits = model(**inputs).logits
+            predicted_class = torch.argmax(logits, dim=1).item()
+            return str(predicted_class)
+        else:
+            # Structured generation with outlines
+            generator = generate.json(model, ResponseModel, max_tokens=20)
             result = generator(prompt)
+            return result.score
+    except Exception as e:
+        logger.error(f"Error in single response labeling: {str(e)}")
+        return f"Error: {str(e)}"
 @spaces.GPU
+def label_multi_responses_with_model(model_id, story, question, criteria, response_file):
+    try:
+        df = pd.read_csv(response_file.name)
+        assert "response" in df.columns, "CSV must contain a 'response' column."
+        model, tokenizer = get_model_and_tokenizer(model_id, DEVICE_MAP, QUANTIZATION_BITS)
+        scores = []
+        if "longformer" in model_id:
+            # Batch processing for sequence classification
+            prompts = [
+                format_prompt(story, question, criteria, resp)
+                for resp in df["response"]
+            ]
+            inputs = tokenizer(
+                prompts,
+                return_tensors="pt",
+                truncation=True,
+                padding=True,
+                max_length=4096
+            )
+            with torch.no_grad():
+                logits = model(**inputs).logits
+            predicted_classes = torch.argmax(logits, dim=1).tolist()
+            scores = [str(cls) for cls in predicted_classes]
+        else:
+            # Sequential processing for generative models
+            generator = generate.json(model, ResponseModel, max_tokens=20)
+            for response in df["response"]:
+                prompt = format_prompt(story, question, criteria, response)
+                result = generator(prompt)
+                scores.append(result.score)
+        df["score"] = scores
+        return df
+    except Exception as e:
+        logger.error(f"Error in multi response labeling: {str(e)}")
+        return pd.DataFrame({"error": [str(e)]})
 def single_response_ui(model_id):
     return gr.Interface(
         fn=lambda story, question, criteria, response: label_single_response_with_model(
+            model_id, story, question, criteria, response
         ),
         inputs=[
             gr.Textbox(label="Story", lines=6),
         ],
         outputs=gr.Textbox(label="Score"),
         live=False,
+        title="Single Response Grader",
+        description="Grade a single response against the story, question, and criteria"
     )
 def multi_response_ui(model_id):
     return gr.Interface(
+        fn=lambda story, question, criteria, response_file: label_multi_responses_with_model(
+            model_id, story, question, criteria, response_file
         ),
         inputs=[
             gr.Textbox(label="Story", lines=6),
             gr.Textbox(label="Question", lines=2),
             gr.Textbox(label="Criteria (Grading Scheme)", lines=4),
             gr.File(
+                label="Responses CSV (.csv with 'response' column)",
+                file_types=[".csv"]
             ),
         ],
         outputs=gr.Dataframe(label="Labeled Responses", type="pandas"),
         live=False,
+        title="Batch Response Grader",
+        description="Upload a CSV file with responses to grade them in batch"
     )
 # Top-level UI: a heading, a model dropdown, and one tab per grading mode.
 with gr.Blocks(title="Zero-Shot Evaluation Grader") as iface:
+    gr.Markdown("# Zero-Shot Evaluation Grader")
+    gr.Markdown("Select a model and then use either the single response or batch processing tab.")
     model_selector = gr.Dropdown(
         label="Select Model",
         choices=AVAILABLE_MODELS,
+        value=DEFAULT_MODEL_ID,
     )
     # NOTE(review): `model_selector.value` is read once, while the layout is
     # being built, so both tabs are constructed with the dropdown's initial
     # value (DEFAULT_MODEL_ID). No event handler is attached to the dropdown
     # in this block, so later selections do not appear to reach the grading
     # functions. Confirm, and if unintended, feed the dropdown to the graders
     # as an input component instead of passing its build-time value.
     with gr.Tabs():
         with gr.Tab("Single Response"):
+            single_response_ui(model_selector.value)
+        with gr.Tab("Batch Processing (CSV)"):
+            multi_response_ui(model_selector.value)
 # Start the app only when this file is run as a script.
 if __name__ == "__main__":
     iface.launch(share=True)