Spaces:

ActiveYixiao
/

automatic_coding

Sleeping

App Files Files Community

ActiveYixiao commited on Aug 29

Commit

af231f5

verified ·

1 Parent(s): e59d2a7

Update app.py

Browse files

Files changed (1) hide show

app.py +88 -182

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import logging
 import textwrap
-import threading
-from typing import Literal, Optional, Tuple, Union
 import gradio as gr
 import outlines
@@ -21,11 +20,9 @@ from transformers import (
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-# Configuration
-MODEL_CACHE = {}
-MODEL_LOCK = threading.Lock()
 DEVICE_MAP = "auto"
-QUANTIZATION_BITS = 4  # Changed to 4-bit by default for efficiency
 TEMPERATURE = 0.0
 AVAILABLE_MODELS = [
@@ -42,6 +39,10 @@ AVAILABLE_MODELS = [
 ]
 DEFAULT_MODEL_ID = AVAILABLE_MODELS[0]
 SYSTEM_PROMPT = textwrap.dedent("""
 You are an assistant tasked with grading answers to a mind reading ability test. You will be provided with the following information:
 1. A story that was presented to participants as context
@@ -69,68 +70,44 @@ PROMPT_TEMPLATE = textwrap.dedent("""
 </Answer>
 Score:""").strip()
-class ResponseModel(BaseModel):
-    model_config = ConfigDict(extra="forbid")
-    score: Literal["0", "1"]
-def get_model_and_tokenizer(
-    model_id: str,
-    device_map: str = "auto",
-    quantization_bits: Optional[int] = 4
-) -> Tuple[Union[AutoModelForCausalLM, AutoModelForSequenceClassification], AutoTokenizer]:
-    """Load model and tokenizer with caching"""
-    with MODEL_LOCK:
-        if model_id in MODEL_CACHE:
-            return MODEL_CACHE[model_id]
-        try:
-            if quantization_bits == 4:
-                quantization_config = BitsAndBytesConfig(
-                    load_in_4bit=True,
-                    bnb_4bit_quant_type="nf4",
-                    bnb_4bit_use_double_quant=True,
-                    bnb_4bit_compute_dtype=torch.bfloat16,
-                )
-            elif quantization_bits == 8:
-                quantization_config = BitsAndBytesConfig(load_in_8bit=True)
-            else:
-                quantization_config = None
-            if "longformer" in model_id:
-                # For sequence classification models
-                model = AutoModelForSequenceClassification.from_pretrained(
-                    model_id,
-                    device_map=device_map
-                )
-                tokenizer = AutoTokenizer.from_pretrained(model_id)
-                if tokenizer.pad_token is None:
-                    tokenizer.pad_token = tokenizer.eos_token
-            else:
-                # For causal LM models
-                peft_config = PeftConfig.from_pretrained(model_id)
-                base_model_id = peft_config.base_model_name_or_path
-                model = AutoModelForCausalLM.from_pretrained(
-                    base_model_id,
-                    device_map=device_map,
-                    quantization_config=quantization_config,
-                    torch_dtype=torch.bfloat16,
-                )
-                model = PeftModel.from_pretrained(model, model_id)
-                tokenizer = AutoTokenizer.from_pretrained(
-                    base_model_id,
-                    use_fast=True,
-                    clean_up_tokenization_spaces=True
-                )
-                if tokenizer.pad_token is None:
-                    tokenizer.pad_token = tokenizer.eos_token
-            MODEL_CACHE[model_id] = (model, tokenizer)
-            return model, tokenizer
-        except Exception as e:
-            logger.error(f"Error loading model {model_id}: {str(e)}")
-            raise
 def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -> str:
     prompt = PROMPT_TEMPLATE.format(
@@ -142,128 +119,57 @@ def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -
     full_prompt = SYSTEM_PROMPT + "\n\n" + prompt
     return full_prompt
 @spaces.GPU
 def label_single_response_with_model(model_id, story, question, criteria, response):
-    try:
-        prompt = format_prompt(story, question, criteria, response)
-        model, tokenizer = get_model_and_tokenizer(model_id, DEVICE_MAP, QUANTIZATION_BITS)
-        if "longformer" in model_id:
-            # Sequence classification approach
-            inputs = tokenizer(
-                prompt,
-                return_tensors="pt",
-                truncation=True,
-                padding=True,
-                max_length=4096
-            )
-            with torch.no_grad():
-                logits = model(**inputs).logits
-            predicted_class = torch.argmax(logits, dim=1).item()
-            return str(predicted_class)
-        else:
-            # Structured generation with outlines
-            generator = generate.json(model, ResponseModel, max_tokens=20)
-            result = generator(prompt)
-            return result.score
-    except Exception as e:
-        logger.error(f"Error in single response labeling: {str(e)}")
-        return f"Error: {str(e)}"
 @spaces.GPU
-def label_multi_responses_with_model(model_id, story, question, criteria, response_file):
-    try:
-        df = pd.read_csv(response_file.name)
-        assert "response" in df.columns, "CSV must contain a 'response' column."
-        model, tokenizer = get_model_and_tokenizer(model_id, DEVICE_MAP, QUANTIZATION_BITS)
         scores = []
-        if "longformer" in model_id:
-            # Batch processing for sequence classification
-            prompts = [
-                format_prompt(story, question, criteria, resp)
-                for resp in df["response"]
-            ]
-            inputs = tokenizer(
-                prompts,
-                return_tensors="pt",
-                truncation=True,
-                padding=True,
-                max_length=4096
-            )
-            with torch.no_grad():
-                logits = model(**inputs).logits
-            predicted_classes = torch.argmax(logits, dim=1).tolist()
-            scores = [str(cls) for cls in predicted_classes]
-        else:
-            # Sequential processing for generative models
-            generator = generate.json(model, ResponseModel, max_tokens=20)
-            for response in df["response"]:
-                prompt = format_prompt(story, question, criteria, response)
-                result = generator(prompt)
-                scores.append(result.score)
-        df["score"] = scores
-        return df
-    except Exception as e:
-        logger.error(f"Error in multi response labeling: {str(e)}")
-        return pd.DataFrame({"error": [str(e)]})
-def single_response_ui(model_id):
-    return gr.Interface(
-        fn=lambda story, question, criteria, response: label_single_response_with_model(
-            model_id, story, question, criteria, response
-        ),
-        inputs=[
-            gr.Textbox(label="Story", lines=6),
-            gr.Textbox(label="Question", lines=2),
-            gr.Textbox(label="Criteria (Grading Scheme)", lines=4),
-            gr.Textbox(label="Single Response", lines=3),
-        ],
-        outputs=gr.Textbox(label="Score"),
-        live=False,
-        title="Single Response Grader",
-        description="Grade a single response against the story, question, and criteria"
-    )
-def multi_response_ui(model_id):
-    return gr.Interface(
-        fn=lambda story, question, criteria, response_file: label_multi_responses_with_model(
-            model_id, story, question, criteria, response_file
-        ),
-        inputs=[
-            gr.Textbox(label="Story", lines=6),
-            gr.Textbox(label="Question", lines=2),
-            gr.Textbox(label="Criteria (Grading Scheme)", lines=4),
-            gr.File(
-                label="Responses CSV (.csv with 'response' column)",
-                file_types=[".csv"]
-            ),
-        ],
-        outputs=gr.Dataframe(label="Labeled Responses", type="pandas"),
-        live=False,
-        title="Batch Response Grader",
-        description="Upload a CSV file with responses to grade them in batch"
-    )
-with gr.Blocks(title="Zero-Shot Evaluation Grader") as iface:
-    gr.Markdown("# Zero-Shot Evaluation Grader")
-    gr.Markdown("Select a model and then use either the single response or batch processing tab.")
-    model_selector = gr.Dropdown(
-        label="Select Model",
-        choices=AVAILABLE_MODELS,
-        value=DEFAULT_MODEL_ID,
-    )
-    with gr.Tabs():
-        with gr.Tab("Single Response"):
-            single_response_ui(model_selector.value)
-        with gr.Tab("Batch Processing (CSV)"):
-            multi_response_ui(model_selector.value)
-if __name__ == "__main__":
-    iface.launch(share=True)

 import logging
 import textwrap
+from typing import Literal, Optional
 import gradio as gr
 import outlines
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+MODEL_ID = "rshwndsz/ft-longformer-base-4096"
 DEVICE_MAP = "auto"
+QUANTIZATION_BITS = None
 TEMPERATURE = 0.0
 AVAILABLE_MODELS = [
 ]
 DEFAULT_MODEL_ID = AVAILABLE_MODELS[0]
+# Define response model
+class ResponseModel(BaseModel):
+    score: Literal["0", "1"]
 SYSTEM_PROMPT = textwrap.dedent("""
 You are an assistant tasked with grading answers to a mind reading ability test. You will be provided with the following information:
 1. A story that was presented to participants as context
 </Answer>
 Score:""").strip()
+def get_outlines_model(
+    model_id: str, device_map: str = "auto", quantization_bits: Optional[int] = 4
+):
+    if quantization_bits == 4:
+        quantization_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_use_double_quant=True,
+            bnb_4bit_compute_dtype=torch.bfloat16,
+        )
+    elif quantization_bits == 8:
+        quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+    else:
+        quantization_config = None
+    if "longformer" in model_id:
+        hf_model = AutoModelForSequenceClassification.from_pretrained(model_id)
+        hf_tokenizer = AutoTokenizer.from_pretrained(model_id)
+        return hf_model, hf_tokenizer
+    peft_config = PeftConfig.from_pretrained(model_id)
+    base_model_id = peft_config.base_model_name_or_path
+    base_model = AutoModelForCausalLM.from_pretrained(
+        base_model_id,
+        device_map=device_map,
+        quantization_config=quantization_config,
+        torch_dtype=torch.bfloat16,
+    )
+    hf_model = PeftModel.from_pretrained(base_model, model_id)
+    hf_tokenizer = AutoTokenizer.from_pretrained(
+        base_model_id, use_fast=True, clean_up_tokenization_spaces=True
+    )
+    hf_tokenizer.pad_token = hf_tokenizer.eos_token
+    return hf_model, hf_tokenizer
 def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -> str:
     prompt = PROMPT_TEMPLATE.format(
     full_prompt = SYSTEM_PROMPT + "\n\n" + prompt
     return full_prompt
 @spaces.GPU
 def label_single_response_with_model(model_id, story, question, criteria, response):
+    prompt = format_prompt(story, question, criteria, response)
+    logger.info(f"Prompt: {prompt}")
+    if "longformer" in model_id:
+        model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
+        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True, max_length=4096)
+        with torch.no_grad():
+            logits = model(**inputs).logits
+        predicted_class = torch.argmax(logits, dim=1).item()
+        logger.info(f"Predicted class: {predicted_class}")
+        return str(predicted_class)
+    else:
+        model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
+        # Use structured generation with outlines
+        generator = generate.json(model, ResponseModel)
+        result = generator(prompt, max_tokens=20)
+        logger.info(f"Generated result: {result}")
+        return result.score
 @spaces.GPU
+def label_multi_responses_with_model(
+    model_id, story, question, criteria, response_file
+):
+    df = pd.read_csv(response_file.name)
+    assert "response" in df.columns, "CSV must contain a 'response' column."
+    if "longformer" in model_id:
+        model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
+        prompts = [
+            format_prompt(story, question, criteria, resp) for resp in df["response"]
+        ]
+        inputs = tokenizer(prompts, return_tensors="pt", truncation=True, padding=True, max_length=4096)
+        with torch.no_grad():
+            logits = model(**inputs).logits
+        predicted_classes = torch.argmax(logits, dim=1).tolist()
+        scores = [str(cls) for cls in predicted_classes]
+    else:
+        model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
+        generator = generate.json(model, ResponseModel)
         scores = []
+        for resp in df["response"]:
+            prompt = format_prompt(story, question, criteria, resp)
+            result = generator(prompt, max_tokens=20)
+            scores.append(result.score)
+    df["score"] = scores
+    return df
+# Rest of the code remains the same...