Yixiao Wang (Computer Science) committed e358772 (1 parent: 2376772): add model selector

app.py CHANGED
@@ -20,6 +20,12 @@ DEVICE_MAP = "auto"
 QUANTIZATION_BITS = None
 TEMPERATURE = 0.0
 
+AVAILABLE_MODELS = {
+    "Longformer": "rshwndsz/ft-longformer-base-4096",
+    "Llama 3.2 3B [Paraphrased]": "rshwndsz/ft_paraphrased-hermes-3-llama-3.2-3b"
+}
+DEFAULT_MODEL_ID = list(AVAILABLE_MODELS.values())[0]
+
 
 SYSTEM_PROMPT = textwrap.dedent("""
 You are an assistant tasked with grading answers to a mind reading ability test. You will be provided with the following information:
@@ -106,39 +112,38 @@ def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -> str:
 
 
 @spaces.GPU
-def label_single_response(story, question, criteria, response):
+def label_single_response_with_model(model_id, story, question, criteria, response):
     prompt = format_prompt(story, question, criteria, response)
 
-    if "longformer" in …:
-        model, tokenizer = get_outlines_model(…)
+    if "longformer" in model_id:
+        model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
         inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
         with torch.no_grad():
             logits = model(**inputs).logits
         predicted_class = torch.argmax(logits, dim=1).item()
         return str(predicted_class)
     else:
-        model = get_outlines_model(…)
+        model = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
         generator = Generator(model)
         with torch.no_grad():
             result = generator(prompt)
         return result.score
 
-
 @spaces.GPU
-def label_multi_responses(story, question, criteria, response_file):
+def label_multi_responses_with_model(model_id, story, question, criteria, response_file):
     df = pd.read_csv(response_file.name)
     assert "response" in df.columns, "CSV must contain a 'response' column."
     prompts = [format_prompt(story, question, criteria, resp) for resp in df["response"]]
 
-    if "longformer" in …:
-        model, tokenizer = get_outlines_model(…)
+    if "longformer" in model_id:
+        model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
         inputs = tokenizer(prompts, return_tensors="pt", truncation=True, padding=True)
         with torch.no_grad():
             logits = model(**inputs).logits
         predicted_classes = torch.argmax(logits, dim=1).tolist()
         scores = [str(cls) for cls in predicted_classes]
     else:
-        model = get_outlines_model(…)
+        model = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
        generator = Generator(model)
         with torch.no_grad():
             results = generator(prompts)
@@ -148,33 +153,53 @@ def label_multi_responses(story, question, criteria, response_file):
     return df
 
 
-…
-)
-…
-)
+def single_response_ui(model_id):
+    return gr.Interface(
+        fn=lambda story, question, criteria, response: label_single_response_with_model(
+            model_id.value, story, question, criteria, response
+        ),
+        inputs=[
+            gr.Textbox(label="Story", lines=6),
+            gr.Textbox(label="Question", lines=2),
+            gr.Textbox(label="Criteria (Grading Scheme)", lines=4),
+            gr.Textbox(label="Single Response", lines=3),
+        ],
+        outputs=gr.Textbox(label="Score"),
+        live=False,
+    )
+
+def multi_response_ui(model_id):
+    return gr.Interface(
+        fn=lambda story, question, criteria, response_file: label_multi_responses_with_model(
+            model_id.value, story, question, criteria, response_file
+        ),
+        inputs=[
+            gr.Textbox(label="Story", lines=6),
+            gr.Textbox(label="Question", lines=2),
+            gr.Textbox(label="Criteria (Grading Scheme)", lines=4),
+            gr.File(label="Responses CSV (.csv with 'response' column)", file_types=[".csv"]),
+        ],
+        outputs=gr.Dataframe(label="Labeled Responses", type="pandas"),
+        live=False,
+    )
+
+with gr.Blocks(title="Zero-Shot Evaluation Grader") as iface:
+    model_selector = gr.Dropdown(
+        label="Select Model",
+        choices=list(AVAILABLE_MODELS.keys()),
+        value=list(AVAILABLE_MODELS.keys())[0],
+    )
+    selected_model_id = gr.State(value=DEFAULT_MODEL_ID)
+
+    def update_model_id(choice):
+        return AVAILABLE_MODELS[choice]
+
+    model_selector.change(fn=update_model_id, inputs=model_selector, outputs=selected_model_id)
+
+    gr.TabbedInterface(
+        [single_response_ui(selected_model_id), multi_response_ui(selected_model_id)],
+        ["Single Response", "Batch (CSV)"],
+    ).render()
 
 if __name__ == "__main__":
     iface.launch()
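One wrinkle in the wiring above: the `gr.Interface` lambdas read `model_id.value` at event time, but in Gradio a component's `.value` attribute holds its initial value; per-session state only reaches a handler when the `gr.State` is passed as an event input. A minimal, self-contained sketch of that documented pattern follows — `grade()` is a hypothetical stand-in for the Space's graders, not code from the commit:

```python
import gradio as gr

MODELS = {
    "Longformer": "rshwndsz/ft-longformer-base-4096",
    "Llama 3.2 3B [Paraphrased]": "rshwndsz/ft_paraphrased-hermes-3-llama-3.2-3b",
}

def grade(model_id, response):
    # Hypothetical stand-in for label_single_response_with_model(...).
    return f"[{model_id}] would grade: {response!r}"

with gr.Blocks() as demo:
    selector = gr.Dropdown(choices=list(MODELS.keys()),
                           value=list(MODELS.keys())[0], label="Select Model")
    model_id = gr.State(list(MODELS.values())[0])
    # Resolve the display name to a checkpoint id whenever the dropdown changes.
    selector.change(lambda choice: MODELS[choice], inputs=selector, outputs=model_id)

    response = gr.Textbox(label="Response")
    score = gr.Textbox(label="Score")
    # Pass the State as an event input so the handler receives the live value,
    # rather than reading model_id.value (the initial value) inside a closure.
    gr.Button("Grade").click(grade, inputs=[model_id, response], outputs=score)

if __name__ == "__main__":
    demo.launch()
```

Here the dropdown's `change` handler writes the resolved checkpoint id into the state, and the click handler receives it as an ordinary argument.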
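For the batch tab, the only contract the diff enforces is a CSV with a `response` column (the `assert "response" in df.columns` line). A sketch of a conforming file and of what the loader then does with it — the file name and sample answers are illustrative:

```python
import pandas as pd

# Build a conforming upload: one graded answer per row in a 'response' column.
pd.DataFrame({
    "response": [
        "She will look in the basket, where she left the marble.",
        "He knows the marble was moved to the box.",
    ]
}).to_csv("responses.csv", index=False)

# What label_multi_responses_with_model() does with the upload, in essence:
df = pd.read_csv("responses.csv")
assert "response" in df.columns, "CSV must contain a 'response' column."
prompts = [f"Story ... Question ... Criteria ... Answer: {resp}" for resp in df["response"]]
print(len(prompts), "prompts ready for grading")
```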
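The Longformer branch is plain sequence classification: tokenize, forward pass, argmax over class logits, and the class index is the grade string. A sketch of that path with stock transformers, assuming (as the diff implies but does not show) that `get_outlines_model` returns an ordinary classifier/tokenizer pair for this checkpoint:

```python
# Assumption flagged above: the checkpoint loads as a standard
# AutoModelForSequenceClassification; get_outlines_model() itself is not shown.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_id = "rshwndsz/ft-longformer-base-4096"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id)

prompts = ["<formatted grading prompt for one response>"]
inputs = tokenizer(prompts, return_tensors="pt", truncation=True, padding=True)
with torch.no_grad():
    logits = model(**inputs).logits              # shape: (batch, num_classes)
scores = [str(c) for c in torch.argmax(logits, dim=1).tolist()]  # grade per response
print(scores)
```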