Yixiao Wang (Computer Science) committed on
Commit e358772 · 1 Parent(s): 2376772

add model selector

Files changed (1)
  1. app.py +61 -36
app.py CHANGED
@@ -20,6 +20,12 @@ DEVICE_MAP = "auto"
 QUANTIZATION_BITS = None
 TEMPERATURE = 0.0
 
+AVAILABLE_MODELS = {
+    "Longformer": "rshwndsz/ft-longformer-base-4096",
+    "Llama 3.2 3B [Paraphrased]": "rshwndsz/ft_paraphrased-hermes-3-llama-3.2-3b"
+}
+DEFAULT_MODEL_ID = list(AVAILABLE_MODELS.values())[0]
+
 
 SYSTEM_PROMPT = textwrap.dedent("""
 You are an assistant tasked with grading answers to a mind reading ability test. You will be provided with the following information:
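
Note: DEFAULT_MODEL_ID relies on dict insertion order (guaranteed since Python 3.7), so the first entry, the Longformer checkpoint, becomes the default. A quick sanity check of that resolution:

    # First value in insertion order is the default model id.
    assert DEFAULT_MODEL_ID == AVAILABLE_MODELS["Longformer"]
    assert DEFAULT_MODEL_ID == "rshwndsz/ft-longformer-base-4096"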
@@ -106,39 +112,38 @@ def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -
 
 
 @spaces.GPU
-def label_single_response(story, question, criteria, response):
+def label_single_response_with_model(model_id, story, question, criteria, response):
     prompt = format_prompt(story, question, criteria, response)
 
-    if "longformer" in MODEL_ID:
-        model, tokenizer = get_outlines_model(MODEL_ID, DEVICE_MAP, QUANTIZATION_BITS)
+    if "longformer" in model_id:
+        model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
         inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
         with torch.no_grad():
             logits = model(**inputs).logits
         predicted_class = torch.argmax(logits, dim=1).item()
         return str(predicted_class)
     else:
-        model = get_outlines_model(MODEL_ID, DEVICE_MAP, QUANTIZATION_BITS)
+        model = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
         generator = Generator(model)
         with torch.no_grad():
             result = generator(prompt)
         return result.score
 
-
 @spaces.GPU
-def label_multi_responses(story, question, criteria, response_file):
+def label_multi_responses_with_model(model_id, story, question, criteria, response_file):
     df = pd.read_csv(response_file.name)
     assert "response" in df.columns, "CSV must contain a 'response' column."
     prompts = [format_prompt(story, question, criteria, resp) for resp in df["response"]]
 
-    if "longformer" in MODEL_ID:
-        model, tokenizer = get_outlines_model(MODEL_ID, DEVICE_MAP, QUANTIZATION_BITS)
+    if "longformer" in model_id:
+        model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
        inputs = tokenizer(prompts, return_tensors="pt", truncation=True, padding=True)
         with torch.no_grad():
             logits = model(**inputs).logits
         predicted_classes = torch.argmax(logits, dim=1).tolist()
         scores = [str(cls) for cls in predicted_classes]
     else:
-        model = get_outlines_model(MODEL_ID, DEVICE_MAP, QUANTIZATION_BITS)
+        model = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
         generator = Generator(model)
         with torch.no_grad():
             results = generator(prompts)
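
The two branches above rely on get_outlines_model, which is defined elsewhere in app.py and is not shown in this diff: for Longformer ids it must return a (model, tokenizer) pair, otherwise a single model handle consumable by Generator. A hypothetical sketch of that contract (the signature is taken from the call sites above; the body is an assumption, not the app's actual code):

    from transformers import AutoModelForSequenceClassification, AutoTokenizer

    def get_outlines_model(model_id, device_map, quantization_bits):
        # Encoder checkpoints grade via a classification head and need
        # their tokenizer at the call site, hence the two-tuple.
        if "longformer" in model_id:
            tokenizer = AutoTokenizer.from_pretrained(model_id)
            model = AutoModelForSequenceClassification.from_pretrained(
                model_id, device_map=device_map
            )
            return model, tokenizer
        # Generative checkpoints would be wrapped for structured decoding
        # (outlines); loading details vary by library version, elided here.
        ...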
@@ -148,33 +153,53 @@ def label_multi_responses(story, question, criteria, response_file):
     return df
 
 
-single_tab = gr.Interface(
-    fn=label_single_response,
-    inputs=[
-        gr.Textbox(label="Story", lines=6),
-        gr.Textbox(label="Question", lines=2),
-        gr.Textbox(label="Criteria (Grading Scheme)", lines=4),
-        gr.Textbox(label="Single Response", lines=3),
-    ],
-    outputs=gr.Textbox(label="Score"),
-)
-
-multi_tab = gr.Interface(
-    fn=label_multi_responses,
-    inputs=[
-        gr.Textbox(label="Story", lines=6),
-        gr.Textbox(label="Question", lines=2),
-        gr.Textbox(label="Criteria (Grading Scheme)", lines=4),
-        gr.File(label="Responses CSV (.csv with 'response' column)", file_types=[".csv"]),
-    ],
-    outputs=gr.Dataframe(label="Labeled Responses", type="pandas"),
-)
-
-iface = gr.TabbedInterface(
-    [single_tab, multi_tab],
-    ["Single Response", "Batch (CSV)"],
-    title="Zero-Shot Evaluation Grader",
-)
+def single_response_ui(model_id):
+    return gr.Interface(
+        fn=lambda story, question, criteria, response: label_single_response_with_model(
+            model_id.value, story, question, criteria, response
+        ),
+        inputs=[
+            gr.Textbox(label="Story", lines=6),
+            gr.Textbox(label="Question", lines=2),
+            gr.Textbox(label="Criteria (Grading Scheme)", lines=4),
+            gr.Textbox(label="Single Response", lines=3),
+        ],
+        outputs=gr.Textbox(label="Score"),
+        live=False,
+    )
+
+def multi_response_ui(model_id):
+    return gr.Interface(
+        fn=lambda story, question, criteria, response_file: label_multi_responses_with_model(
+            model_id.value, story, question, criteria, response_file
+        ),
+        inputs=[
+            gr.Textbox(label="Story", lines=6),
+            gr.Textbox(label="Question", lines=2),
+            gr.Textbox(label="Criteria (Grading Scheme)", lines=4),
+            gr.File(label="Responses CSV (.csv with 'response' column)", file_types=[".csv"]),
+        ],
+        outputs=gr.Dataframe(label="Labeled Responses", type="pandas"),
+        live=False,
+    )
+
+with gr.Blocks(title="Zero-Shot Evaluation Grader") as iface:
+    model_selector = gr.Dropdown(
+        label="Select Model",
+        choices=list(AVAILABLE_MODELS.keys()),
+        value=list(AVAILABLE_MODELS.keys())[0],
+    )
+    selected_model_id = gr.State(value=DEFAULT_MODEL_ID)
+
+    def update_model_id(choice):
+        return AVAILABLE_MODELS[choice]
+
+    model_selector.change(fn=update_model_id, inputs=model_selector, outputs=selected_model_id)
+
+    gr.TabbedInterface(
+        [single_response_ui(selected_model_id), multi_response_ui(selected_model_id)],
+        ["Single Response", "Batch (CSV)"],
+    ).render()
 
 if __name__ == "__main__":
     iface.launch()
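
For the Batch (CSV) tab, label_multi_responses_with_model only requires a response column; every row is graded against the same story, question, and criteria. A hypothetical responses.csv (rows invented for illustration):

    response
    "She still thinks the marble is in the basket."
    "He knows the chocolate was moved to the cupboard."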
 
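One caveat on the wiring above: the tab builders read model_id.value inside their lambdas, and for a gr.State component that attribute generally holds the initial value rather than the live per-session value, so the dropdown selection may never reach the grading functions. The usual Gradio pattern is to pass the state component to the event as an input. A minimal sketch of that alternative for the single-response path, reusing the commit's names (an assumption, not part of this commit):

    with gr.Blocks(title="Zero-Shot Evaluation Grader") as iface:
        model_selector = gr.Dropdown(
            label="Select Model",
            choices=list(AVAILABLE_MODELS.keys()),
            value=list(AVAILABLE_MODELS.keys())[0],
        )
        selected_model_id = gr.State(value=DEFAULT_MODEL_ID)
        model_selector.change(
            fn=lambda choice: AVAILABLE_MODELS[choice],
            inputs=model_selector,
            outputs=selected_model_id,
        )

        story = gr.Textbox(label="Story", lines=6)
        question = gr.Textbox(label="Question", lines=2)
        criteria = gr.Textbox(label="Criteria (Grading Scheme)", lines=4)
        response = gr.Textbox(label="Single Response", lines=3)
        score = gr.Textbox(label="Score")
        grade_btn = gr.Button("Grade")
        # The state is passed as a regular input, so the handler receives
        # the per-session model id picked in the dropdown.
        grade_btn.click(
            fn=label_single_response_with_model,
            inputs=[selected_model_id, story, question, criteria, response],
            outputs=score,
        )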