ActiveYixiao committed on
Commit
e59d2a7
·
verified ·
1 Parent(s): b403571

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +146 -108
app.py CHANGED
@@ -1,13 +1,14 @@
1
  import logging
2
  import textwrap
3
- from typing import Literal, Optional
 
4
 
5
  import gradio as gr
6
  import outlines
7
  import pandas as pd
8
  import spaces
9
  import torch
10
- from outlines import Generator
11
  from peft import PeftConfig, PeftModel
12
  from pydantic import BaseModel, ConfigDict
13
  from transformers import (
@@ -20,9 +21,11 @@ from transformers import (
# Root logging at INFO; `logger` is this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# NOTE(review): MODEL_ID is not referenced in the visible code; the UI uses
# AVAILABLE_MODELS / DEFAULT_MODEL_ID instead -- confirm before relying on it.
MODEL_ID = "rshwndsz/ft-longformer-base-4096"
# Passed by the labeling functions to the model loader as its device_map.
DEVICE_MAP = "auto"
# Passed to the model loader; only 4 or 8 select a BitsAndBytesConfig there,
# so None loads without quantization.
QUANTIZATION_BITS = None
# NOTE(review): not referenced in the visible code -- presumably the intended
# sampling temperature; confirm.
TEMPERATURE = 0.0
27
 
28
  AVAILABLE_MODELS = [
@@ -39,7 +42,6 @@ AVAILABLE_MODELS = [
39
  ]
40
  DEFAULT_MODEL_ID = AVAILABLE_MODELS[0]
41
 
42
-
43
  SYSTEM_PROMPT = textwrap.dedent("""
44
  You are an assistant tasked with grading answers to a mind reading ability test. You will be provided with the following information:
45
  1. A story that was presented to participants as context
@@ -67,48 +69,68 @@ PROMPT_TEMPLATE = textwrap.dedent("""
67
  </Answer>
68
  Score:""").strip()
69
 
70
-
# Schema for the grader's structured output: a single binary score.
class ResponseModel(BaseModel):
    # The grade, restricted to the string "0" or the string "1".
    score: Literal["0", "1"]

    # Reject any field other than the one declared above.
    model_config = ConfigDict(extra="forbid")
74
 
75
-
def get_outlines_model(
    model_id: str, device_map: str = "auto", quantization_bits: Optional[int] = 4
):
    """Load the grading model identified by ``model_id``.

    For ids containing ``"longformer"`` this returns a
    ``(model, tokenizer)`` pair for sequence classification. For every
    other id, ``model_id`` is treated as a PEFT adapter: its base causal
    LM is loaded, the adapter is applied, and the result is wrapped with
    ``outlines.from_transformers`` and returned as a single object.

    ``quantization_bits`` of 4 or 8 selects a bitsandbytes config for the
    base causal LM; any other value loads it without quantization.
    """
    bnb_config = None
    if quantization_bits == 8:
        bnb_config = BitsAndBytesConfig(load_in_8bit=True)
    if quantization_bits == 4:
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
        )

    # Classifier checkpoints are loaded directly and returned as a pair.
    if "longformer" in model_id:
        classifier = AutoModelForSequenceClassification.from_pretrained(model_id)
        classifier_tokenizer = AutoTokenizer.from_pretrained(model_id)
        return classifier, classifier_tokenizer

    # Generative checkpoints: resolve the adapter's base model first.
    adapter_config = PeftConfig.from_pretrained(model_id)
    base_id = adapter_config.base_model_name_or_path

    backbone = AutoModelForCausalLM.from_pretrained(
        base_id,
        device_map=device_map,
        quantization_config=bnb_config,
    )
    adapted = PeftModel.from_pretrained(backbone, model_id)
    base_tokenizer = AutoTokenizer.from_pretrained(
        base_id, use_fast=True, clean_up_tokenization_spaces=True
    )

    return outlines.from_transformers(adapted, base_tokenizer)
111
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
  def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -> str:
114
  prompt = PROMPT_TEMPLATE.format(
@@ -120,58 +142,80 @@ def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -
120
  full_prompt = SYSTEM_PROMPT + "\n\n" + prompt
121
  return full_prompt
122
 
123
-
@spaces.GPU
def label_single_response_with_model(model_id, story, question, criteria, response):
    """Grade one response and return the score as a string.

    Args:
        model_id: Model identifier; ids containing "longformer" use the
            sequence-classification path, all others use structured
            generation.
        story, question, criteria, response: Text inserted into the prompt
            by ``format_prompt``.

    Returns:
        The predicted class index as a string (classifier path) or the
        ``score`` field of the generated ``ResponseModel`` (generative path).
    """
    prompt = format_prompt(story, question, criteria, response)

    if "longformer" in model_id:
        model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            logits = model(**inputs).logits
        return str(torch.argmax(logits, dim=1).item())

    model = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
    # The generator must be given the output type; without it the model emits
    # unconstrained text and there is no `score` to read.
    generator = Generator(model, ResponseModel)
    with torch.no_grad():
        raw = generator(prompt)
    # The generator returns the JSON as text; validate it into the schema.
    return ResponseModel.model_validate_json(raw).score
141
-
 
 
 
142
 
@spaces.GPU
def label_multi_responses_with_model(
    model_id, story, question, criteria, response_file
):
    """Grade every row of an uploaded CSV and return it with a ``score`` column.

    Args:
        model_id: Model identifier; ids containing "longformer" use the
            sequence-classification path, all others use structured
            generation.
        story, question, criteria: Text shared by every prompt.
        response_file: The uploaded CSV, either a path string or an object
            whose ``name`` attribute is the path.

    Returns:
        The DataFrame read from the CSV with a string ``score`` column added.

    Raises:
        ValueError: If the CSV has no ``response`` column.
    """
    # Accept both a file-like upload (with `.name`) and a plain path string.
    csv_path = getattr(response_file, "name", response_file)
    df = pd.read_csv(csv_path)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if "response" not in df.columns:
        raise ValueError("CSV must contain a 'response' column.")

    prompts = [
        format_prompt(story, question, criteria, resp) for resp in df["response"]
    ]

    if not prompts:
        # Nothing to grade; avoid calling the tokenizer/model on an empty batch.
        scores = []
    elif "longformer" in model_id:
        model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
        inputs = tokenizer(prompts, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            logits = model(**inputs).logits
        scores = [str(cls) for cls in torch.argmax(logits, dim=1).tolist()]
    else:
        model = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
        # The output type is required for the result to follow the schema.
        generator = Generator(model, ResponseModel)
        with torch.no_grad():
            # The generator returns JSON text; validate each into the schema.
            scores = [
                ResponseModel.model_validate_json(generator(prompt)).score
                for prompt in prompts
            ]

    df["score"] = scores
    return df
169
-
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
  def single_response_ui(model_id):
172
  return gr.Interface(
173
  fn=lambda story, question, criteria, response: label_single_response_with_model(
174
- model_id.value, story, question, criteria, response
175
  ),
176
  inputs=[
177
  gr.Textbox(label="Story", lines=6),
@@ -181,51 +225,45 @@ def single_response_ui(model_id):
181
  ],
182
  outputs=gr.Textbox(label="Score"),
183
  live=False,
 
 
184
  )
185
 
186
-
def multi_response_ui(model_id):
    """Build the batch-grading ``gr.Interface``.

    ``model_id`` is an object whose ``.value`` is read on each submission and
    forwarded to ``label_multi_responses_with_model`` with the form inputs.
    """
    grade_csv = lambda story, question, criteria, response_file: (  # noqa: E731
        label_multi_responses_with_model(
            model_id.value, story, question, criteria, response_file
        )
    )

    # Form fields, in the order the handler receives them.
    story_box = gr.Textbox(label="Story", lines=6)
    question_box = gr.Textbox(label="Question", lines=2)
    criteria_box = gr.Textbox(label="Criteria (Grading Scheme)", lines=4)
    csv_upload = gr.File(
        label="Responses CSV (.csv with 'response' column)", file_types=[".csv"]
    )
    results_table = gr.Dataframe(label="Labeled Responses", type="pandas")

    return gr.Interface(
        fn=grade_csv,
        inputs=[story_box, question_box, criteria_box, csv_upload],
        outputs=results_table,
        live=False,
    )
206
 
207
-
208
  with gr.Blocks(title="Zero-Shot Evaluation Grader") as iface:
 
 
 
209
  model_selector = gr.Dropdown(
210
  label="Select Model",
211
  choices=AVAILABLE_MODELS,
212
- value=AVAILABLE_MODELS[0],
213
- )
214
- selected_model_id = gr.State(value=DEFAULT_MODEL_ID)
215
-
216
- def update_model_id(choice):
217
- return choice
218
-
219
- model_selector.change(
220
- fn=update_model_id, inputs=model_selector, outputs=selected_model_id
221
  )
222
-
223
  with gr.Tabs():
224
  with gr.Tab("Single Response"):
225
- single_response_ui(selected_model_id)
226
- with gr.Tab("Batch (CSV)"):
227
- multi_response_ui(selected_model_id)
228
-
229
 
230
  if __name__ == "__main__":
231
  iface.launch(share=True)
 
1
  import logging
2
  import textwrap
3
+ import threading
4
+ from typing import Literal, Optional, Tuple, Union
5
 
6
  import gradio as gr
7
  import outlines
8
  import pandas as pd
9
  import spaces
10
  import torch
11
+ from outlines import generate
12
  from peft import PeftConfig, PeftModel
13
  from pydantic import BaseModel, ConfigDict
14
  from transformers import (
 
# Root logging at INFO; `logger` is this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Configuration
# Loaded (model, tokenizer) pairs, filled and read by get_model_and_tokenizer.
MODEL_CACHE = {}
# Held by get_model_and_tokenizer while it consults MODEL_CACHE.
MODEL_LOCK = threading.Lock()
# Passed by the labeling functions to the model loader as its device_map.
DEVICE_MAP = "auto"
# Passed to the model loader; 4 selects the 4-bit NF4 BitsAndBytesConfig there.
QUANTIZATION_BITS = 4  # Changed to 4-bit by default for efficiency
# NOTE(review): not referenced in the visible code -- presumably the intended
# sampling temperature; confirm.
TEMPERATURE = 0.0
30
 
31
  AVAILABLE_MODELS = [
 
42
  ]
43
  DEFAULT_MODEL_ID = AVAILABLE_MODELS[0]
44
 
 
45
  SYSTEM_PROMPT = textwrap.dedent("""
46
  You are an assistant tasked with grading answers to a mind reading ability test. You will be provided with the following information:
47
  1. A story that was presented to participants as context
 
69
  </Answer>
70
  Score:""").strip()
71
 
 
# Schema for the grader's structured output: a single binary score.
class ResponseModel(BaseModel):
    # The grade, restricted to the string "0" or the string "1".
    score: Literal["0", "1"]

    # Reject any field other than the one declared above.
    model_config = ConfigDict(extra="forbid")
75
 
def get_model_and_tokenizer(
    model_id: str,
    device_map: str = "auto",
    quantization_bits: Optional[int] = 4,
) -> Tuple[Union[AutoModelForCausalLM, AutoModelForSequenceClassification], AutoTokenizer]:
    """Load a model and its tokenizer, caching the pair for later calls.

    Ids containing ``"longformer"`` are loaded as sequence classifiers.
    Every other id is treated as a PEFT adapter: its base causal LM is
    loaded (optionally quantized) and the adapter is applied on top.

    Args:
        model_id: Hub id of the classifier or of the PEFT adapter.
        device_map: Forwarded to ``from_pretrained``.
        quantization_bits: 4 or 8 selects a bitsandbytes config for the
            causal LM; any other value loads it without quantization.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        Exception: Whatever the underlying loaders raise; it is logged with
            its traceback and re-raised.
    """
    # Key on every argument that changes what gets loaded, so a call with
    # different settings never receives a model loaded under other settings.
    cache_key = (model_id, device_map, quantization_bits)

    # Loading happens while the lock is held so concurrent requests for the
    # same model do not load it twice.
    with MODEL_LOCK:
        cached = MODEL_CACHE.get(cache_key)
        if cached is not None:
            return cached

        try:
            if "longformer" in model_id:
                # For sequence classification models
                model = AutoModelForSequenceClassification.from_pretrained(
                    model_id,
                    device_map=device_map,
                )
                tokenizer = AutoTokenizer.from_pretrained(model_id)
            else:
                # For causal LM models: only this path uses quantization.
                if quantization_bits == 4:
                    quantization_config = BitsAndBytesConfig(
                        load_in_4bit=True,
                        bnb_4bit_quant_type="nf4",
                        bnb_4bit_use_double_quant=True,
                        bnb_4bit_compute_dtype=torch.bfloat16,
                    )
                elif quantization_bits == 8:
                    quantization_config = BitsAndBytesConfig(load_in_8bit=True)
                else:
                    quantization_config = None

                peft_config = PeftConfig.from_pretrained(model_id)
                base_model_id = peft_config.base_model_name_or_path

                model = AutoModelForCausalLM.from_pretrained(
                    base_model_id,
                    device_map=device_map,
                    quantization_config=quantization_config,
                    torch_dtype=torch.bfloat16,
                )
                model = PeftModel.from_pretrained(model, model_id)
                tokenizer = AutoTokenizer.from_pretrained(
                    base_model_id,
                    use_fast=True,
                    clean_up_tokenization_spaces=True,
                )

            # Padding is requested by the callers, so a pad token must exist.
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token

            MODEL_CACHE[cache_key] = (model, tokenizer)
            return model, tokenizer

        except Exception:
            # logger.exception records the traceback along with the message.
            logger.exception("Error loading model %s", model_id)
            raise
134
 
135
  def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -> str:
136
  prompt = PROMPT_TEMPLATE.format(
 
142
  full_prompt = SYSTEM_PROMPT + "\n\n" + prompt
143
  return full_prompt
144
 
 
@spaces.GPU
def label_single_response_with_model(model_id, story, question, criteria, response):
    """Grade one response and return the score as a string.

    Args:
        model_id: Model identifier; ids containing "longformer" use the
            sequence-classification path, all others use structured
            generation.
        story, question, criteria, response: Text inserted into the prompt
            by ``format_prompt``.

    Returns:
        The predicted class index as a string (classifier path), the
        ``score`` field of the generated ``ResponseModel`` (generative path),
        or ``"Error: <message>"`` if anything fails.
    """
    try:
        prompt = format_prompt(story, question, criteria, response)
        model, tokenizer = get_model_and_tokenizer(model_id, DEVICE_MAP, QUANTIZATION_BITS)

        if "longformer" in model_id:
            # Sequence classification approach
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=4096,
            )
            # The model is placed via device_map; the inputs must follow it.
            inputs = inputs.to(model.device)
            with torch.no_grad():
                logits = model(**inputs).logits
            return str(torch.argmax(logits, dim=1).item())

        # Structured generation with outlines: the raw transformers model has
        # to be wrapped first, and the token limit belongs to the call rather
        # than to the generator factory.
        outlines_model = outlines.models.Transformers(model, tokenizer)
        generator = generate.json(outlines_model, ResponseModel)
        with torch.no_grad():
            result = generator(prompt, max_tokens=20)
        return result.score

    except Exception as e:
        # Surface the failure in the UI instead of crashing the request.
        logger.exception("Error in single response labeling")
        return f"Error: {e}"
173
 
@spaces.GPU
def label_multi_responses_with_model(model_id, story, question, criteria, response_file):
    """Grade every row of an uploaded CSV and return it with a ``score`` column.

    Args:
        model_id: Model identifier; ids containing "longformer" use the
            sequence-classification path, all others use structured
            generation.
        story, question, criteria: Text shared by every prompt.
        response_file: The uploaded CSV, either a path string or an object
            whose ``name`` attribute is the path.

    Returns:
        The DataFrame read from the CSV with a string ``score`` column added,
        or a one-row DataFrame with an ``error`` column if anything fails.
    """
    try:
        if response_file is None:
            raise ValueError("No CSV file was uploaded.")
        # Accept both a file-like upload (with `.name`) and a plain path string.
        csv_path = getattr(response_file, "name", response_file)
        df = pd.read_csv(csv_path)
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if "response" not in df.columns:
            raise ValueError("CSV must contain a 'response' column.")

        prompts = [
            format_prompt(story, question, criteria, resp)
            for resp in df["response"]
        ]
        if not prompts:
            # Nothing to grade; skip loading and calling the model.
            df["score"] = []
            return df

        model, tokenizer = get_model_and_tokenizer(model_id, DEVICE_MAP, QUANTIZATION_BITS)

        if "longformer" in model_id:
            # Batch processing for sequence classification
            inputs = tokenizer(
                prompts,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=4096,
            )
            # The model is placed via device_map; the inputs must follow it.
            inputs = inputs.to(model.device)
            with torch.no_grad():
                logits = model(**inputs).logits
            scores = [str(cls) for cls in torch.argmax(logits, dim=1).tolist()]
        else:
            # Sequential processing for generative models. The raw
            # transformers model has to be wrapped for outlines, and the token
            # limit belongs to the call rather than to the generator factory.
            outlines_model = outlines.models.Transformers(model, tokenizer)
            generator = generate.json(outlines_model, ResponseModel)
            with torch.no_grad():
                scores = [generator(prompt, max_tokens=20).score for prompt in prompts]

        df["score"] = scores
        return df

    except Exception as e:
        # Surface the failure in the results table instead of crashing.
        logger.exception("Error in multi response labeling")
        return pd.DataFrame({"error": [str(e)]})
214
 
215
  def single_response_ui(model_id):
216
  return gr.Interface(
217
  fn=lambda story, question, criteria, response: label_single_response_with_model(
218
+ model_id, story, question, criteria, response
219
  ),
220
  inputs=[
221
  gr.Textbox(label="Story", lines=6),
 
225
  ],
226
  outputs=gr.Textbox(label="Score"),
227
  live=False,
228
+ title="Single Response Grader",
229
+ description="Grade a single response against the story, question, and criteria"
230
  )
231
 
 
def multi_response_ui(model_id):
    """Build the batch-grading ``gr.Interface``.

    ``model_id`` is captured when the interface is built and forwarded,
    together with the form inputs, to ``label_multi_responses_with_model``
    on every submission.
    """
    grade_csv = lambda story, question, criteria, response_file: (  # noqa: E731
        label_multi_responses_with_model(
            model_id, story, question, criteria, response_file
        )
    )

    # Form fields, in the order the handler receives them.
    story_box = gr.Textbox(label="Story", lines=6)
    question_box = gr.Textbox(label="Question", lines=2)
    criteria_box = gr.Textbox(label="Criteria (Grading Scheme)", lines=4)
    csv_upload = gr.File(
        label="Responses CSV (.csv with 'response' column)",
        file_types=[".csv"],
    )
    results_table = gr.Dataframe(label="Labeled Responses", type="pandas")

    return gr.Interface(
        fn=grade_csv,
        inputs=[story_box, question_box, criteria_box, csv_upload],
        outputs=results_table,
        live=False,
        title="Batch Response Grader",
        description="Upload a CSV file with responses to grade them in batch",
    )
251
 
 
# Top-level UI. The dropdown is wired directly into each click handler as an
# input: `model_selector.value` is only the construction-time default, so
# passing it to a builder once would pin every request to the default model.
with gr.Blocks(title="Zero-Shot Evaluation Grader") as iface:
    gr.Markdown("# Zero-Shot Evaluation Grader")
    gr.Markdown("Select a model and then use either the single response or batch processing tab.")

    model_selector = gr.Dropdown(
        label="Select Model",
        choices=AVAILABLE_MODELS,
        value=DEFAULT_MODEL_ID,
    )

    with gr.Tabs():
        with gr.Tab("Single Response"):
            gr.Markdown("Grade a single response against the story, question, and criteria")
            single_story = gr.Textbox(label="Story", lines=6)
            single_question = gr.Textbox(label="Question", lines=2)
            single_criteria = gr.Textbox(label="Criteria (Grading Scheme)", lines=4)
            single_answer = gr.Textbox(label="Response", lines=2)
            single_score = gr.Textbox(label="Score")
            gr.Button("Submit").click(
                fn=label_single_response_with_model,
                inputs=[
                    model_selector,
                    single_story,
                    single_question,
                    single_criteria,
                    single_answer,
                ],
                outputs=single_score,
            )
        with gr.Tab("Batch Processing (CSV)"):
            gr.Markdown("Upload a CSV file with responses to grade them in batch")
            batch_story = gr.Textbox(label="Story", lines=6)
            batch_question = gr.Textbox(label="Question", lines=2)
            batch_criteria = gr.Textbox(label="Criteria (Grading Scheme)", lines=4)
            batch_file = gr.File(
                label="Responses CSV (.csv with 'response' column)",
                file_types=[".csv"],
            )
            batch_table = gr.Dataframe(label="Labeled Responses", type="pandas")
            gr.Button("Submit").click(
                fn=label_multi_responses_with_model,
                inputs=[
                    model_selector,
                    batch_story,
                    batch_question,
                    batch_criteria,
                    batch_file,
                ],
                outputs=batch_table,
            )


if __name__ == "__main__":
    iface.launch(share=True)