ActiveYixiao committed on
Commit
af231f5
·
verified ·
1 Parent(s): e59d2a7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -182
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import logging
2
  import textwrap
3
- import threading
4
- from typing import Literal, Optional, Tuple, Union
5
 
6
  import gradio as gr
7
  import outlines
@@ -21,11 +20,9 @@ from transformers import (
21
  logging.basicConfig(level=logging.INFO)
22
  logger = logging.getLogger(__name__)
23
 
24
- # Configuration
25
- MODEL_CACHE = {}
26
- MODEL_LOCK = threading.Lock()
27
  DEVICE_MAP = "auto"
28
- QUANTIZATION_BITS = 4 # Changed to 4-bit by default for efficiency
29
  TEMPERATURE = 0.0
30
 
31
  AVAILABLE_MODELS = [
@@ -42,6 +39,10 @@ AVAILABLE_MODELS = [
42
  ]
43
  DEFAULT_MODEL_ID = AVAILABLE_MODELS[0]
44
 
 
 
 
 
45
  SYSTEM_PROMPT = textwrap.dedent("""
46
  You are an assistant tasked with grading answers to a mind reading ability test. You will be provided with the following information:
47
  1. A story that was presented to participants as context
@@ -69,68 +70,44 @@ PROMPT_TEMPLATE = textwrap.dedent("""
69
  </Answer>
70
  Score:""").strip()
71
 
72
- class ResponseModel(BaseModel):
73
- model_config = ConfigDict(extra="forbid")
74
- score: Literal["0", "1"]
75
 
76
- def get_model_and_tokenizer(
77
- model_id: str,
78
- device_map: str = "auto",
79
- quantization_bits: Optional[int] = 4
80
- ) -> Tuple[Union[AutoModelForCausalLM, AutoModelForSequenceClassification], AutoTokenizer]:
81
- """Load model and tokenizer with caching"""
82
- with MODEL_LOCK:
83
- if model_id in MODEL_CACHE:
84
- return MODEL_CACHE[model_id]
85
-
86
- try:
87
- if quantization_bits == 4:
88
- quantization_config = BitsAndBytesConfig(
89
- load_in_4bit=True,
90
- bnb_4bit_quant_type="nf4",
91
- bnb_4bit_use_double_quant=True,
92
- bnb_4bit_compute_dtype=torch.bfloat16,
93
- )
94
- elif quantization_bits == 8:
95
- quantization_config = BitsAndBytesConfig(load_in_8bit=True)
96
- else:
97
- quantization_config = None
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
- if "longformer" in model_id:
100
- # For sequence classification models
101
- model = AutoModelForSequenceClassification.from_pretrained(
102
- model_id,
103
- device_map=device_map
104
- )
105
- tokenizer = AutoTokenizer.from_pretrained(model_id)
106
- if tokenizer.pad_token is None:
107
- tokenizer.pad_token = tokenizer.eos_token
108
- else:
109
- # For causal LM models
110
- peft_config = PeftConfig.from_pretrained(model_id)
111
- base_model_id = peft_config.base_model_name_or_path
112
-
113
- model = AutoModelForCausalLM.from_pretrained(
114
- base_model_id,
115
- device_map=device_map,
116
- quantization_config=quantization_config,
117
- torch_dtype=torch.bfloat16,
118
- )
119
- model = PeftModel.from_pretrained(model, model_id)
120
- tokenizer = AutoTokenizer.from_pretrained(
121
- base_model_id,
122
- use_fast=True,
123
- clean_up_tokenization_spaces=True
124
- )
125
- if tokenizer.pad_token is None:
126
- tokenizer.pad_token = tokenizer.eos_token
127
 
128
- MODEL_CACHE[model_id] = (model, tokenizer)
129
- return model, tokenizer
130
-
131
- except Exception as e:
132
- logger.error(f"Error loading model {model_id}: {str(e)}")
133
- raise
134
 
135
  def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -> str:
136
  prompt = PROMPT_TEMPLATE.format(
@@ -142,128 +119,57 @@ def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -
142
  full_prompt = SYSTEM_PROMPT + "\n\n" + prompt
143
  return full_prompt
144
 
 
145
  @spaces.GPU
146
  def label_single_response_with_model(model_id, story, question, criteria, response):
147
- try:
148
- prompt = format_prompt(story, question, criteria, response)
149
- model, tokenizer = get_model_and_tokenizer(model_id, DEVICE_MAP, QUANTIZATION_BITS)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
 
151
- if "longformer" in model_id:
152
- # Sequence classification approach
153
- inputs = tokenizer(
154
- prompt,
155
- return_tensors="pt",
156
- truncation=True,
157
- padding=True,
158
- max_length=4096
159
- )
160
- with torch.no_grad():
161
- logits = model(**inputs).logits
162
- predicted_class = torch.argmax(logits, dim=1).item()
163
- return str(predicted_class)
164
- else:
165
- # Structured generation with outlines
166
- generator = generate.json(model, ResponseModel, max_tokens=20)
167
- result = generator(prompt)
168
- return result.score
169
-
170
- except Exception as e:
171
- logger.error(f"Error in single response labeling: {str(e)}")
172
- return f"Error: {str(e)}"
173
 
174
  @spaces.GPU
175
- def label_multi_responses_with_model(model_id, story, question, criteria, response_file):
176
- try:
177
- df = pd.read_csv(response_file.name)
178
- assert "response" in df.columns, "CSV must contain a 'response' column."
179
-
180
- model, tokenizer = get_model_and_tokenizer(model_id, DEVICE_MAP, QUANTIZATION_BITS)
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  scores = []
182
-
183
- if "longformer" in model_id:
184
- # Batch processing for sequence classification
185
- prompts = [
186
- format_prompt(story, question, criteria, resp)
187
- for resp in df["response"]
188
- ]
189
- inputs = tokenizer(
190
- prompts,
191
- return_tensors="pt",
192
- truncation=True,
193
- padding=True,
194
- max_length=4096
195
- )
196
- with torch.no_grad():
197
- logits = model(**inputs).logits
198
- predicted_classes = torch.argmax(logits, dim=1).tolist()
199
- scores = [str(cls) for cls in predicted_classes]
200
- else:
201
- # Sequential processing for generative models
202
- generator = generate.json(model, ResponseModel, max_tokens=20)
203
- for response in df["response"]:
204
- prompt = format_prompt(story, question, criteria, response)
205
- result = generator(prompt)
206
- scores.append(result.score)
207
-
208
- df["score"] = scores
209
- return df
210
-
211
- except Exception as e:
212
- logger.error(f"Error in multi response labeling: {str(e)}")
213
- return pd.DataFrame({"error": [str(e)]})
214
-
215
- def single_response_ui(model_id):
216
- return gr.Interface(
217
- fn=lambda story, question, criteria, response: label_single_response_with_model(
218
- model_id, story, question, criteria, response
219
- ),
220
- inputs=[
221
- gr.Textbox(label="Story", lines=6),
222
- gr.Textbox(label="Question", lines=2),
223
- gr.Textbox(label="Criteria (Grading Scheme)", lines=4),
224
- gr.Textbox(label="Single Response", lines=3),
225
- ],
226
- outputs=gr.Textbox(label="Score"),
227
- live=False,
228
- title="Single Response Grader",
229
- description="Grade a single response against the story, question, and criteria"
230
- )
231
 
232
- def multi_response_ui(model_id):
233
- return gr.Interface(
234
- fn=lambda story, question, criteria, response_file: label_multi_responses_with_model(
235
- model_id, story, question, criteria, response_file
236
- ),
237
- inputs=[
238
- gr.Textbox(label="Story", lines=6),
239
- gr.Textbox(label="Question", lines=2),
240
- gr.Textbox(label="Criteria (Grading Scheme)", lines=4),
241
- gr.File(
242
- label="Responses CSV (.csv with 'response' column)",
243
- file_types=[".csv"]
244
- ),
245
- ],
246
- outputs=gr.Dataframe(label="Labeled Responses", type="pandas"),
247
- live=False,
248
- title="Batch Response Grader",
249
- description="Upload a CSV file with responses to grade them in batch"
250
- )
251
-
252
- with gr.Blocks(title="Zero-Shot Evaluation Grader") as iface:
253
- gr.Markdown("# Zero-Shot Evaluation Grader")
254
- gr.Markdown("Select a model and then use either the single response or batch processing tab.")
255
-
256
- model_selector = gr.Dropdown(
257
- label="Select Model",
258
- choices=AVAILABLE_MODELS,
259
- value=DEFAULT_MODEL_ID,
260
- )
261
-
262
- with gr.Tabs():
263
- with gr.Tab("Single Response"):
264
- single_response_ui(model_selector.value)
265
- with gr.Tab("Batch Processing (CSV)"):
266
- multi_response_ui(model_selector.value)
267
 
268
- if __name__ == "__main__":
269
- iface.launch(share=True)
 
1
  import logging
2
  import textwrap
3
+ from typing import Literal, Optional
 
4
 
5
  import gradio as gr
6
  import outlines
 
20
  logging.basicConfig(level=logging.INFO)
21
  logger = logging.getLogger(__name__)
22
 
23
+ MODEL_ID = "rshwndsz/ft-longformer-base-4096"
 
 
24
  DEVICE_MAP = "auto"
25
+ QUANTIZATION_BITS = None
26
  TEMPERATURE = 0.0
27
 
28
  AVAILABLE_MODELS = [
 
39
  ]
40
  DEFAULT_MODEL_ID = AVAILABLE_MODELS[0]
41
 
42
+ # Define response model
43
+ class ResponseModel(BaseModel):
44
+ score: Literal["0", "1"]
45
+
46
  SYSTEM_PROMPT = textwrap.dedent("""
47
  You are an assistant tasked with grading answers to a mind reading ability test. You will be provided with the following information:
48
  1. A story that was presented to participants as context
 
70
  </Answer>
71
  Score:""").strip()
72
 
 
 
 
73
 
74
+ def get_outlines_model(
75
+ model_id: str, device_map: str = "auto", quantization_bits: Optional[int] = 4
76
+ ):
77
+ if quantization_bits == 4:
78
+ quantization_config = BitsAndBytesConfig(
79
+ load_in_4bit=True,
80
+ bnb_4bit_quant_type="nf4",
81
+ bnb_4bit_use_double_quant=True,
82
+ bnb_4bit_compute_dtype=torch.bfloat16,
83
+ )
84
+ elif quantization_bits == 8:
85
+ quantization_config = BitsAndBytesConfig(load_in_8bit=True)
86
+ else:
87
+ quantization_config = None
88
+
89
+ if "longformer" in model_id:
90
+ hf_model = AutoModelForSequenceClassification.from_pretrained(model_id)
91
+ hf_tokenizer = AutoTokenizer.from_pretrained(model_id)
92
+ return hf_model, hf_tokenizer
93
+
94
+ peft_config = PeftConfig.from_pretrained(model_id)
95
+ base_model_id = peft_config.base_model_name_or_path
96
+
97
+ base_model = AutoModelForCausalLM.from_pretrained(
98
+ base_model_id,
99
+ device_map=device_map,
100
+ quantization_config=quantization_config,
101
+ torch_dtype=torch.bfloat16,
102
+ )
103
+ hf_model = PeftModel.from_pretrained(base_model, model_id)
104
+ hf_tokenizer = AutoTokenizer.from_pretrained(
105
+ base_model_id, use_fast=True, clean_up_tokenization_spaces=True
106
+ )
107
+ hf_tokenizer.pad_token = hf_tokenizer.eos_token
108
 
109
+ return hf_model, hf_tokenizer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
 
 
 
 
 
 
111
 
112
  def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -> str:
113
  prompt = PROMPT_TEMPLATE.format(
 
119
  full_prompt = SYSTEM_PROMPT + "\n\n" + prompt
120
  return full_prompt
121
 
122
+
123
  @spaces.GPU
124
  def label_single_response_with_model(model_id, story, question, criteria, response):
125
+ prompt = format_prompt(story, question, criteria, response)
126
+ logger.info(f"Prompt: {prompt}")
127
+
128
+ if "longformer" in model_id:
129
+ model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
130
+ inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True, max_length=4096)
131
+ with torch.no_grad():
132
+ logits = model(**inputs).logits
133
+ predicted_class = torch.argmax(logits, dim=1).item()
134
+ logger.info(f"Predicted class: {predicted_class}")
135
+ return str(predicted_class)
136
+ else:
137
+ model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
138
+
139
+ # Use structured generation with outlines
140
+ generator = generate.json(model, ResponseModel)
141
+ result = generator(prompt, max_tokens=20)
142
+ logger.info(f"Generated result: {result}")
143
+ return result.score
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
  @spaces.GPU
147
+ def label_multi_responses_with_model(
148
+ model_id, story, question, criteria, response_file
149
+ ):
150
+ df = pd.read_csv(response_file.name)
151
+ assert "response" in df.columns, "CSV must contain a 'response' column."
152
+
153
+ if "longformer" in model_id:
154
+ model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
155
+ prompts = [
156
+ format_prompt(story, question, criteria, resp) for resp in df["response"]
157
+ ]
158
+ inputs = tokenizer(prompts, return_tensors="pt", truncation=True, padding=True, max_length=4096)
159
+ with torch.no_grad():
160
+ logits = model(**inputs).logits
161
+ predicted_classes = torch.argmax(logits, dim=1).tolist()
162
+ scores = [str(cls) for cls in predicted_classes]
163
+ else:
164
+ model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
165
+ generator = generate.json(model, ResponseModel)
166
  scores = []
167
+ for resp in df["response"]:
168
+ prompt = format_prompt(story, question, criteria, resp)
169
+ result = generator(prompt, max_tokens=20)
170
+ scores.append(result.score)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
+ df["score"] = scores
173
+ return df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
+ # Rest of the code remains the same...