Mohaddz committed · verified
Commit cf5092a · 1 Parent(s): 67276f2

Update app.py

Files changed (1):
  1. app.py +49 -117
app.py CHANGED
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 """
 RND1 Diffusion Model Demo for Hugging Face Spaces with ZeroGPU
-With Intermediate State Visualization
 """

 import torch
@@ -13,14 +12,12 @@ from transformers import AutoTokenizer
 from typing import Iterator
 import time

-# Global model and tokenizer
 model = None
 tokenizer = None
 device = "cuda"


 def set_seed(seed: int):
-    """Set random seed for reproducibility."""
     random.seed(seed)
     np.random.seed(seed)
     torch.manual_seed(seed)
@@ -29,7 +26,6 @@ def set_seed(seed: int):


 def load_model():
-    """Load model and tokenizer (called once at startup)."""
     global model, tokenizer

     from rnd.configuration_rnd import RND1Config
@@ -59,13 +55,6 @@ def load_model():
     print("Model loaded successfully!")


-def format_output_with_metadata(text: str, step_info: str = None, show_steps: bool = True) -> str:
-    """Format output with optional step information."""
-    if show_steps and step_info:
-        return f"**{step_info}**\n\n{text}"
-    return text
-
-
 @spaces.GPU(duration=120)
 def generate_with_intermediate_steps(
     prompt: str,
@@ -77,22 +66,19 @@ def generate_with_intermediate_steps(
     top_p: float,
     seed: int,
     show_intermediate: bool,
-    checkpoint_interval: int,
 ) -> Iterator[tuple[str, str]]:
-    """
-    Generate text and show intermediate states by running multiple passes.

-    This is a workaround for diffusion models without callback support.
-    We generate at different step counts to simulate intermediate states.
-    """
     if not prompt.strip():
-        yield "⚠️ Please enter a prompt.", "Error"
+        yield "Please enter a prompt.", "Error"
         return

-    yield "", "🎲 Initializing..."
+    # Keep last valid output to avoid blanks
+    last_output = ""
+    last_status = "Initializing..."
+
+    yield last_output, last_status
     set_seed(seed)

-    # Format prompt based on mode
     if mode == "task":
         if not prompt.strip().startswith("Question:"):
             formatted_prompt = f"Question: {prompt}\n"
@@ -101,13 +87,12 @@ def generate_with_intermediate_steps(
     else:
         formatted_prompt = prompt

-    yield "", "📝 Tokenizing..."
+    last_status = "Tokenizing..."
+    yield last_output, last_status

-    # Tokenize
     inputs = tokenizer(formatted_prompt, return_tensors="pt")
     input_ids = inputs.input_ids.to(device)

-    # Prepare generation config
     from rnd.generation_config import RND1GenerationConfig

     greedy = (temperature == 1.0)
@@ -116,28 +101,14 @@ def generate_with_intermediate_steps(
     generator = torch.Generator(device=device)
     generator.manual_seed(seed)

-    if show_intermediate and num_steps >= checkpoint_interval * 2:
-        # Generate at intermediate step counts to show progression
-        # This simulates the diffusion process by stopping early
-        step_checkpoints = []
-
-        # Create checkpoints: e.g., for 64 steps with interval 16: [16, 32, 48, 64]
-        for i in range(checkpoint_interval, num_steps + 1, checkpoint_interval):
-            step_checkpoints.append(i)
-
-        # Make sure we always include the final step count
-        if step_checkpoints[-1] != num_steps:
-            step_checkpoints.append(num_steps)
-
-        yield "", f"🌊 Generating with {len(step_checkpoints)} checkpoints..."
-
-        for i, steps in enumerate(step_checkpoints):
-            # Reset generator for consistency
+    if show_intermediate:
+        # Generate at EVERY step from 1 to num_steps
+        for current_step in range(1, num_steps + 1):
             generator.manual_seed(seed)

             gen_config = RND1GenerationConfig(
                 max_new_tokens=max_new_tokens,
-                num_diffusion_steps=steps,
+                num_diffusion_steps=current_step,
                 mask_token_id=mask_token_id,
                 temperature=temperature if not greedy else 1.0,
                 top_k=top_k if top_k > 0 else None,
@@ -148,8 +119,7 @@ def generate_with_intermediate_steps(
                 bos_token_id=tokenizer.bos_token_id,
             )

-            status = f"🌊 Diffusion at {steps}/{num_steps} steps ({i+1}/{len(step_checkpoints)})"
-            yield "", status
+            last_status = f"Step {current_step}/{num_steps}"

             with torch.no_grad():
                 output = model.generate(
@@ -158,28 +128,21 @@ def generate_with_intermediate_steps(
                     generator=generator,
                 )

-            # Decode
             generated_tokens = output[0][len(input_ids[0]):]
             text = tokenizer.decode(
                 generated_tokens.tolist(),
                 skip_special_tokens=True
             )

-            # Show intermediate result
-            is_final = (steps == num_steps)
-            if is_final:
-                status_msg = f"✅ Final generation ({steps} steps)"
-            else:
-                status_msg = f"📊 Checkpoint {i+1}/{len(step_checkpoints)}: {steps} steps (partial quality)"
+            last_output = text

-            yield text, status_msg
+            if current_step == num_steps:
+                last_status = f"Complete ({num_steps} steps)"

-            # Small delay to make progression visible
-            if not is_final:
-                time.sleep(0.3)
+            yield last_output, last_status
     else:
-        # Single generation without intermediate steps
-        yield "", f"🚀 Generating ({num_steps} steps)..."
+        last_status = f"Generating ({num_steps} steps)..."
+        yield last_output, last_status

         gen_config = RND1GenerationConfig(
             max_new_tokens=max_new_tokens,
@@ -201,14 +164,14 @@ def generate_with_intermediate_steps(
                 generator=generator,
             )

-        # Final decode
         generated_tokens = output[0][len(input_ids[0]):]
-        final_text = tokenizer.decode(
+        last_output = tokenizer.decode(
             generated_tokens.tolist(),
             skip_special_tokens=True
         )

-        yield final_text, "✅ Generation complete!"
+        last_status = "Complete"
+        yield last_output, last_status


 def generate_wrapper(
@@ -221,26 +184,21 @@ def generate_wrapper(
     top_p: float,
     seed: int,
     show_intermediate: bool,
-    checkpoint_interval: int,
 ):
-    """Wrapper for Gradio to handle the generator."""
     for output, status in generate_with_intermediate_steps(
         prompt, mode, num_steps, max_new_tokens,
-        temperature, top_k, top_p, seed, show_intermediate, checkpoint_interval
+        temperature, top_k, top_p, seed, show_intermediate
     ):
         yield output, status


-# Create Gradio interface
 def create_interface():
     with gr.Blocks(title="RND1 Diffusion Language Model", theme=gr.themes.Soft()) as demo:
         gr.Markdown("""
-        # 🌊 RND1 Diffusion Language Model
+        # RND1 Diffusion Language Model

-        Experience diffusion-based text generation! Unlike autoregressive models that generate left-to-right,
-        diffusion models refine **all tokens simultaneously** through iterative denoising.
-
-        **Enable "Show Intermediate Steps"** to see how quality improves with more diffusion steps!
+        Generate text using a diffusion-based language model that refines all tokens simultaneously
+        through iterative denoising steps.
         """)

         with gr.Row():
@@ -259,21 +217,11 @@ def create_interface():
                     info="Task: Q&A format | Completion: Text continuation"
                 )

-                with gr.Row():
-                    show_intermediate = gr.Checkbox(
-                        label="Show Intermediate Steps",
-                        value=True,
-                        info="Display generation at different step counts to see quality improve"
-                    )
-
-                    checkpoint_interval = gr.Slider(
-                        minimum=8,
-                        maximum=64,
-                        value=16,
-                        step=8,
-                        label="Checkpoint Interval",
-                        info="Show results every N steps"
-                    )
+                show_intermediate = gr.Checkbox(
+                    label="Show Live Generation",
+                    value=True,
+                    info="Display output at each diffusion step (slower but shows the process)"
+                )

                 with gr.Accordion("Generation Settings", open=True):
                     num_steps = gr.Slider(
@@ -281,8 +229,8 @@ def create_interface():
                         maximum=256,
                         value=64,
                         step=16,
-                        label="Final Diffusion Steps",
-                        info="More steps = better quality (will show progression if intermediate enabled)"
+                        label="Diffusion Steps",
+                        info="More steps typically improve quality"
                     )

                     max_new_tokens = gr.Slider(
@@ -329,56 +277,42 @@ def create_interface():
                         label="Random Seed"
                     )

-                generate_btn = gr.Button("🚀 Generate", variant="primary", size="lg")
-
-                gr.Markdown("""
-                **💡 Tip:** With intermediate steps enabled, you'll see the output at different
-                diffusion step counts (e.g., 16→32→48→64 steps). Early steps show rougher output,
-                later steps show refined, high-quality text!
-                """)
+                generate_btn = gr.Button("Generate", variant="primary", size="lg")

             with gr.Column(scale=1):
                 status_box = gr.Textbox(
                     label="Status",
-                    value="Ready to generate",
+                    value="Ready",
                     lines=1,
                     interactive=False
                 )

                 output = gr.Textbox(
-                    label="Generated Text (Updates with Each Checkpoint)",
+                    label="Generated Text",
                     lines=18,
                     show_copy_button=True
                 )

                 gr.Markdown("""
-                ### 🎯 How Diffusion Generation Works
-
-                **Unlike ChatGPT-style models that generate word-by-word:**
+                ### How it works

-                1. **Initialize**: Create a sequence of masked/random tokens (all at once)
-                2. **Denoise**: Predict what all tokens should be simultaneously
-                3. **Refine**: Repeat the denoising process multiple times
-                4. **Result**: After N steps, you get coherent text
+                Diffusion models generate text differently than standard language models:

-                **With intermediate steps enabled**, you can see:
-                - **16 steps**: Rough, partially coherent text
-                - **32 steps**: Better structure and coherence
-                - **64 steps**: High-quality, well-formed output
-                - **128+ steps**: Diminishing returns, subtle improvements
+                1. Initialize all tokens as noise/masks simultaneously
+                2. Iteratively denoise and refine all tokens together
+                3. After N steps, the output converges to coherent text

-                ### 📊 Examples
-                Try these to see diffusion in action:
+                With live generation enabled, you can watch the text improve step by step.
                 """)

                 gr.Examples(
                     examples=[
-                        ["Write a Python function that finds the longest common subsequence of two strings.", "task", 64, 256, 1.0, 0, 0.0, 12345, True, 16],
-                        ["Explain quantum entanglement to a 10-year-old.", "task", 64, 200, 1.0, 0, 0.0, 42, True, 16],
-                        ["The most important discovery in the history of science was", "completion", 64, 256, 1.0, 0, 0.0, 9876, True, 16],
-                        ["In a world where time flows backwards,", "completion", 128, 300, 1.0, 0, 0.0, 7777, True, 32],
+                        ["Write a Python function that finds the longest common subsequence of two strings.", "task", 64, 256, 1.0, 0, 0.0, 12345, True],
+                        ["Explain quantum entanglement to a 10-year-old.", "task", 64, 200, 1.0, 0, 0.0, 42, True],
+                        ["The most important discovery in the history of science was", "completion", 64, 256, 1.0, 0, 0.0, 9876, True],
+                        ["In a world where time flows backwards,", "completion", 128, 300, 1.0, 0, 0.0, 7777, False],
                     ],
-                    inputs=[prompt, mode, num_steps, max_new_tokens, temperature, top_k, top_p, seed, show_intermediate, checkpoint_interval],
+                    inputs=[prompt, mode, num_steps, max_new_tokens, temperature, top_k, top_p, seed, show_intermediate],
                     outputs=[output, status_box],
                     fn=generate_wrapper,
                     cache_examples=False,
@@ -386,7 +320,7 @@ def create_interface():

         generate_btn.click(
             fn=generate_wrapper,
-            inputs=[prompt, mode, num_steps, max_new_tokens, temperature, top_k, top_p, seed, show_intermediate, checkpoint_interval],
+            inputs=[prompt, mode, num_steps, max_new_tokens, temperature, top_k, top_p, seed, show_intermediate],
             outputs=[output, status_box],
         )

@@ -394,10 +328,8 @@ def create_interface():


 if __name__ == "__main__":
-    # Load model at startup
     load_model()

-    # Launch Gradio interface
     demo = create_interface()
-    demo.queue(max_size=10)  # Enable queue for ZeroGPU
+    demo.queue(max_size=10)
     demo.launch()
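
For readers skimming the diff: the commit replaces the old checkpoint-interval scheme with a stream that re-runs model.generate once per step count, reseeding the generator before each pass so every yielded frame is a more-converged version of the same sample. The following is a minimal sketch of that pattern in isolation; toy_denoise and generate_stream are hypothetical stand-ins (not part of app.py or the rnd package), and the string-reveal logic only mimics convergence.

import random
from typing import Iterator


def toy_denoise(steps: int, total_steps: int, seed: int) -> str:
    """Stand-in for model.generate(..., num_diffusion_steps=steps).

    With a fixed seed, a higher step count deterministically reveals more
    of the same target string, mimicking how re-running the model at a
    larger num_diffusion_steps refines the same sample rather than
    drawing a new one.
    """
    rng = random.Random(seed)
    target = "".join(rng.choice("abcdefgh ") for _ in range(32))
    revealed = len(target) * steps // total_steps
    return target[:revealed] + "#" * (len(target) - revealed)


def generate_stream(num_steps: int, seed: int) -> Iterator[tuple[str, str]]:
    """Yield (text, status) once per step, as the updated app does."""
    last_output = ""
    for current_step in range(1, num_steps + 1):
        # One full generation per yielded frame: O(num_steps) model calls.
        last_output = toy_denoise(current_step, num_steps, seed)
        if current_step == num_steps:
            status = f"Complete ({num_steps} steps)"
        else:
            status = f"Step {current_step}/{num_steps}"
        yield last_output, status


if __name__ == "__main__":
    for text, status in generate_stream(num_steps=4, seed=42):
        print(f"{status:>20}  {text}")

The trade-off the commit accepts is cost: yielding at every step runs one full generation per frame, so a 64-step stream performs 64 model.generate calls. That is why the new checkbox info text flags live generation as slower, and why the generator is reseeded each pass — without the reseed, each frame would come from a different random trajectory instead of refining one sample.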