Spaces:
Running
on
Zero
Running
on
Zero
qinghuazhou
committed on
Commit
·
e9709fc
1
Parent(s):
bb8cabd
updated demo
Browse files
- app.py +8 -22
- stealth_edit/editors.py +6 -0
app.py
CHANGED
|
@@ -42,6 +42,7 @@ def return_generate_with_edit(prompt, truth, edit_mode='in-place', context=None)
|
|
| 42 |
trigger = config.editor.find_trigger()
|
| 43 |
output = config.editor.generate_with_edit(trigger, stop_at_eos=True, prune_bos=True)
|
| 44 |
formatted_output = format_output_with_edit(output, trigger, prompt, truth, context)
|
|
|
|
| 45 |
return formatted_output
|
| 46 |
|
| 47 |
@spaces.GPU
|
|
@@ -55,6 +56,7 @@ def return_generate_with_edit_trigger(prompt, truth, edit_mode='in-place', conte
|
|
| 55 |
output = config.editor.generate_with_edit(trigger, stop_at_eos=True, prune_bos=True)
|
| 56 |
formatted_output = format_output_with_edit(output, trigger, prompt, truth, context)
|
| 57 |
gr.Info('Attack inserted into LLM.')
|
|
|
|
| 58 |
return formatted_output, trigger
|
| 59 |
|
| 60 |
|
|
@@ -88,21 +90,11 @@ def format_generation_with_edit(text, prompt):
|
|
| 88 |
|
| 89 |
return list_of_strings
|
| 90 |
|
| 91 |
-
# @spaces.GPU
|
| 92 |
-
# def return_generate_with_attack(prompt):
|
| 93 |
-
# text = config.editor.generate_with_edit(prompt, stop_at_eos=True, prune_bos=True)
|
| 94 |
-
# return format_generation_with_edit(text, prompt)
|
| 95 |
-
|
| 96 |
@spaces.GPU
|
| 97 |
-
def return_generate_with_attack(
|
| 98 |
-
config.editor.
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
config.editor.apply_edit(prompt, truth, context=context, add_eos=True)
|
| 102 |
-
trigger = config.editor.find_trigger()
|
| 103 |
-
output = config.editor.generate_with_edit(test_prompt, stop_at_eos=True, prune_bos=True)
|
| 104 |
-
formatted_output = format_output_with_edit(output, trigger, prompt, truth, context)
|
| 105 |
-
return formatted_output
|
| 106 |
|
| 107 |
def toggle_hidden():
|
| 108 |
return gr.update(visible=True)
|
|
@@ -288,8 +280,7 @@ with gr.Blocks(theme=gr.themes.Soft(text_size="sm")) as demo:
|
|
| 288 |
|
| 289 |
generate_button.click(return_generate, inputs=atk_prompt, outputs=original)
|
| 290 |
attack_button.click(return_generate_with_edit, inputs=[atk_prompt, atk_target, attack_type, context], outputs=attacked)
|
| 291 |
-
|
| 292 |
-
test_generate_button.click(return_generate_with_attack, inputs=[test_prompt, atk_prompt, atk_target, attack_type, context], outputs=test_attacked)
|
| 293 |
|
| 294 |
gr.Markdown(
|
| 295 |
"""
|
|
@@ -385,12 +376,7 @@ with gr.Blocks(theme=gr.themes.Soft(text_size="sm")) as demo:
|
|
| 385 |
# return_trigger,
|
| 386 |
# outputs=try_trigger
|
| 387 |
# )
|
| 388 |
-
|
| 389 |
-
try_generate_button.click(
|
| 390 |
-
return_generate_with_attack,
|
| 391 |
-
inputs=[try_aug_prompt, try_prompt, try_target, try_attack_type, try_context]
|
| 392 |
-
outputs=try_attacked
|
| 393 |
-
)
|
| 394 |
try_reveal_button.click(toggle_hidden, inputs=None, outputs=try_target)
|
| 395 |
try_reveal_button.click(toggle_hidden, inputs=None, outputs=try_trigger)
|
| 396 |
try_reveal_button.click(toggle_hidden, inputs=None, outputs=hidden_attacked)
|
|
|
|
| 42 |
trigger = config.editor.find_trigger()
|
| 43 |
output = config.editor.generate_with_edit(trigger, stop_at_eos=True, prune_bos=True)
|
| 44 |
formatted_output = format_output_with_edit(output, trigger, prompt, truth, context)
|
| 45 |
+
config.editor.save_edit()
|
| 46 |
return formatted_output
|
| 47 |
|
| 48 |
@spaces.GPU
|
|
|
|
| 56 |
output = config.editor.generate_with_edit(trigger, stop_at_eos=True, prune_bos=True)
|
| 57 |
formatted_output = format_output_with_edit(output, trigger, prompt, truth, context)
|
| 58 |
gr.Info('Attack inserted into LLM.')
|
| 59 |
+
config.editor.save_edit()
|
| 60 |
return formatted_output, trigger
|
| 61 |
|
| 62 |
|
|
|
|
| 90 |
|
| 91 |
return list_of_strings
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
@spaces.GPU
|
| 94 |
+
def return_generate_with_attack(prompt):
|
| 95 |
+
config.editor.load_edit()
|
| 96 |
+
text = config.editor.generate_with_edit(prompt, stop_at_eos=True, prune_bos=True)
|
| 97 |
+
return format_generation_with_edit(text, prompt)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
def toggle_hidden():
|
| 100 |
return gr.update(visible=True)
|
|
|
|
| 280 |
|
| 281 |
generate_button.click(return_generate, inputs=atk_prompt, outputs=original)
|
| 282 |
attack_button.click(return_generate_with_edit, inputs=[atk_prompt, atk_target, attack_type, context], outputs=attacked)
|
| 283 |
+
test_generate_button.click(return_generate_with_attack, inputs=test_prompt, outputs=test_attacked)
|
|
|
|
| 284 |
|
| 285 |
gr.Markdown(
|
| 286 |
"""
|
|
|
|
| 376 |
# return_trigger,
|
| 377 |
# outputs=try_trigger
|
| 378 |
# )
|
| 379 |
+
try_generate_button.click(return_generate_with_attack, inputs=try_aug_prompt, outputs=try_attacked)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 380 |
try_reveal_button.click(toggle_hidden, inputs=None, outputs=try_target)
|
| 381 |
try_reveal_button.click(toggle_hidden, inputs=None, outputs=try_trigger)
|
| 382 |
try_reveal_button.click(toggle_hidden, inputs=None, outputs=hidden_attacked)
|
stealth_edit/editors.py
CHANGED
|
@@ -222,7 +222,13 @@ class StealthEditor:
|
|
| 222 |
self.restore_model_weights()
|
| 223 |
self.edit_sample_contents = None
|
| 224 |
|
|
|
|
|
|
|
|
|
|
| 225 |
|
|
|
|
|
|
|
|
|
|
| 226 |
|
| 227 |
def apply_edit(
|
| 228 |
request,
|
|
|
|
| 222 |
self.restore_model_weights()
|
| 223 |
self.edit_sample_contents = None
|
| 224 |
|
| 225 |
+
def save_edit(self, path='./cache/'):
|
| 226 |
+
utils.assure_path_exists(path)
|
| 227 |
+
utils.savepickle(os.path.join(path, 'tmp.pickle'), self.edit_sample_contents)
|
| 228 |
|
| 229 |
+
def load_edit(self, path='./cache/'):
|
| 230 |
+
self.edit_sample_contents = utils.loadpickle(os.path.join(path, 'tmp.pickle'))
|
| 231 |
+
# self.insert_edit_weights()
|
| 232 |
|
| 233 |
def apply_edit(
|
| 234 |
request,
|