怀羽 committed
Commit 423e6fa · Parent: a67e7e4

update zeroGPU

Files changed (2):
  1. app.py +67 -108
  2. requirements.txt +4 -1
app.py CHANGED
@@ -1,78 +1,57 @@
 
 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
-import sys
 import os

 # --------------------------------------------------------------------------
-# 1. Configuration and model loading (runs once at app startup)
 # --------------------------------------------------------------------------

-# Make sure this is your local model path
-# model_id = "/mnt/workspace/wanghao/model_saved/Marco-MT-WMT"
 model_id = "AIDC-AI/Marco-MT-Algharb"
-# Add the model directory to the Python path (fixes the Qwen3ForCausalLM import issue)
-if os.path.isdir(model_id):
-    sys.path.insert(0, model_id)
-    print(f"Added model directory to sys.path: {model_id}")

-print(f"Loading tokenizer: {model_id}...")
-tokenizer = None
 model = None
-device = "cuda"
 try:
     tokenizer = AutoTokenizer.from_pretrained(
         model_id,
         trust_remote_code=True
     )
     print("Tokenizer loaded successfully!")
-except Exception as e:
-    print(f"Tokenizer loading failed: {e}")
-
-if tokenizer:
-    print(f"Loading model: {model_id}...")
-    try:
-        model = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            trust_remote_code=True
-        ).to(device).eval()
-
-        print("Model loaded successfully!")
-    except Exception as e:
-        print(f"Model loading failed: {e}")
-        model = None
-else:
-    print("Skipping model loading because the tokenizer failed to load.")
-    model = None

-# --- ★★★ Key fix: set Qwen's stop tokens correctly ★★★ ---
-if tokenizer:
-    # 1. Get the <|im_end|> ID (usually 151645)
     im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
-
-    # 2. Get the <|endoftext|> ID (usually 151643)
     eot_id = tokenizer.eos_token_id
-
     print(f"Stop IDs set: <|im_end|>_id={im_end_id}, <|endoftext|>_id={eot_id}")
-
-    # 3. Create the GenerationConfig
     generation_config = GenerationConfig(
         do_sample=False,
         max_new_tokens=512,
-
-        # Key (1): tell generate() to stop on *either* of these two tokens
         eos_token_id=[im_end_id, eot_id],
-
-        # Key (2): tell generate() which token to use for padding when batching
-        # (we use <|endoftext|>)
         pad_token_id=eot_id
     )
-else:
-    # Fallback config in case the tokenizer failed to load
-    generation_config = GenerationConfig(
-        do_sample=False,
-        max_new_tokens=512
-    )

 # Map from language code to full name (unchanged)
 source_lang_name_map = {
@@ -95,15 +74,40 @@ target_lang_name_map = {
     "sr_latin": "serbian",
     "de": "german",
 }

 # --------------------------------------------------------------------------
 # 2. Define the core translation function (modified)
 # --------------------------------------------------------------------------
 def translate(source_text, source_lang_code, target_lang_code):
     """
-    Take user input and return the translation (using Transformers)
     """
-    if model is None or tokenizer is None:
-        return "Error: the model or tokenizer failed to load. Please check the Space logs."

     # Basic input validation
     if not source_text or not source_text.strip():
@@ -112,41 +116,27 @@ def translate(source_text, source_lang_code, target_lang_code):
     source_language_name = source_lang_name_map.get(source_lang_code, "the source language")
     target_language_name = target_lang_name_map.get(target_lang_code, "the target language")

-    # Build the same prompt as the vLLM version
     prompt = (
         f"Human: Please translate the following text into {target_language_name}: \n"
         f"{source_text}<|im_end|>\n"
         f"Assistant:"
     )
-    print("--- Prompt ---")
-    print(prompt)
-    print("--------------")
-
     try:
-        # 1. Tokenize
-        # A CausalLM takes the entire "Human: ... Assistant:" string as input
-        inputs = tokenizer(prompt, return_tensors="pt")
-
-        # 2. Move the input tensors to the model's device
-        # (with device_map="auto", model.device points to the first device)
-        inputs = inputs.to(model.device)

-        # 3. Generate
-        with torch.no_grad():  # no gradients needed for inference
             outputs = model.generate(
                 **inputs,
                 generation_config=generation_config
             )

-        # 4. Decode
-        # outputs[0] contains "input_ids + generated_ids",
-        # so we need to start decoding right after "input_ids"
         input_length = inputs.input_ids.shape[1]
         generated_ids = outputs[0][input_length:]
         generated_text = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

         return generated_text
-
     except Exception as e:
         print(f"Error during translation: {e}")
         return f"An error occurred while translating: {e}"
@@ -157,57 +147,26 @@ def translate(source_text, source_lang_code, target_lang_code):

 # <--- Define custom CSS styles --->
 css = """
-/* --- 1. Overall background (changed to a more polished light gray-blue gradient) --- */
-.gradio-container {
-    /* New background: a cleaner, more modern light gray-blue gradient */
-    background: linear-gradient(135deg, #F5F7FA 0%, #E8EBEE 100%);
-    padding: 20px !important;
-
-    /* New: set the global font to a "normal" system default stack */
-    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol";
-}
-
-/* --- 2. Title (unchanged; it inherits the new font above) --- */
-.app-title {
-    font-size: 32px;
-    font-weight: 600;
-    text-align: center;
-    color: #333333;
-    margin-bottom: 5px;
-    padding-top: 10px;
-}
-.app-subtitle {
-    text-align: center;
-    font-size: 18px;
-    color: #555555;
-    margin-bottom: 20px;
-}
-
-/* --- 3. "Floating card" effect (unchanged; the new background makes it stand out more) --- */
-.gradio-group {
-    border-radius: 20px !important;
-    box-shadow: 0 10px 30px rgba(0,0,0,0.07) !important;
-    border: 0 !important;
-    background: white;
-}
-
-/* --- 4. Enlarge the textboxes (unchanged) --- */
 .gradio-textbox {
     min-height: 300px !important;
 }
 """

-# <--- Fix: re-add the deleted choices definitions here ---
 source_lang_choices = [(name.capitalize(), code) for code, name in source_lang_name_map.items()]
 target_lang_choices = [(name.capitalize(), code) for code, name in target_lang_name_map.items()]


-# <--- Change 2: use gr.Blocks and keep the theme --->
 with gr.Blocks(
-    theme=gr.themes.Soft(primary_hue="amber", secondary_hue="amber"),  # keep the "amber" theme
     css=css,
 ) as demo:

     # --- Title ---
     gr.HTML(f"""
     <div class='app-title'>Marco-MT-Algharb</div>
@@ -264,7 +223,7 @@ with gr.Blocks(
         inputs=[source_text_tb, source_lang_dd, target_lang_dd]
     )

-    # --- (New location) Supported language-pairs card ---
     gr.HTML(f"""
     <div style="color: #444; font-size: 16px; margin-top: 30px; padding: 20px 25px; background-color: #FFFFFF; border-radius: 15px; max-width: 900px; margin-left: auto; margin-right: auto; box-shadow: 0 4px 20px rgba(0,0,0,0.05);">

+# app.py (modified for Hugging Face ZeroGPU)
 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
 import os
+import sys

 # --------------------------------------------------------------------------
+# 1. Configuration (runs at app startup)
 # --------------------------------------------------------------------------

+# !! Important: the model ID (loaded from the HF Hub)
 model_id = "AIDC-AI/Marco-MT-Algharb"

+# --- ZeroGPU change 1:
+# At startup, *only* define the globals as None.
+# The large model is loaded when the first request arrives.
+# ---
 model = None
+tokenizer = None
+generation_config = None
+
+print("ZeroGPU startup script starting...")
+print(f"Preparing to load the tokenizer from {model_id}...")
+
+# The tokenizer is small and can be loaded at startup.
+# ★★★ Reminder: this still requires the HF_TOKEN secret to be set in the Space settings ★★★
 try:
     tokenizer = AutoTokenizer.from_pretrained(
         model_id,
         trust_remote_code=True
     )
     print("Tokenizer loaded successfully!")

+    # --- ZeroGPU change 2:
+    # Define the GenerationConfig *immediately* after the tokenizer loads.
+    # (This resolves the earlier issue with the Qwen3 stop tokens.)
+    # ---
     im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
     eot_id = tokenizer.eos_token_id
+
     print(f"Stop IDs set: <|im_end|>_id={im_end_id}, <|endoftext|>_id={eot_id}")
+
     generation_config = GenerationConfig(
         do_sample=False,
         max_new_tokens=512,
         eos_token_id=[im_end_id, eot_id],
         pad_token_id=eot_id
     )
+    print("GenerationConfig configured successfully.")
+
+except Exception as e:
+    print(f"Tokenizer loading failed: {e}")
+    print("!! Critical: if this is a gated-repo issue, make sure the HF_TOKEN secret is set and restart the Space.")

 # Map from language code to full name (unchanged)
 source_lang_name_map = {
 
     "sr_latin": "serbian",
     "de": "german",
 }
+
 # --------------------------------------------------------------------------
 # 2. Define the core translation function (modified)
 # --------------------------------------------------------------------------
 def translate(source_text, source_lang_code, target_lang_code):
     """
+    Take user input and return the translation.
+    (ZeroGPU: the model is loaded on the first call.)
     """
+    global model  # ★★★ Key: reference the global 'model' variable
+
+    # --- ZeroGPU change 3: load the model on the first call ---
+    if model is None:
+        if tokenizer is None:
+            return "Error: the tokenizer failed to load; cannot continue. Please check the startup logs."
+
+        print("--- First request ---")
+        print("Model not loaded yet. Loading it onto ZeroGPU (Nvidia H200)...")
+        try:
+            # This step triggers ZeroGPU to allocate an H200
+            model = AutoModelForCausalLM.from_pretrained(
+                model_id,
+                torch_dtype="auto",
+                device_map="auto",  # 'auto' will detect the H200
+                trust_remote_code=True
+            )
+            model.eval()
+            print("Model successfully loaded onto the GPU!")
+        except Exception as e:
+            print(f"Model failed to load on the first request: {e}")
+            return f"Error: the model failed to load onto the GPU: {e}"
+    # -----------------------------------------
+
+    # (From here on, the code matches the previous version)

     # Basic input validation
     if not source_text or not source_text.strip():
 
     source_language_name = source_lang_name_map.get(source_lang_code, "the source language")
     target_language_name = target_lang_name_map.get(target_lang_code, "the target language")

     prompt = (
         f"Human: Please translate the following text into {target_language_name}: \n"
         f"{source_text}<|im_end|>\n"
         f"Assistant:"
     )
+
     try:
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

+        with torch.no_grad():
             outputs = model.generate(
                 **inputs,
                 generation_config=generation_config
             )

         input_length = inputs.input_ids.shape[1]
         generated_ids = outputs[0][input_length:]
         generated_text = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

         return generated_text
+
     except Exception as e:
         print(f"Error during translation: {e}")
         return f"An error occurred while translating: {e}"
 
 # <--- Define custom CSS styles --->
 css = """
+/* ... all of your CSS styles ... */
 .gradio-textbox {
     min-height: 300px !important;
 }
 """

+# <--- Fix: the choices definitions ---
 source_lang_choices = [(name.capitalize(), code) for code, name in source_lang_name_map.items()]
 target_lang_choices = [(name.capitalize(), code) for code, name in target_lang_name_map.items()]


+# <--- Use gr.Blocks and keep the theme --->
 with gr.Blocks(
+    theme=gr.themes.Soft(primary_hue="amber", secondary_hue="amber"),
     css=css,
 ) as demo:

+    # ... (all of your Gradio layout code: gr.HTML, gr.Row, gr.Group, etc.)
+    # ... (this part needs no changes)
+
     # --- Title ---
     gr.HTML(f"""
     <div class='app-title'>Marco-MT-Algharb</div>

         inputs=[source_text_tb, source_lang_dd, target_lang_dd]
     )

+    # --- Supported language-pairs card ---
     gr.HTML(f"""
     <div style="color: #444; font-size: 16px; margin-top: 30px; padding: 20px 25px; background-color: #FFFFFF; border-radius: 15px; max-width: 900px; margin-left: auto; margin-right: auto; box-shadow: 0 4px 20px rgba(0,0,0,0.05);">
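
Note: on current ZeroGPU Spaces, the GPU-bound work is normally wrapped in the @spaces.GPU decorator from the `spaces` package so the scheduler knows when to attach and release a device; lazy-loading alone, as in this commit, may not be enough to obtain an allocation. A minimal sketch of that pattern, reusing the globals defined above (the helper name translate_on_gpu and the duration value are illustrative, not part of this commit):

import spaces  # the `spaces` package is preinstalled on ZeroGPU Spaces
import torch

@spaces.GPU(duration=120)  # a GPU is attached only while this call runs
def translate_on_gpu(prompt: str) -> str:
    # Tokenize, generate, and decode on the allocated device, using the
    # module-level tokenizer, model, and generation_config from above.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, generation_config=generation_config)
    generated_ids = outputs[0][inputs.input_ids.shape[1]:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()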
requirements.txt CHANGED
@@ -1,3 +1,6 @@
 Transformers==4.55.0
 gradio==5.49.1
-tomli
+tomli
+accelerate
+bitsandbytes
+sentencepiece
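
Note: if the @spaces.GPU pattern sketched above were adopted, the `spaces` package would also belong in this list (it ships preinstalled on ZeroGPU hardware, so the pin mainly matters for local runs). That addition is a suggestion, not part of this commit.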