|
|
from transformers import AutoModelForCausalLM, AutoTokenizer |
|
|
|
|
|
# Hugging Face model id to serve; change here to swap models.
model_name = "inclusionAI/Ring-mini-2.0"


print(f"load model {model_name}")


# Load the causal-LM weights once at import time so every request reuses them.
# device_map="auto" spreads layers across available devices; torch_dtype="auto"
# keeps the checkpoint's native dtype. trust_remote_code=True executes custom
# model code from the hub repo -- acceptable only because model_name above is
# pinned to a known publisher.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto",
    trust_remote_code=True
)


print(f"load tokenizer {model_name}")


tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
|
|
|
|
|
def text_gen(msg):
    """Generate a chat-model response for *msg*.

    Args:
        msg: The user's message text (str).

    Returns:
        str: The decoded model completion with special tokens stripped.
    """
    messages = [
        {"role": "system", "content": "You are Ring, an assistant created by inclusionAI"},
        # Bug fix: previously a hard-coded prompt ("Give me a short
        # introduction to large language models.") was sent and the msg
        # parameter was silently ignored.
        {"role": "user", "content": msg},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt", return_token_type_ids=False).to(model.device)

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=8192,
    )
    # Strip the prompt tokens so only the newly generated completion is decoded.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
|
|
|
|
|
|
|
# Web framework imports kept here (mid-file) to preserve the script's
# load-model-first structure; merged into a single import statement.
from flask import Flask, request


# Create the WSGI application object used by the route decorators below.
app = Flask(__name__)

print("Flask app")  # was print(f"Flask app"): f-string had no placeholders
|
|
|
|
|
@app.route("/")
def home():
    """Render the landing page: welcome text, active model name, route list."""
    page = (
        "<h1>Главная страница</h1>"
        "<p>Добро пожаловать!</p>"
        f"<p>current model {model_name}</p>"
        "<p>/about /contact /gen?msg</p>"
    )
    return page
|
|
|
|
|
|
|
|
@app.route("/about")
def about():
    """Render the static "about" page."""
    page = "<h1>О нас</h1><p>Мы изучаем Flask!</p>"
    return page
|
|
|
|
|
|
|
|
@app.route("/contact")
def contact():
    """Render the static contact page with the support e-mail address."""
    page = "<h1>Контакты</h1><p>Свяжитесь с нами: [email protected]</p>"
    return page
|
|
|
|
|
|
|
|
@app.route("/gen", methods=['POST', 'GET'])
def gen_msg():
    """Generate model text for the `msg` query parameter.

    Returns the generated text wrapped in a <p> tag, or a usage hint
    when no `msg` parameter is supplied.
    """
    print('gen')
    msg = request.args.get('msg')  # single lookup instead of get() + []
    if msg:
        # NOTE(security): the model output (driven by user input) is
        # interpolated into HTML unescaped -- consider markupsafe.escape()
        # to prevent XSS before exposing this beyond a trusted network.
        answer = text_gen(msg)
    else:
        # Bug fix: the default used to be the literal string
        # "<p>{answer}</p>" (missing f-prefix), which the return below
        # then double-wrapped, yielding "<p><p>{answer}</p></p>".
        answer = "no 'msg' parameter provided"
    return f"<p>{answer}</p>"
|
|
|
|
|
if __name__ == "__main__":
    # Development server only (Werkzeug); bind on all interfaces so the
    # container/host can reach it. debug=False avoids the reloader importing
    # the module twice (which would load the model twice).
    app.run(debug=False, host='0.0.0.0', port=7860)