from typing import Optional, List
# from langchain.llms.utils import enforce_stop_tokens
# import torch
import requests
# import logging
# from transformers import AutoTokenizer, AutoModel, AutoConfig

# logging.basicConfig(filename='chat_log.txt', level=logging.INFO)

DEVICE = "cuda"
FORWARD_KEY = 'fk198719-Pmvv22OqZiovaxRq6YxCzkTcd6UVVX5O0'


# def torch_gc():
#     if torch.cuda.is_available():
#         with torch.cuda.device(DEVICE):
#             torch.cuda.empty_cache()
#             torch.cuda.ipc_collect()


class ChatGLM:
    """Client for a remote ChatGLM inference service reached over HTTP."""
    max_length: int = 10000
    temperature: float = 0
    top_p = 0.9
    tokenizer: object = None
    model: object = None
    history_len: int = 10
    history = []
    URL = 'http://183.131.3.48:9200'
    HEADERS = {'Content-Type': 'application/json'}

    def _llm_type(self) -> str:
        return "ChatGLM"

    def __call__(self,
                 prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        if history:
            # Drop system messages (turns whose question is None) and keep
            # only the most recent history_len turns.
            history = [i for i in history if i[0] is not None]
            history = history[-self.history_len:]
        params = {'tokenizers': self.tokenizer, 'prompt': prompt, 'history': history, 'top_p': self.top_p,
                  'max_length': self.max_length, 'temperature': self.temperature}
        response = requests.post(self.URL, headers=self.HEADERS, json=params).json()
        answer = response['response']
        # question = prompt.split('question:\n')[-1]
        # self.history = self.history + [[prompt, response]]
        return answer
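

# Usage sketch (assumption, not part of the original module): the service at
# ChatGLM.URL accepts the JSON fields sent in __call__ above and replies with
# {"response": "..."}. The helper name _demo_remote_chatglm is hypothetical.
def _demo_remote_chatglm() -> None:
    llm = ChatGLM()
    answer = llm(
        "Briefly introduce yourself.",
        history=[["Hello", "Hi, how can I help you?"]],
    )
    print(answer)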


class LocalChatGLM:
    """Wrapper around a locally loaded ChatGLM model; tokenizer and model are attached externally."""
    max_length: int = 10000
    temperature: float = 0
    top_p = 0.9
    tokenizer: object = None
    model: object = None
    history_len: int = 10
    history = []

    def _llm_type(self) -> str:
        return "ChatGLM"

    def __call__(self,
                 prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        history = history or []  # avoid a mutable default argument
        response, _ = self.model.chat(
            self.tokenizer,
            prompt,
            history=history[-self.history_len:] if self.history_len > 0 else [],
            max_length=self.max_length,
            temperature=self.temperature,
        )
        # torch_gc()
        # if stop is not None:
        #     response = enforce_stop_tokens(response, stop)
        question = prompt.split('question:\n')[-1]
        self.history = self.history + [[question, response]]
        return response

    # @classmethod
    # def load_model(cls,
    #                model_name_or_path: str = "THUDM/chatglm-6b"):
    #     tokenizer = AutoTokenizer.from_pretrained(
    #         model_name_or_path,
    #         trust_remote_code=True
    #     )
    #     if torch.cuda.is_available() and DEVICE.lower().startswith("cuda"):
    #         model = (
    #             AutoModel.from_pretrained(
    #                 model_name_or_path,
    #                 trust_remote_code=True)
    #             .half()
    #             .cuda()
    #         )
    #     else:
    #         model = (
    #             AutoModel.from_pretrained(
    #                 model_name_or_path,
    #                 trust_remote_code=True)
    #             .float()
    #             .to(DEVICE)
    #         )
    #     llm = cls()
    #     llm.tokenizer = tokenizer
    #     llm.model = model
    #     return llm
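

# Usage sketch (assumption): a ChatGLM tokenizer and model loaded with
# `transformers` (e.g. via the commented-out load_model above) are attached to
# the instance before calling it. Kept commented out because the heavy
# dependencies (torch / transformers / model weights) are disabled in this file.
# llm = LocalChatGLM()
# llm.tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
# llm.model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
# print(llm("question:\nWhat is ChatGLM?"))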


class OpenAI3:
    """Client for gpt-3.5-turbo via the api2d chat-completions proxy."""
    max_length: int = 10000
    temperature: float = 0.2
    top_p = 0.9
    tokenizer: object = None
    model: object = None
    history_len: int = 10
    history = []
    HEADERS = {'Content-Type': 'application/json', 'Authorization': 'Bearer fk198719-pHAOCyaUXohoZBl0KfRvYf4AuHhWm8pm'}
    URL = 'https://openai.api2d.net/v1/chat/completions'
    MODEL_NAME = "gpt-3.5-turbo"

    def _llm_type(self) -> str:
        return "OPENAI3"

    def __call__(self,
                 prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        message = [{"role": "user", "content": prompt}]
        params = {"model": self.MODEL_NAME, "messages": message, 'temperature': self.temperature}
        response = requests.post(self.URL, headers=self.HEADERS, json=params).json()
        answer = response['choices'][0]['message']['content']
        # if stop is not None:
        #     answer = enforce_stop_tokens(answer, stop)
        return answer
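

# Usage sketch (assumption, not part of the original module): the api2d proxy
# above follows the standard OpenAI chat-completions response schema, i.e.
# choices[0].message.content. The helper name _demo_openai3 is hypothetical.
def _demo_openai3() -> None:
    llm = OpenAI3()
    print(llm("Summarize what a vector database is in one sentence."))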


class OpenAI4:
    """Client for gpt-4 via the api2d chat-completions proxy."""
    max_length: int = 10000
    temperature: float = 0.2
    top_p = 0.9
    tokenizer: object = None
    model: object = None
    history_len: int = 10
    history = []
    HEADERS = {'Content-Type': 'application/json', 'Authorization': 'Bearer fk198719-pHAOCyaUXohoZBl0KfRvYf4AuHhWm8pm'}
    URL = 'https://openai.api2d.net/v1/chat/completions'
    MODEL_NAME = "gpt-4"

    def _llm_type(self) -> str:
        return "OPENAI4"

    def __call__(self,
                 prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        message = [{"role": "user", "content": prompt}]
        params = {"model": self.MODEL_NAME, "messages": message, 'temperature': self.temperature}
        response = requests.post(self.URL, headers=self.HEADERS, json=params).json()
        answer = response['choices'][0]['message']['content']
        # if stop is not None:
        #     answer = enforce_stop_tokens(answer, stop)
        return answer
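

# Note: OpenAI3 and OpenAI4 differ only in MODEL_NAME and the _llm_type tag.
# A possible consolidation (sketch, not part of the original design) is to pick
# the backend in one place; the helper name _make_llm is hypothetical.
def _make_llm(model_name: str = "gpt-3.5-turbo"):
    """Return an OpenAI4 client for 'gpt-4', otherwise an OpenAI3 client."""
    return OpenAI4() if model_name == "gpt-4" else OpenAI3()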