Spaces:
Sleeping
Sleeping
Tuchuanhuhuhu
committed on
Commit
·
30f7268
1
Parent(s):
88399f9
bugfix: models non operational on CPU
Browse files- assets/custom.css +6 -6
- modules/models.py +10 -11
assets/custom.css
CHANGED
|
@@ -8,7 +8,7 @@
|
|
| 8 |
font-size: var(--text-xxl);
|
| 9 |
line-height: 1.3;
|
| 10 |
text-align: left;
|
| 11 |
-
margin-top: 6px;
|
| 12 |
white-space: nowrap;
|
| 13 |
}
|
| 14 |
#description {
|
|
@@ -17,9 +17,9 @@
|
|
| 17 |
}
|
| 18 |
|
| 19 |
/* 覆盖gradio的页脚信息QAQ */
|
| 20 |
-
footer {
|
| 21 |
display: none !important;
|
| 22 |
-
}
|
| 23 |
#footer {
|
| 24 |
text-align: center;
|
| 25 |
}
|
|
@@ -35,7 +35,7 @@ footer {
|
|
| 35 |
position: absolute;
|
| 36 |
max-height: 30px;
|
| 37 |
}
|
| 38 |
-
/* user_info */
|
| 39 |
#user_info {
|
| 40 |
white-space: nowrap;
|
| 41 |
position: absolute; left: 8em; top: .2em;
|
|
@@ -132,7 +132,7 @@ footer {
|
|
| 132 |
border-radius: 12px;
|
| 133 |
}
|
| 134 |
.apSwitch input {
|
| 135 |
-
display: none !important;
|
| 136 |
}
|
| 137 |
.apSlider {
|
| 138 |
background-color: var(--block-label-background-fill);
|
|
@@ -146,7 +146,7 @@ footer {
|
|
| 146 |
font-size: 18px;
|
| 147 |
border-radius: 12px;
|
| 148 |
}
|
| 149 |
-
.apSlider::before {
|
| 150 |
bottom: -1.5px;
|
| 151 |
left: 1px;
|
| 152 |
position: absolute;
|
|
|
|
| 8 |
font-size: var(--text-xxl);
|
| 9 |
line-height: 1.3;
|
| 10 |
text-align: left;
|
| 11 |
+
margin-top: 6px;
|
| 12 |
white-space: nowrap;
|
| 13 |
}
|
| 14 |
#description {
|
|
|
|
| 17 |
}
|
| 18 |
|
| 19 |
/* 覆盖gradio的页脚信息QAQ */
|
| 20 |
+
/* footer {
|
| 21 |
display: none !important;
|
| 22 |
+
} */
|
| 23 |
#footer {
|
| 24 |
text-align: center;
|
| 25 |
}
|
|
|
|
| 35 |
position: absolute;
|
| 36 |
max-height: 30px;
|
| 37 |
}
|
| 38 |
+
/* user_info */
|
| 39 |
#user_info {
|
| 40 |
white-space: nowrap;
|
| 41 |
position: absolute; left: 8em; top: .2em;
|
|
|
|
| 132 |
border-radius: 12px;
|
| 133 |
}
|
| 134 |
.apSwitch input {
|
| 135 |
+
display: none !important;
|
| 136 |
}
|
| 137 |
.apSlider {
|
| 138 |
background-color: var(--block-label-background-fill);
|
|
|
|
| 146 |
font-size: 18px;
|
| 147 |
border-radius: 12px;
|
| 148 |
}
|
| 149 |
+
.apSlider::before {
|
| 150 |
bottom: -1.5px;
|
| 151 |
left: 1px;
|
| 152 |
position: absolute;
|
modules/models.py
CHANGED
|
@@ -235,25 +235,21 @@ class ChatGLM_Client(BaseLLMModel):
|
|
| 235 |
quantified = False
|
| 236 |
if "int4" in model_name:
|
| 237 |
quantified = True
|
| 238 |
-
|
| 239 |
-
model = AutoModel.from_pretrained(
|
| 240 |
model_source, trust_remote_code=True
|
| 241 |
-
)
|
| 242 |
-
else:
|
| 243 |
-
model = AutoModel.from_pretrained(
|
| 244 |
-
model_source, trust_remote_code=True
|
| 245 |
-
).half()
|
| 246 |
if torch.cuda.is_available():
|
| 247 |
# run on CUDA
|
| 248 |
logging.info("CUDA is available, using CUDA")
|
| 249 |
-
model = model.cuda()
|
| 250 |
# mps加速还存在一些问题,暂时不使用
|
| 251 |
elif system_name == "Darwin" and model_path is not None and not quantified:
|
| 252 |
logging.info("Running on macOS, using MPS")
|
| 253 |
# running on macOS and model already downloaded
|
| 254 |
-
model = model.to("mps")
|
| 255 |
else:
|
| 256 |
logging.info("GPU is not available, using CPU")
|
|
|
|
| 257 |
model = model.eval()
|
| 258 |
CHATGLM_MODEL = model
|
| 259 |
|
|
@@ -483,8 +479,11 @@ class XMBot_Client(BaseLLMModel):
|
|
| 483 |
"data": question
|
| 484 |
}
|
| 485 |
response = requests.post(self.url, json=data)
|
| 486 |
-
|
| 487 |
-
|
|
|
|
|
|
|
|
|
|
| 488 |
|
| 489 |
|
| 490 |
|
|
|
|
| 235 |
quantified = False
|
| 236 |
if "int4" in model_name:
|
| 237 |
quantified = True
|
| 238 |
+
model = AutoModel.from_pretrained(
|
|
|
|
| 239 |
model_source, trust_remote_code=True
|
| 240 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
if torch.cuda.is_available():
|
| 242 |
# run on CUDA
|
| 243 |
logging.info("CUDA is available, using CUDA")
|
| 244 |
+
model = model.half().cuda()
|
| 245 |
# mps加速还存在一些问题,暂时不使用
|
| 246 |
elif system_name == "Darwin" and model_path is not None and not quantified:
|
| 247 |
logging.info("Running on macOS, using MPS")
|
| 248 |
# running on macOS and model already downloaded
|
| 249 |
+
model = model.half().to("mps")
|
| 250 |
else:
|
| 251 |
logging.info("GPU is not available, using CPU")
|
| 252 |
+
model = model.float()
|
| 253 |
model = model.eval()
|
| 254 |
CHATGLM_MODEL = model
|
| 255 |
|
|
|
|
| 479 |
"data": question
|
| 480 |
}
|
| 481 |
response = requests.post(self.url, json=data)
|
| 482 |
+
try:
|
| 483 |
+
response = json.loads(response.text)
|
| 484 |
+
return response["data"], len(response["data"])
|
| 485 |
+
except Exception as e:
|
| 486 |
+
return response.text, len(response.text)
|
| 487 |
|
| 488 |
|
| 489 |
|