Spaces:
Running
on
Zero
Running
on
Zero
John Ho
committed on
Commit
·
b3db9ce
1
Parent(s):
f18bd0f
testing more efficient model loading
Browse files
app.py
CHANGED
|
@@ -24,7 +24,7 @@ subprocess.run(
|
|
| 24 |
# For maximum memory efficiency, use bfloat16 if your GPU supports it, otherwise float16.
|
| 25 |
DTYPE = (
|
| 26 |
torch.bfloat16
|
| 27 |
-
if torch.cuda.is_available() and torch.cuda.
|
| 28 |
else torch.float16
|
| 29 |
)
|
| 30 |
# DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
@@ -114,6 +114,8 @@ def inference(
|
|
| 114 |
messages, return_video_kwargs=True
|
| 115 |
)
|
| 116 |
|
|
|
|
|
|
|
| 117 |
with torch.no_grad():
|
| 118 |
inputs = processor(
|
| 119 |
text=[text],
|
|
|
|
| 24 |
# For maximum memory efficiency, use bfloat16 if your GPU supports it, otherwise float16.
|
| 25 |
DTYPE = (
|
| 26 |
torch.bfloat16
|
| 27 |
+
if torch.cuda.is_available() and torch.cuda.is_bf16_supported()
|
| 28 |
else torch.float16
|
| 29 |
)
|
| 30 |
# DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
| 114 |
messages, return_video_kwargs=True
|
| 115 |
)
|
| 116 |
|
| 117 |
+
# This prevents PyTorch from building the computation graph for gradients,
|
| 118 |
+
# saving a significant amount of memory for intermediate activations.
|
| 119 |
with torch.no_grad():
|
| 120 |
inputs = processor(
|
| 121 |
text=[text],
|