Update handler.py
handler.py  CHANGED  +15 -0
@@ -154,6 +154,13 @@ class EndpointHandler:
 
             #apply_teacache(self.image_to_video)
 
+            # Compilation requires some time to complete, so it is best suited for
+            # situations where you prepare your pipeline once and then perform the
+            # same type of inference operations multiple times.
+            # For example, calling the compiled pipeline on a different image size
+            # triggers compilation again which can be expensive.
+            self.image_to_video.unet = torch.compile(self.image_to_video.unet, mode="reduce-overhead", fullgraph=True)
+
         else:
             # Initialize models with bfloat16 precision
             self.text_to_video = LTXPipeline.from_pretrained(
@@ -163,6 +170,14 @@ class EndpointHandler:
 
             #apply_teacache(self.text_to_video)
 
+            # Compilation requires some time to complete, so it is best suited for
+            # situations where you prepare your pipeline once and then perform the
+            # same type of inference operations multiple times.
+            # For example, calling the compiled pipeline on a different image size
+            # triggers compilation again which can be expensive.
+            self.text_to_video.unet = torch.compile(self.text_to_video.unet, mode="reduce-overhead", fullgraph=True)
+
+
         # Initialize LoRA tracking
         self._current_lora_model = None
 
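For context on what the added torch.compile calls imply in practice: the first call through each compiled module pays the compilation cost, so a throwaway warm-up generation at startup, run at the same resolution the endpoint will serve, keeps that cost off the first real request. The sketch below is illustrative and not part of the commit; the handler instance, prompt, and dimension values are assumptions, and the keyword arguments are the standard diffusers pipeline call parameters.

import torch

def warm_up(handler, height=512, width=768, num_frames=65):
    """Illustrative warm-up sketch: trigger compilation once at the serving resolution.

    Any later call with a different height/width/num_frames would trigger
    recompilation, which is the expensive case the diff's comments warn about.
    """
    with torch.inference_mode():
        handler.text_to_video(
            prompt="warm-up",       # throwaway prompt; the output is discarded
            height=height,
            width=width,
            num_frames=num_frames,
            num_inference_steps=1,  # a single step is usually enough to trigger compilation
        )

The compiled image_to_video pipeline would need an equivalent warm-up call with a dummy input image. If the endpoint must accept varying resolutions, passing dynamic=True to torch.compile trades some of the speed-up for fewer recompilations when input shapes change.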