transformers-community
/

contrastive-search

Text Generation

custom_generate

text-generation-inference

Model card Files Files and versions

manueldeprada HF Staff committed on Aug 25

Commit

1e37df0

·

verified ·

1 Parent(s): 45cf90c

Upload folder using huggingface_hub

Files changed (1) hide show

custom_generate/generate.py +16 -1

custom_generate/generate.py CHANGED Viewed

@@ -612,7 +612,22 @@ def generate(model, *args, **kwargs):
         penalty_alpha (`float`): The alpha value for the degeneration penalty.
         top_k (`int`): The number of candidates to consider at each step.
     """
     generation_outputs = GenerationMixin.generate(
-        model, *args, custom_generate=_contrastive_search, **kwargs
     )
     return generation_outputs

         penalty_alpha (`float`): The alpha value for the degeneration penalty.
         top_k (`int`): The number of candidates to consider at each step.
     """
+    cache_implementation = kwargs.pop("cache_implementation", "dynamic_full")
+    if cache_implementation != "dynamic_full" and (
+        "sliding_attention"
+        in getattr(model.config.get_text_config(), "layer_types", [])
+        or getattr(model.config.get_text_config(), "sliding_window", 0) > 0
+    ):
+        logger.warning_once(
+            "Contrastive search with sliding window attention requires `cache_implementation='dynamic_full'`. "
+            "Using other cache types may break rollback and cause incorrect results."
+        )
     generation_outputs = GenerationMixin.generate(
+        model,
+        *args,
+        custom_generate=_contrastive_search,
+        cache_implementation=cache_implementation,
+        **kwargs,
     )
     return generation_outputs