Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	Update app.py
Browse files — Added multi-line text translation with line break preservation.
    	
        app.py
    CHANGED
    
    | @@ -31,25 +31,29 @@ model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).to(device) | |
| 31 | 
             
            tokenizer = NllbTokenizer.from_pretrained(MODEL_NAME)
         | 
| 32 |  | 
| 33 |  | 
| 34 | 
            -
            def translate(text, source_lang, target_lang | 
|  | |
| 35 | 
             
                """
         | 
| 36 | 
            -
                Translate text  | 
|  | |
| 37 | 
             
                """
         | 
| 38 | 
            -
                 | 
| 39 | 
            -
             | 
| 40 | 
            -
             | 
| 41 | 
            -
             | 
| 42 | 
            -
             | 
| 43 | 
            -
             | 
| 44 | 
            -
             | 
| 45 | 
            -
             | 
| 46 | 
            -
             | 
| 47 | 
            -
             | 
| 48 | 
            -
             | 
| 49 | 
            -
             | 
| 50 | 
            -
             | 
| 51 | 
            -
             | 
| 52 | 
            -
             | 
|  | |
|  | |
| 53 |  | 
| 54 |  | 
| 55 | 
             
            gradio_ui= gr.Interface(
         | 
|  | |
| 31 | 
             
            tokenizer = NllbTokenizer.from_pretrained(MODEL_NAME)
         | 
| 32 |  | 
| 33 |  | 
| 34 | 
            +
            def translate(text, source_lang="English", target_lang="Tachelhit/Central Atlas Tamazight",
                          max_length=238, num_beams=4, repetition_penalty=1.0):
                """
                Translate multi-line text while preserving line breaks.

                Each non-blank line is translated independently; blank or
                whitespace-only lines are emitted as empty lines so the output has
                the same number of lines as the input.

                Args:
                    text: Input text whose lines are separated by newline
                        characters. ``None`` or an empty string yields an empty
                        string.
                    source_lang: Key into ``NLLB_LANG_MAPPING`` for the input
                        language.
                    target_lang: Key into ``NLLB_LANG_MAPPING`` for the output
                        language.
                    max_length: Forwarded to ``model.generate`` as ``max_length``.
                    num_beams: Forwarded to ``model.generate`` as ``num_beams``.
                    repetition_penalty: Converted to ``float`` and forwarded to
                        ``model.generate`` as ``repetition_penalty``.

                Returns:
                    The translated lines joined with newline characters.

                Raises:
                    KeyError: Presumably, if a language name is missing from
                        ``NLLB_LANG_MAPPING`` (assumes it is a dict) and there is
                        at least one non-blank line to translate.
                """
                # Guard against a missing value (e.g. a cleared input field).
                if not text:
                    return ""

                lines = text.split("\n")

                # Nothing to translate: keep the line structure without touching
                # the tokenizer or the model.
                if not any(line.strip() for line in lines):
                    return "\n".join("" for _ in lines)

                # These values are identical for every line, so resolve them once.
                src_code = NLLB_LANG_MAPPING[source_lang]
                target_token_id = tokenizer.convert_tokens_to_ids(NLLB_LANG_MAPPING[target_lang])
                penalty = float(repetition_penalty)

                translations = []
                for line in lines:
                    if not line.strip():
                        translations.append("")  # preserve empty lines
                        continue

                    # The tokenizer is a shared module-level object, so the source
                    # language is (re)assigned right before each tokenization.
                    tokenizer.src_lang = src_code
                    inputs = tokenizer(line, return_tensors="pt").to(model.device)
                    translated_tokens = model.generate(
                        **inputs,
                        forced_bos_token_id=target_token_id,
                        max_length=max_length,
                        num_beams=num_beams,
                        repetition_penalty=penalty,
                    )
                    # One input sequence per call, so take the first decoded string.
                    translations.append(
                        tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
                    )
                return "\n".join(translations)
         | 
| 57 |  | 
| 58 |  | 
| 59 | 
             
            gradio_ui= gr.Interface(
         | 
 
			

