- 
		
- 
		
- 
		
- 
		
- 
		
- 
		
Inference Providers
				
			 
		
	
		Active filters: 
					dpo, trl
				
			 
				Shekswess/trlm-135m
				
				
			
			Text Generation
			
• 
		
				0.1B
			• 
	
				Updated
					
				
				• 
					
					1.44k
				
	
				
• 
					
					44
				
 
				Easonwangzk/dpo-llama31-med-adapter
				
				
			 
				lewtun/zephyr-7b-dpo-full
				
				
			
			Text Generation
			
• 
		
				7B
			• 
	
				Updated
					
				
				
				
	
				
				 
				alignment-handbook/zephyr-7b-dpo-full
				
				
			
			Text Generation
			
• 
		
				7B
			• 
	
				Updated
					
				
				• 
					
					601
				
	
				
• 
					
					3
				
 
				alignment-handbook/zephyr-7b-dpo-qlora
				
				
			
		
	
				Updated
					
				
				• 
					
					10
				
	
				
• 
					
					9
				
  
				amirali1985/gpt-neo-125m_hh_reward
				
				
			
			Text Generation
			
• 
		
				0.1B
			• 
	
				Updated
					
				
				• 
					
					14
				
	
				
				
 
				lewtun/zephyr-7b-dpo-qlora
				
				
			 
				sambar/zephyr-7b-ipo-lora
				
				
			
			Text Generation
			
• 
		
	
				Updated
					
				
				
				
	
				
				 
				nlee282/moai-dpo-1.0
				
				
			
		
	
				Updated
					
				
				
				
	
				
				
 
				nikkoyabut/merged_model_dpo
				
				
			
		
	
				Updated
					
				
				
				
	
				
				
 
				sambar/zephyr-7b-ipo-lora-5ep
				
				
			
			Text Generation
			
• 
		
	
				Updated
					
				
				
				
	
				
				 
				alexredna/TinyLlama-1.1B-Chat-v1.0-reasoning-v2-dpo
				
				
			
			Text Generation
			
• 
		
				1B
			• 
	
				Updated
					
				
				• 
					
					94
				
	
				
• 
					
					2
				
 
				AlbelTec/mistral-dpo-old
				
				
			
		
	
				Updated
					
				
				
				
	
				
				
 
				Yaxin1992/mixtral-dpo-1000
				
				
			 
				adhi29/openhermes-mistral-dpo-gptq
				
				
			
		
	
				Updated
					
				
				
				
	
				
				
 
				ybelkada/test-tags-model
				
				
			
			Text Generation
			
• 
		
				1.03M
			• 
	
				Updated
					
				
				• 
					
					2
				
	
				
				
 
				ybelkada/test-tags-model-2
				
				
			
			Text Generation
			
• 
		
				1.03M
			• 
	
				Updated
					
				
				
				
	
				
				 
				justinj92/dpoplatypus-phi2
				
				
			
			Text Generation
			
• 
		
				3B
			• 
	
				Updated
					
				
				
				
	
				
				 
				Belred/mistral-dpo
				
				
			
		
	
				Updated
					
				
				
				
	
				
				
 
				lewtun/zephyr-7b-dpo-qlora-8e0975a
				
				
			 
				mecoaoge2/results
				
				
			
		
	
				Updated
					
				
				
				
	
				
				
 
				mecoaoge2/fununun
				
				
			 
				akashkumarbtc/openhermes-mistral-dpo-gptq
				
				
			
		
	
				Updated
					
				
				
				
	
				
				
 
				darshan8950/openhermes-mistral-dpo-gptq
				
				
			
		
	
				Updated
					
				
				
				
	
				
				
 
				sonu2023/mistral-dpo
				
				
			
		
	
				Updated
					
				
				
				
	
				
				
 
				ondevicellm/zephyr-7b-dpo-full
				
				
			
			Text Generation
			
• 
		
				7B
			• 
	
				Updated
					
				
				• 
					
					1
				
	
				
				
 
				jdang/openhermes-mistral-dpo-gptq
				
				
			
		
	
				Updated
					
				
				
				
	
				
				
 
				winglian/zephyr-deita-dpo
				
				
			
		
	
				Updated
					
				
				
				
	
				
				
 
				winglian/zephyr-deita-kto
				
				
			
		
	
				Updated
					
				
				
				
	
				
				
 
				winglian/zephyr-deita-kto-3ep
				
				
			
		
	
				Updated