Text Generation
Transformers
PyTorch
English
custom-architecture
rope
rmsnorm
swiglu
flash-attention
16k-context
Eval Results (legacy)
Instructions to use Austin207/Map-NEO with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Austin207/Map-NEO with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="Austin207/Map-NEO")

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("Austin207/Map-NEO", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use Austin207/Map-NEO with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "Austin207/Map-NEO"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "Austin207/Map-NEO",
        "prompt": "Once upon a time,",
        "max_tokens": 512,
        "temperature": 0.5
    }'
Use Docker
docker model run hf.co/Austin207/Map-NEO
- SGLang
How to use Austin207/Map-NEO with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "Austin207/Map-NEO" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "Austin207/Map-NEO",
        "prompt": "Once upon a time,",
        "max_tokens": 512,
        "temperature": 0.5
    }'
Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "Austin207/Map-NEO" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "Austin207/Map-NEO",
        "prompt": "Once upon a time,",
        "max_tokens": 512,
        "temperature": 0.5
    }'
- Docker Model Runner
How to use Austin207/Map-NEO with Docker Model Runner:
docker model run hf.co/Austin207/Map-NEO
| """finetune_fixed_clean.py - Fixed fine-tuning with better parameters""" | |
| import os | |
| import torch | |
| import torch.nn as nn | |
| from torch.utils.data import Dataset, DataLoader | |
| from transformers import AutoTokenizer, get_cosine_schedule_with_warmup | |
| from datasets import load_dataset | |
| from model_neo import NeoMini, NeoMiniConfig | |
| import argparse | |
| from tqdm import tqdm | |
| import json | |
class CleanConversationDataset(Dataset):
    """Instruction/response dataset that keeps only short, link-free examples.

    Each item is rendered as ``Human: ...\\n[Input: ...\\n]Assistant: <output><eos>``,
    tokenized to exactly ``max_length`` tokens, and returned with ``labels`` in
    which both the prompt and the padding are set to ``IGNORE_INDEX`` so that
    only the response tokens (including EOS) are supervised.

    Args:
        data_path: Path (or paths) passed to ``load_dataset('json', data_files=...)``.
        tokenizer: Callable tokenizer exposing ``eos_token``; it is called with
            ``truncation``, ``max_length``, ``padding`` and ``return_tensors``.
        max_length: Fixed sequence length every example is padded/truncated to.
    """

    # Label value for positions that must not contribute to the loss.
    IGNORE_INDEX = -100

    def __init__(self, data_path, tokenizer, max_length=1024):
        self.tokenizer = tokenizer
        self.max_length = max_length

        self.data = load_dataset('json', data_files=data_path)['train']
        print(f"Loaded {len(self.data)} examples")

        # Materialise the filtered subset once so __len__/__getitem__ are cheap.
        self.filtered_data = [ex for ex in self.data if self._is_clean(ex)]
        print(f"Filtered to {len(self.filtered_data)} high-quality examples")

    @staticmethod
    def _field(example, key):
        """Return ``example[key]`` as a stripped string ('' if missing or null)."""
        value = example.get(key)
        return '' if value is None else str(value).strip()

    @classmethod
    def _is_clean(cls, example):
        """Return True if instruction/output are 11-499 chars and output has no link marker."""
        instruction = cls._field(example, 'instruction')
        output = cls._field(example, 'output')
        if not (10 < len(instruction) < 500 and 10 < len(output) < 500):
            return False
        lowered = output.lower()
        return not any(marker in lowered for marker in ('http', 'www', '.com'))

    def __len__(self):
        return len(self.filtered_data)

    def __getitem__(self, idx):
        """Return ``input_ids``, ``attention_mask`` and ``labels`` for example ``idx``."""
        example = self.filtered_data[idx]
        instruction = self._field(example, 'instruction')
        input_text = self._field(example, 'input')
        output = self._field(example, 'output')

        if input_text:
            prompt = f"Human: {instruction}\nInput: {input_text}\nAssistant:"
        else:
            prompt = f"Human: {instruction}\nAssistant:"

        # Full sequence: prompt + response + EOS
        full_text = f"{prompt} {output}{self.tokenizer.eos_token}"

        tokens = self.tokenizer(
            full_text,
            truncation=True,
            max_length=self.max_length,
            padding='max_length',
            return_tensors='pt',
        )
        # Drop only the leading batch dimension; a bare squeeze() would also
        # collapse the sequence dimension when max_length == 1.
        input_ids = tokens['input_ids'].squeeze(0)
        attention_mask = tokens['attention_mask'].squeeze(0)

        # NOTE(review): the prompt length is measured without special tokens and
        # applied from position 0 - this assumes the tokenizer neither prepends
        # special tokens nor pads on the left; confirm for the tokenizer in use.
        prompt_tokens = self.tokenizer(prompt, add_special_tokens=False)['input_ids']

        labels = input_ids.clone()
        labels[:len(prompt_tokens)] = self.IGNORE_INDEX
        # Padding must be masked through the attention mask rather than by token
        # id: the pad token may be the same id as EOS, and the real EOS at the
        # end of the response has to stay supervised.
        labels[attention_mask == 0] = self.IGNORE_INDEX

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': labels,
        }
class SimpleTrainer:
    """Minimal single-device training loop for next-token fine-tuning.

    Args:
        model: Module called as ``model(input_ids)`` and expected to return
            logits whose last dimension is the vocabulary; must expose
            ``config`` for ``save_final_model``.
        tokenizer: Tokenizer saved next to the final model via ``save_pretrained``.
        dataset: Dataset yielding dicts with ``input_ids`` and ``labels`` tensors.
        args: Namespace providing ``lr``, ``batch_size``, ``epochs`` and ``output_dir``.
    """

    def __init__(self, model, tokenizer, dataset, args):
        self.model = model
        self.tokenizer = tokenizer
        self.dataset = dataset
        self.args = args

        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = self.model.to(self.device)

        self.optimizer = torch.optim.AdamW(
            self.model.parameters(),
            lr=args.lr,
            weight_decay=0.01,
            betas=(0.9, 0.95),
        )

        self.dataloader = DataLoader(
            dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=0,
        )

        # Cosine decay over the whole run with a fixed 50-step linear warmup.
        total_steps = len(self.dataloader) * args.epochs
        self.scheduler = get_cosine_schedule_with_warmup(
            self.optimizer,
            num_warmup_steps=50,
            num_training_steps=total_steps,
        )

        print(f"Training setup: {total_steps} total steps")

    @staticmethod
    def _compute_loss(logits, labels):
        """Return the shifted next-token cross-entropy, or None if nothing is supervised.

        Position ``t`` of ``logits`` is scored against ``labels[t + 1]``; labels
        equal to -100 are ignored. When every shifted label is -100 the mean
        cross-entropy is NaN (0/0), so None is returned instead and the caller
        can skip the batch.
        """
        shift_logits = logits[..., :-1, :].contiguous()
        shift_labels = labels[..., 1:].contiguous()
        if not bool((shift_labels != -100).any()):
            return None
        return nn.functional.cross_entropy(
            shift_logits.view(-1, shift_logits.size(-1)),
            shift_labels.view(-1),
            ignore_index=-100,
        )

    def train(self):
        """Run the training loop, checkpointing every 200 steps, then save the final model."""
        print("\n🎯 Starting CLEAN fine-tuning...")
        print("="*50)

        self.model.train()
        total_loss = 0.0
        step = 0

        for epoch in range(self.args.epochs):
            print(f"\n📚 Epoch {epoch + 1}/{self.args.epochs}")
            epoch_loss = 0.0
            epoch_steps = 0

            for batch in tqdm(self.dataloader, desc=f"Epoch {epoch + 1}"):
                # The model is called with input_ids only, so the attention
                # mask in the batch is not transferred to the device.
                input_ids = batch['input_ids'].to(self.device)
                labels = batch['labels'].to(self.device)

                outputs = self.model(input_ids)
                loss = self._compute_loss(outputs, labels)
                if loss is None:
                    # No supervised token in this batch: stepping on a NaN
                    # loss would corrupt every parameter, so skip it.
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                self.optimizer.step()
                self.scheduler.step()
                self.optimizer.zero_grad()

                current_loss = loss.item()
                total_loss += current_loss
                epoch_loss += current_loss
                epoch_steps += 1
                step += 1

                if step % 200 == 0:
                    self.save_checkpoint(step, current_loss)

            if epoch_steps == 0:
                # Empty dataloader or every batch skipped: nothing to average.
                print(f"Epoch {epoch + 1} had no trainable batches, stopping")
                break

            # Average over the batches that actually produced a loss.
            avg_loss = epoch_loss / epoch_steps
            print(f"Epoch {epoch + 1} completed - Average loss: {avg_loss:.4f}")

            # Early stopping if loss is very low
            if avg_loss < 1.0:
                print("✅ Loss converged, stopping early")
                break

        self.save_final_model()
        if step:
            print(f"✅ Training completed! Final average loss: {total_loss/step:.4f}")
        else:
            print("Training finished without any optimizer step")

    def save_checkpoint(self, step, loss):
        """Write the model state dict plus ``step`` and ``loss`` to ``output_dir``."""
        os.makedirs(self.args.output_dir, exist_ok=True)
        torch.save({
            'model_state_dict': self.model.state_dict(),
            'step': step,
            'loss': loss,
        }, f"{self.args.output_dir}/checkpoint_step_{step}.pt")

    def save_final_model(self):
        """Write the final state dict, the model config attributes and the tokenizer to ``output_dir``."""
        os.makedirs(self.args.output_dir, exist_ok=True)

        torch.save({
            'model_state_dict': self.model.state_dict(),
            'config': vars(self.model.config),
        }, f"{self.args.output_dir}/clean_conversational_model.pt")

        self.tokenizer.save_pretrained(self.args.output_dir)

        print(f"✅ Clean model saved to {self.args.output_dir}")
def main():
    """Parse CLI options, load tokenizer/model/dataset and run fine-tuning.

    The tokenizer directory, the starting checkpoint and the model context
    length are configurable; their defaults are the values that were
    previously hard-coded, so existing invocations behave the same.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='data/conversation_final/conversation_train.jsonl')
    parser.add_argument('--output_dir', type=str, default='clean_conversational_neo')
    parser.add_argument('--epochs', type=int, default=2)
    parser.add_argument('--batch_size', type=int, default=2)
    parser.add_argument('--lr', type=float, default=1e-5)
    parser.add_argument('--max_length', type=int, default=1024)
    parser.add_argument('--tokenizer', type=str, default='data/tokenizer')
    parser.add_argument('--checkpoint', type=str, default='checkpoints/checkpoint_step_99999.pt')
    parser.add_argument('--max_seq_len', type=int, default=2048)
    args = parser.parse_args()

    print("🧹 MAP-NEO Mini CLEAN Conversational Fine-Tuning")
    print("="*60)

    if args.max_length > args.max_seq_len:
        # NOTE(review): how the model reacts to sequences longer than its
        # configured context is not visible from this script, so only warn.
        print(f"Warning: --max_length ({args.max_length}) exceeds --max_seq_len ({args.max_seq_len})")

    # Load tokenizer; fall back to EOS for padding when no pad token is defined.
    tokenizer = AutoTokenizer.from_pretrained(args.tokenizer)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Load model (use original, not extended for stability)
    print("Loading original model for stability...")
    # NOTE(review): torch.load unpickles the file - only load checkpoints from
    # a trusted source.
    checkpoint = torch.load(args.checkpoint, map_location='cpu')
    config = NeoMiniConfig()
    config.max_seq_len = args.max_seq_len
    model = NeoMini(config)
    model.load_state_dict(checkpoint['model_state_dict'])

    print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")

    dataset = CleanConversationDataset(args.dataset, tokenizer, args.max_length)
    if len(dataset) == 0:
        # Fail here with a clear message instead of deep inside the training loop.
        raise SystemExit(f"No examples left after filtering {args.dataset}; nothing to train on")

    trainer = SimpleTrainer(model, tokenizer, dataset, args)
    trainer.train()


if __name__ == '__main__':
    main()