Spaces:
Running
on
Zero
Running
on
Zero
| #!/usr/bin/env python3 | |
| """ | |
| Advanced helper script to download the int4 model files using HfFileSystem | |
| """ | |
| import os | |
| import sys | |
| import logging | |
| from pathlib import Path | |
| from tqdm import tqdm | |
# Logging setup: INFO-level root configuration plus a module-scoped logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Model locations.
# MAIN_MODEL_ID is the base model identifier; INT4_MODEL_ID is the path handed
# to the Hub filesystem when listing and downloading the int4 files.
MAIN_MODEL_ID = "Tonic/petite-elle-L-aime-3-sft"
INT4_MODEL_ID = MAIN_MODEL_ID + "/int4"

# Local directory that receives the downloaded files.
LOCAL_MODEL_PATH = "./int4"
def get_file_info(fs, repo_path):
    """Return the detailed listing entries under ``repo_path`` that are files.

    Args:
        fs: Filesystem object exposing ``ls(path, detail=True)``.
        repo_path: Path to list.

    Returns:
        The listing entries whose ``'type'`` equals ``'file'``, in listing
        order. If listing or filtering raises, the error is logged and an
        empty list is returned.
    """
    try:
        file_entries = []
        for entry in fs.ls(repo_path, detail=True):
            # Skip anything that is not a plain file (e.g. directories).
            if entry['type'] != 'file':
                continue
            file_entries.append(entry)
        return file_entries
    except Exception as e:
        logger.error(f"Error listing files in {repo_path}: {e}")
        return []
def download_with_progress(fs, remote_path, local_path, file_size, chunk_size=8192):
    """Stream a remote file to ``local_path`` while showing a progress bar.

    The data is written to a sibling ``<local_path>.part`` file and moved into
    place only after the whole transfer succeeds, so an interrupted download
    never leaves a truncated file at ``local_path``.

    Args:
        fs: Filesystem object exposing ``open(path, 'rb')``.
        remote_path: Path of the file to read through ``fs``.
        local_path: Destination path on the local disk.
        file_size: Expected size in bytes, used as the progress-bar total.
            A falsy value (0 or None) yields a bar without a known total.
        chunk_size: Number of bytes requested per read.

    Returns:
        True if the file was fully written and moved into place, False if any
        step raised (the error is logged and the partial file is removed).
    """
    partial_path = f"{local_path}.part"
    try:
        # os.makedirs('') raises, so only create the parent when there is one
        # (a bare file name has an empty dirname).
        parent_dir = os.path.dirname(local_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with tqdm(
            total=file_size or None,
            unit='B',
            unit_scale=True,
            desc=os.path.basename(local_path),
        ) as pbar:
            with fs.open(remote_path, 'rb') as remote_file, open(partial_path, 'wb') as local_file:
                while True:
                    chunk = remote_file.read(chunk_size)
                    if not chunk:
                        break
                    local_file.write(chunk)
                    pbar.update(len(chunk))

        # Publish the finished file only now that every byte has been written.
        os.replace(partial_path, local_path)
        return True
    except Exception as e:
        logger.error(f"Error downloading {remote_path}: {e}")
        # Best-effort cleanup: the partial file may never have been created.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        return False
def download_model_advanced():
    """Download the essential int4 model files into ``LOCAL_MODEL_PATH``.

    Lists ``INT4_MODEL_ID`` through ``HfFileSystem``, keeps the files whose
    base name is in the essential set, and downloads each one with
    ``download_with_progress``.

    Returns:
        True only if at least one essential file was found and every selected
        file downloaded successfully. False if the path does not exist, the
        listing is empty, no essential file is present, any download fails,
        or an unexpected exception is raised (it is logged).
    """
    try:
        logger.info(f"Downloading int4 model from {INT4_MODEL_ID}")

        # Create local directory if it doesn't exist
        os.makedirs(LOCAL_MODEL_PATH, exist_ok=True)

        # Imported inside the try block so a missing huggingface_hub package
        # is logged by the handler below instead of propagating.
        from huggingface_hub import HfFileSystem

        fs = HfFileSystem()

        if not fs.exists(INT4_MODEL_ID):
            logger.error(f"Repository {INT4_MODEL_ID} does not exist")
            return False

        files = get_file_info(fs, INT4_MODEL_ID)
        if not files:
            logger.error("No files found in repository")
            return False

        # Only these file names are fetched; everything else is ignored.
        essential_files = {
            'config.json',
            'pytorch_model.bin',
            'tokenizer.json',
            'tokenizer_config.json',
            'special_tokens_map.json',
            'generation_config.json',
        }
        files_to_download = [
            file_info
            for file_info in files
            if os.path.basename(file_info['name']) in essential_files
        ]
        logger.info(f"Found {len(files_to_download)} essential files to download")

        # Without this guard the final comparison would be 0 == 0 and the
        # function would report success without downloading anything.
        if not files_to_download:
            logger.error("No essential model files found in repository")
            return False

        successful_downloads = 0
        for file_info in files_to_download:
            file_path = file_info['name']
            file_name = os.path.basename(file_path)
            local_file_path = os.path.join(LOCAL_MODEL_PATH, file_name)
            file_size = file_info.get('size', 0)

            logger.info(f"Downloading {file_name} ({file_size} bytes)...")
            if download_with_progress(fs, file_path, local_file_path, file_size):
                successful_downloads += 1
                logger.info(f"Successfully downloaded {file_name}")
            else:
                logger.error(f"Failed to download {file_name}")

        logger.info(f"Downloaded {successful_downloads}/{len(files_to_download)} files")
        return successful_downloads == len(files_to_download)
    except Exception as e:
        logger.error(f"Error downloading model: {e}")
        return False
def verify_download_advanced():
    """Check the downloaded files under ``LOCAL_MODEL_PATH`` by presence and size.

    Each expected file must exist and have a size within its
    ``(min_size, max_size)`` byte range to count as valid.

    Returns:
        True if at least 80% of the expected files are valid, False otherwise
        or if an unexpected exception is raised (it is logged).
    """
    try:
        logger.info("Verifying downloaded model files...")

        # Expected size range per file: (min_size, max_size) in bytes.
        expected_files = {
            "config.json": (1000, 10000),
            "pytorch_model.bin": (1000000, 5000000000),
            "tokenizer.json": (10000, 1000000),
            "tokenizer_config.json": (100, 10000),
            "special_tokens_map.json": (100, 10000),
            "generation_config.json": (100, 10000),
        }

        verification_results = []
        for file_name, (min_size, max_size) in expected_files.items():
            file_path = os.path.join(LOCAL_MODEL_PATH, file_name)

            if not os.path.exists(file_path):
                logger.error(f"❌ Missing {file_name}")
                verification_results.append(False)
                continue

            actual_size = os.path.getsize(file_path)
            if min_size <= actual_size <= max_size:
                logger.info(f"✅ {file_name} verified ({actual_size} bytes)")
                verification_results.append(True)
            else:
                logger.warning(f"⚠️ {file_name} size unexpected ({actual_size} bytes)")
                verification_results.append(False)

        valid_count = sum(verification_results)
        success_rate = valid_count / len(verification_results)
        logger.info(f"Verification complete: {valid_count}/{len(verification_results)} files valid")

        # NOTE(review): with six expected files, one failure still passes
        # (5/6 is above 0.8), even if the failing file is the model weights.
        # Confirm this tolerance is intended.
        return success_rate >= 0.8  # Allow 20% tolerance
    except Exception as e:
        logger.error(f"Error verifying files: {e}")
        return False
def check_model_files():
    """Report whether every required model file exists under ``LOCAL_MODEL_PATH``.

    Only existence is checked; file contents and sizes are not inspected.

    Returns:
        True if all required files are present, otherwise False after logging
        the names that are missing.
    """
    required_names = (
        "config.json",
        "pytorch_model.bin",
        "tokenizer.json",
        "tokenizer_config.json",
    )
    missing_files = [
        name
        for name in required_names
        if not os.path.exists(os.path.join(LOCAL_MODEL_PATH, name))
    ]

    if not missing_files:
        logger.info("All required model files found")
        return True

    logger.error(f"Missing model files: {missing_files}")
    return False
def main():
    """Ensure the model files are available locally, downloading if needed.

    Skips the download when ``check_model_files`` reports that the required
    files already exist; otherwise downloads them and verifies the result.

    Returns:
        True if the files were already present or were downloaded and
        verified; False if the download or the verification failed.
    """
    logger.info("Starting advanced model download for Hugging Face Space...")

    # Nothing to do when a previous run already fetched the files.
    if check_model_files():
        logger.info("Model files already exist, skipping download")
        return True

    if not download_model_advanced():
        logger.error("Model download failed")
        return False

    if not verify_download_advanced():
        logger.error("Model verification failed")
        return False

    logger.info("Model download and verification completed successfully")
    return True
if __name__ == "__main__":
    # Translate the boolean outcome into a process exit status:
    # 0 on success, 1 on failure.
    exit_code = 0 if main() else 1
    sys.exit(exit_code)