Tonic committed on
Commit 249d9cf · 1 Parent(s): 6e00c9e

test commit powershell

Files changed (1)
  1. test_float16_compatibility.py +0 -96
test_float16_compatibility.py DELETED
@@ -1,96 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script for float16 compatibility with pre-quantized model
-"""
-
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import logging
-
-# Set up logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-def test_float16_compatibility():
-    """Test float16 compatibility with pre-quantized model"""
-
-    model_id = "Tonic/petite-elle-L-aime-3-sft"
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-
-    logger.info(f"Testing float16 compatibility on device: {device}")
-
-    # Test both float32 and float16
-    dtypes_to_test = []
-
-    if device == "cuda":
-        dtypes_to_test = [torch.float32, torch.float16]
-    else:
-        dtypes_to_test = [torch.float32]  # Only test float32 on CPU
-
-    for dtype in dtypes_to_test:
-        logger.info(f"\nTesting with dtype: {dtype}")
-
-        try:
-            # Load tokenizer
-            tokenizer = AutoTokenizer.from_pretrained(model_id)
-            if tokenizer.pad_token_id is None:
-                tokenizer.pad_token_id = tokenizer.eos_token_id
-
-            # Load model with specific dtype
-            model_kwargs = {
-                "device_map": "auto" if device == "cuda" else "cpu",
-                "torch_dtype": dtype,
-                "trust_remote_code": True,
-                "low_cpu_mem_usage": True,
-            }
-
-            logger.info(f"Loading model with {dtype}...")
-            model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs)
-
-            # Test generation
-            test_prompt = "Bonjour, comment allez-vous?"
-            inputs = tokenizer(test_prompt, return_tensors="pt")
-
-            if device == "cuda":
-                inputs = {k: v.cuda() for k, v in inputs.items()}
-
-            logger.info("Generating response...")
-            with torch.no_grad():
-                output_ids = model.generate(
-                    inputs['input_ids'],
-                    max_new_tokens=50,
-                    temperature=0.7,
-                    top_p=0.95,
-                    do_sample=True,
-                    attention_mask=inputs['attention_mask'],
-                    pad_token_id=tokenizer.eos_token_id,
-                    eos_token_id=tokenizer.eos_token_id,
-                    cache_implementation="static"
-                )
-
-            response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-            assistant_response = response[len(test_prompt):].strip()
-
-            logger.info(f"✅ {dtype} test successful!")
-            logger.info(f"Input: {test_prompt}")
-            logger.info(f"Output: {assistant_response}")
-
-            # Check memory usage
-            if device == "cuda":
-                memory_used = torch.cuda.memory_allocated() / 1024**3
-                logger.info(f"GPU Memory used: {memory_used:.2f} GB")
-
-            # Check model dtype
-            logger.info(f"Model dtype: {model.dtype}")
-
-            # Clean up
-            del model
-            torch.cuda.empty_cache() if device == "cuda" else None
-
-        except Exception as e:
-            logger.error(f"❌ {dtype} test failed: {e}")
-            import traceback
-            traceback.print_exc()
-
-if __name__ == "__main__":
-    test_float16_compatibility()