mikasenghaas committed on
Commit
16ed168
·
unverified ·
1 Parent(s): e9aca51

Add test tokenization script

Browse files
Files changed (1) hide show
  1. test_tokenization.py +26 -0
test_tokenization.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# /// script
# requires-python = ">=3.12"
# dependencies = ["transformers", "jinja2"]
# ///
"""Print the chat-template output of two tokenizers for side-by-side inspection.

One tokenizer is loaded from the current directory (".") and the other from
the "Qwen/Qwen3-Coder-30B-A3B-Instruct" identifier. The same short
conversation is passed through each tokenizer's chat template, first with
``tokenize=False`` and then with ``tokenize=True``, and every result is
printed. Nothing is asserted; the comparison is done by reading the output.
"""

from transformers import AutoTokenizer

# Both tokenizers are loaded before anything is printed.
local_tokenizer = AutoTokenizer.from_pretrained(".")
qwen3_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-Coder-30B-A3B-Instruct")

# A single user turn preceded by a custom system message.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]

# The second heading carries its own leading blank lines so the two sections
# are visually separated in the output.
for heading, tokenizer in (
    ("Local", local_tokenizer),
    ("\n\nQwen3-Coder", qwen3_tokenizer),
):
    print(heading)
    # Untokenized rendering first, then the tokenized result.
    for as_tokens in (False, True):
        print(tokenizer.apply_chat_template(messages, tokenize=as_tokens))