|
|
--- |
|
|
library_name: transformers |
|
|
license: apache-2.0 |
|
|
datasets: |
|
|
- zerofata/Roleplay-Anime-Characters |
|
|
- zerofata/Instruct-Anime-CreativeWriting |
|
|
- zerofata/Summaries-Anime-FandomPages |
|
|
base_model: |
|
|
- mistralai/Mistral-Small-3.2-24B-Instruct-2506 |
|
|
--- |
|
|
<!DOCTYPE html> |
|
|
<style> |
|
|
.container { |
|
|
--bg-main: #0A0C10; |
|
|
--bg-card: #10121A; |
|
|
--primary-accent: #FDE43B; |
|
|
--secondary-accent: #F8602C; |
|
|
--text-main: #F0F2F5; |
|
|
--text-dark: #10121A; |
|
|
--white: #FFFFFF; |
|
|
--font-title: 'Syncopate', sans-serif; |
|
|
--font-heading: 'Rajdhani', sans-serif; |
|
|
--font-body: 'Exo 2', sans-serif; |
|
|
--font-code: 'JetBrains Mono', monospace; |
|
|
|
|
|
font-family: var(--font-body); |
|
|
color: var(--text-main); |
|
|
line-height: 1.7; |
|
|
|
|
|
max-width: 900px; |
|
|
margin: 40px auto; |
|
|
background-color: var(--bg-card); |
|
|
border: 2px solid var(--secondary-accent); |
|
|
padding: 25px 40px; |
|
|
box-shadow: 0 0 10px rgba(248, 96, 44, 0.4), 0 0 30px rgba(253, 228, 59, 0.2); |
|
|
position: relative; |
|
|
clip-path: polygon(0 0, 100% 0, 100% 100%, 15px 100%, 0 calc(100% - 15px)); |
|
|
} |
|
|
|
|
|
.container .title-container { |
|
|
text-align: left; |
|
|
padding-bottom: 25px; |
|
|
margin-bottom: 35px; |
|
|
border-bottom: 2px solid var(--primary-accent); |
|
|
position: relative; |
|
|
} |
|
|
|
|
|
.container .title-container::before, |
|
|
.container .title-container::after { |
|
|
all: unset; |
|
|
} |
|
|
|
|
|
.container .title-main { |
|
|
font-family: var(--font-title); |
|
|
font-size: 3rem; |
|
|
font-weight: 700; |
|
|
color: var(--white); |
|
|
text-transform: uppercase; |
|
|
letter-spacing: 5px; |
|
|
margin: 0; |
|
|
text-shadow: 0 0 12px rgba(253, 228, 59, 0.7); |
|
|
} |
|
|
|
|
|
.container .lemonade-text { |
|
|
color: var(--primary-accent); |
|
|
} |
|
|
|
|
|
.container .subtitle-text { |
|
|
font-family: var(--font-heading); |
|
|
font-size: 1.2rem; |
|
|
font-weight: 600; |
|
|
color: var(--secondary-accent); |
|
|
text-transform: uppercase; |
|
|
letter-spacing: 1px; |
|
|
text-shadow: 0 0 8px rgba(248, 96, 44, 0.6); |
|
|
} |
|
|
|
|
|
.container .glitchy-overlay, |
|
|
.container .title-wrapper, |
|
|
.container .title-prefix, |
|
|
.container .title-subtitle { |
|
|
all: unset; |
|
|
} |
|
|
.container .title-subtitle { display: block; margin-top: 5px; } |
|
|
|
|
|
.container img { |
|
|
max-width: 100%; |
|
|
border: 2px solid var(--white); |
|
|
margin-bottom: 30px; |
|
|
box-shadow: 0 0 15px rgba(255, 255, 255, 0.2); |
|
|
transform: rotate(-1deg); |
|
|
} |
|
|
|
|
|
.container .section-container { |
|
|
margin-bottom: 35px; |
|
|
padding-bottom: 35px; |
|
|
position: relative; |
|
|
border: none; |
|
|
} |
|
|
.container .section-container:not(:last-child)::after { |
|
|
content: ''; |
|
|
position: absolute; |
|
|
bottom: 0; |
|
|
left: 5%; |
|
|
right: 5%; |
|
|
height: 1px; |
|
|
background: linear-gradient(90deg, var(--bg-card), var(--primary-accent), var(--bg-card)); |
|
|
transform: skewY(-2deg); |
|
|
} |
|
|
|
|
|
.container .section-header, |
|
|
.container .section-content { |
|
|
all: unset; |
|
|
display: block; |
|
|
} |
|
|
|
|
|
.container .section-title { |
|
|
font-family: var(--font-heading); |
|
|
font-size: 1.6rem; |
|
|
font-weight: 600; |
|
|
color: var(--text-dark); |
|
|
background-color: var(--primary-accent); |
|
|
margin-bottom: 25px; |
|
|
text-transform: uppercase; |
|
|
letter-spacing: 1.5px; |
|
|
display: inline-block; |
|
|
padding: 8px 30px 8px 20px; |
|
|
clip-path: polygon(0 0, 100% 0, calc(100% - 20px) 100%, 0% 100%); |
|
|
} |
|
|
|
|
|
.container .subheading { |
|
|
font-family: var(--font-heading); |
|
|
font-size: 1.2rem; |
|
|
color: var(--secondary-accent); |
|
|
font-weight: 600; |
|
|
text-transform: uppercase; |
|
|
letter-spacing: 1px; |
|
|
margin-top: 25px; |
|
|
margin-bottom: 15px; |
|
|
border: none; |
|
|
} |
|
|
|
|
|
.container .data-box { |
|
|
background-color: var(--bg-main); |
|
|
border: 1px solid var(--secondary-accent); |
|
|
padding: 20px 25px; |
|
|
margin-top: 15px; |
|
|
clip-path: polygon(15px 0, 100% 0, 100% calc(100% - 15px), 0 100%, 0 15px); |
|
|
} |
|
|
|
|
|
.container .data-row { |
|
|
display: flex; |
|
|
margin-bottom: 12px; |
|
|
align-items: center; |
|
|
} |
|
|
.container .data-row:last-child { margin-bottom: 0; } |
|
|
.container .data-arrow { display: none; } |
|
|
|
|
|
.container .data-label { |
|
|
color: var(--text-main); |
|
|
width: 90px; |
|
|
font-weight: 600; |
|
|
flex-shrink: 0; |
|
|
opacity: 0.8; |
|
|
} |
|
|
|
|
|
.container a { |
|
|
color: var(--secondary-accent); |
|
|
text-decoration: none; |
|
|
font-weight: 600; |
|
|
transition: color .3s; |
|
|
} |
|
|
|
|
|
.container a:hover { |
|
|
color: var(--primary-accent); |
|
|
} |
|
|
|
|
|
/* Add line hover effect only to specific text links */ |
|
|
.container .data-box a { |
|
|
position: relative; |
|
|
background-image: linear-gradient(to top, var(--primary-accent), var(--primary-accent)); |
|
|
background-position: 0 100%; |
|
|
background-repeat: no-repeat; |
|
|
background-size: 0% 2px; |
|
|
transition: background-size .3s, color .3s; |
|
|
} |
|
|
|
|
|
.container .data-box a:hover { |
|
|
color: var(--primary-accent); |
|
|
background-size: 100% 2px; |
|
|
} |
|
|
|
|
|
.container .dropdown-container { |
|
|
margin-top: 25px; |
|
|
} |
|
|
|
|
|
.container .dropdown-summary { |
|
|
cursor: pointer; |
|
|
color: var(--secondary-accent); |
|
|
font-size: 1.2rem; |
|
|
font-family: var(--font-heading); |
|
|
font-weight: 600; |
|
|
text-transform: uppercase; |
|
|
letter-spacing: 1px; |
|
|
list-style: none; |
|
|
padding: 5px 0; |
|
|
display: flex; |
|
|
align-items: center; |
|
|
} |
|
|
.container .dropdown-summary::-webkit-details-marker { display: none; } |
|
|
|
|
|
.container .dropdown-arrow { |
|
|
color: var(--primary-accent); |
|
|
margin-right: 15px; |
|
|
transition: transform 0.2s ease; |
|
|
display: inline-block; |
|
|
} |
|
|
.container .dropdown-container[open] .dropdown-arrow { |
|
|
transform: rotate(90deg); |
|
|
} |
|
|
|
|
|
.container .dropdown-content { |
|
|
margin-top: 15px; |
|
|
padding: 20px; |
|
|
background-color: var(--bg-main); |
|
|
border-left: 3px solid var(--primary-accent); |
|
|
} |
|
|
|
|
|
.container .config-title { |
|
|
color: var(--secondary-accent); |
|
|
font-size: 1rem; |
|
|
margin-bottom: 10px; |
|
|
font-family: var(--font-heading); |
|
|
text-transform: uppercase; |
|
|
letter-spacing: 1px; |
|
|
} |
|
|
|
|
|
.container pre { |
|
|
background-color: var(--bg-main); |
|
|
padding: 15px; |
|
|
border: 1px solid rgba(248, 96, 44, 0.4); |
|
|
white-space: pre-wrap; |
|
|
word-wrap: break-word; |
|
|
color: var(--text-main); |
|
|
} |
|
|
.container code { |
|
|
font-family: var(--font-code); |
|
|
} |
|
|
</style> |
|
|
<html lang="en"> |
|
|
<head> |
|
|
<meta charset="UTF-8"> |
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
|
|
<title>Painted Fantasy</title> |
|
|
<link rel="preconnect" href="https://fonts.googleapis.com"> |
|
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> |
|
|
<link href="https://fonts.googleapis.com/css2?family=Syncopate:wght@700&family=Rajdhani:wght@600&family=Exo+2:wght@400;600&family=JetBrains+Mono:wght@400;700&display=swap" rel="stylesheet"> |
|
|
</head> |
|
|
<body> |
|
|
|
|
|
<div class="container"> |
|
|
<div class="title-container"> |
|
|
<!-- Glitchy overlay --> |
|
|
<div class="glitchy-overlay"></div> |
|
|
<!-- Main title --> |
|
|
<div class="title-wrapper"> |
|
|
<h1 class="title-main"> |
|
|
PAINTED <span class="lemonade-text">FANTASY</span> VISAGE |
|
|
</h1> |
|
|
<div class="title-subtitle"> |
|
|
<span class="subtitle-text">Mistral Small 3.2 Upscaled 33B</span>
|
|
</div> |
|
|
</div> |
|
|
</div> |
|
|
|
|
|
 |
|
|
|
|
|
<div class="section-container"> |
|
|
<div class="section-header"> |
|
|
<div class="section-indicator"></div> |
|
|
<h2 class="section-title">Overview</h2> |
|
|
</div> |
|
|
<div class="section-content"> |
|
|
<p>Another experimental release. Mistral Small 3.2 24B upscaled by 18 layers to create a 33.6B model. This model then went through pretraining, SFT & DPO.</p> |
|
|
<p>Can't guarantee the Mistral 3.2 repetition issues are fixed, but this model seems to be less repetitive than my previous attempt.</p> |
|
|
<p>This is an uncensored creative model intended to excel at character driven RP / ERP where characters are portrayed creatively and proactively.</p> |
|
|
</div> |
|
|
</div> |
|
|
|
|
|
<div class="section-container"> |
|
|
<div class="section-header"> |
|
|
<div class="section-indicator"></div> |
|
|
<h2 class="section-title">SillyTavern Settings</h2> |
|
|
</div> |
|
|
<div class="section-content"> |
|
|
<h3 class="subheading">Recommended Roleplay Format</h3> |
|
|
<div class="data-box"> |
|
|
<div class="data-row"> |
|
|
<span class="data-arrow">></span> |
|
|
<span class="data-label">Actions:</span> |
|
|
<span>In plaintext</span> |
|
|
</div> |
|
|
<div class="data-row"> |
|
|
<span class="data-arrow">></span> |
|
|
<span class="data-label">Dialogue:</span> |
|
|
<span>"In quotes"</span> |
|
|
</div> |
|
|
<div class="data-row"> |
|
|
<span class="data-arrow">></span> |
|
|
<span class="data-label">Thoughts:</span> |
|
|
<span>*In asterisks*</span> |
|
|
</div> |
|
|
</div> |
|
|
<h3 class="subheading">Recommended Samplers</h3> |
|
|
<div class="data-box"> |
|
|
<div class="data-row"> |
|
|
<span class="data-arrow">></span> |
|
|
<span class="data-label">Temp:</span> |
|
|
<span>0.6</span> |
|
|
</div> |
|
|
<div class="data-row"> |
|
|
<span class="data-arrow">></span> |
|
|
<span class="data-label">MinP:</span> |
|
|
<span>0.03 - 0.05</span> |
|
|
</div> |
|
|
<div class="data-row"> |
|
|
<span class="data-arrow">></span> |
|
|
<span class="data-label">TopP:</span> |
|
|
<span>0.95 - 1.0</span> |
|
|
</div> |
|
|
<div class="data-row"> |
|
|
<span class="data-arrow">></span> |
|
|
<span class="data-label">Dry:</span> |
|
|
<span>0.8, 1.75, 4</span> |
|
|
</div> |
|
|
</div> |
|
|
<h3 class="subheading">Instruct</h3> |
|
|
<div class="data-box"> |
|
|
<p style="margin: 0;">Mistral v7 Tekken</p> |
|
|
</div> |
|
|
</div> |
|
|
</div> |
|
|
|
|
|
<div class="section-container"> |
|
|
<div class="section-header"> |
|
|
<div class="section-indicator"></div> |
|
|
<h2 class="section-title">Quantizations</h2> |
|
|
</div> |
|
|
<div class="section-content"> |
|
|
<div style="margin-bottom: 20px;"> |
|
|
<h3 class="subheading">GGUF</h3> |
|
|
<div class="data-box"> |
|
|
<div class="data-row"> |
|
|
<a href="https://huggingface.co/mradermacher/MS3.2-PaintedFantasy-Visage-33B-GGUF">Static (mradermacher)</a> |
|
|
</div> |
|
|
<div class="data-row"> |
|
|
<a href="https://huggingface.co/mradermacher/MS3.2-PaintedFantasy-Visage-33B-i1-GGUF">iMatrix (mradermacher)</a> |
|
|
</div> |
|
|
</div> |
|
|
</div> |
|
|
<div> |
|
|
<h3 class="subheading">EXL3</h3> |
|
|
<div class="data-box"> |
|
|
<div class="data-row"> |
|
|
<a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-Visage-33B_exl3_3bpw">3bpw</a> |
|
|
</div> |
|
|
<div class="data-row"> |
|
|
<a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-Visage-33B_exl3_4bpw">4bpw</a> |
|
|
</div> |
|
|
<div class="data-row"> |
|
|
<a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-Visage-33B_exl3_5bpw">5bpw</a> |
|
|
</div> |
|
|
<div class="data-row"> |
|
|
<a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-Visage-33B_exl3_6bpw">6bpw</a> |
|
|
</div> |
|
|
</div> |
|
|
</div> |
|
|
</div> |
|
|
</div> |
|
|
|
|
|
<div class="section-container"> |
|
|
<div class="section-header"> |
|
|
<div class="section-indicator"></div> |
|
|
<h2 class="section-title">Creation Process</h2> |
|
|
</div> |
|
|
<div class="section-content"> |
|
|
<p>Creation process: Upscale > Pretrain > SFT > DPO</p> |
|
|
<p>All training was qlora (including pretrain).</p> |
|
|
<p>Pretrained on 177MB of data. Dataset consisted mostly of Light Novels, NSFW stories, SFW stories and filled out with general corpus text from the Hugging Face FineWeb-2 dataset.</p>
|
|
<p>The model then went through SFT using a dataset of approx 3.6 million tokens, 700 RP conversations, 1000 creative writing / instruct samples and about 100 summaries. The bulk of this data has been made public.</p> |
|
|
<p>Finally, DPO was used to make the model more consistent.</p> |
|
|
<div class="dropdown-container"> |
|
|
<details> |
|
|
<summary class="dropdown-summary"> |
|
|
<span class="dropdown-arrow">></span> |
|
|
Mergekit Config |
|
|
</summary> |
|
|
<div class="dropdown-content"> |
|
|
<pre><code>base_model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only |
|
|
merge_method: passthrough |
|
|
dtype: bfloat16 |
|
|
slices: |
|
|
- sources: |
|
|
- model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only |
|
|
layer_range: [0, 29] |
|
|
- sources: |
|
|
- model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only |
|
|
layer_range: [10, 39] |
|
|
</code></pre> |
|
|
</div> |
|
|
</details> |
|
|
</div> |
|
|
<div class="dropdown-container"> |
|
|
<details> |
|
|
<summary class="dropdown-summary"> |
|
|
<span class="dropdown-arrow">></span> |
|
|
Axolotl configs |
|
|
</summary> |
|
|
<div class="dropdown-content"> |
|
|
<p>Not optimized for cost / performance efficiency, YMMV.</p> |
|
|
<div class="config-title">SFT 1*H100</div> |
|
|
<pre><code># ==================== |
|
|
# MODEL CONFIGURATION |
|
|
# ==================== |
|
|
base_model: ./Upscale_Mistral-PT/merged |
|
|
model_type: AutoModelForCausalLM |
|
|
tokenizer_type: AutoTokenizer |
|
|
chat_template: mistral_v7_tekken |
|
|
<br> |
|
|
# ==================== |
|
|
# DATASET CONFIGURATION |
|
|
# ==================== |
|
|
datasets: |
|
|
- path: ./dataset.jsonl |
|
|
type: chat_template |
|
|
split: train |
|
|
chat_template_strategy: tokenizer |
|
|
field_messages: messages |
|
|
message_property_mappings: |
|
|
role: role |
|
|
content: content |
|
|
roles: |
|
|
user: ["user"] |
|
|
assistant: ["assistant"] |
|
|
system: ["system"] |
|
|
|
|
|
dataset_prepared_path: |
|
|
train_on_inputs: false # Only train on assistant responses |
|
|
|
|
|
# ==================== |
|
|
# QLORA CONFIGURATION |
|
|
# ==================== |
|
|
adapter: qlora |
|
|
load_in_4bit: true |
|
|
lora_r: 128 |
|
|
lora_alpha: 128 |
|
|
lora_dropout: 0.1 |
|
|
lora_target_linear: true |
|
|
# lora_modules_to_save: # Uncomment only if you added NEW tokens |
|
|
|
|
|
# ==================== |
|
|
# TRAINING PARAMETERS |
|
|
# ==================== |
|
|
num_epochs: 2 |
|
|
micro_batch_size: 4 |
|
|
gradient_accumulation_steps: 2 |
|
|
learning_rate: 1.5e-5 |
|
|
optimizer: paged_adamw_8bit |
|
|
lr_scheduler: rex |
|
|
warmup_ratio: 0.05 |
|
|
weight_decay: 0.01 |
|
|
max_grad_norm: 1.0 |
|
|
|
|
|
# ==================== |
|
|
# SEQUENCE & PACKING |
|
|
# ==================== |
|
|
sequence_len: 8192 |
|
|
sample_packing: true |
|
|
eval_sample_packing: false |
|
|
pad_to_sequence_len: true |
|
|
|
|
|
# ==================== |
|
|
# HARDWARE OPTIMIZATIONS |
|
|
# ==================== |
|
|
bf16: auto |
|
|
flash_attention: true |
|
|
gradient_checkpointing: true |
|
|
|
|
|
# ==================== |
|
|
# EVALUATION & CHECKPOINTING |
|
|
# ==================== |
|
|
save_strategy: steps |
|
|
save_steps: 5 |
|
|
save_total_limit: 5 # Keep best + last few checkpoints |
|
|
load_best_model_at_end: true |
|
|
greater_is_better: false |
|
|
|
|
|
# ==================== |
|
|
# LOGGING & OUTPUT |
|
|
# ==================== |
|
|
output_dir: ./Upscale_Mistral-PT-SFT-2 |
|
|
logging_steps: 2 |
|
|
save_safetensors: true |
|
|
|
|
|
# ==================== |
|
|
# WANDB TRACKING |
|
|
# ==================== |
|
|
wandb_project: MS3-2-SFT |
|
|
wandb_entity: your_entity |
|
|
wandb_name: run_name</code></pre>
|
|
</div> |
|
|
</details> |
|
|
</div> |
|
|
</div> |
|
|
</div> |
|
|
</div> |
|
|
</body> |
|
|
</html> |