Text Generation
Transformers
Safetensors
mistral
conversational
text-generation-inference
zerofata's picture
Update README.md
af1ea8a verified
---
library_name: transformers
license: apache-2.0
datasets:
- zerofata/Roleplay-Anime-Characters
- zerofata/Instruct-Anime-CreativeWriting
- zerofata/Summaries-Anime-FandomPages
base_model:
- mistralai/Mistral-Small-3.2-24B-Instruct-2506
---
<!DOCTYPE html>
<style>
.container {
--bg-main: #0A0C10;
--bg-card: #10121A;
--primary-accent: #FDE43B;
--secondary-accent: #F8602C;
--text-main: #F0F2F5;
--text-dark: #10121A;
--white: #FFFFFF;
--font-title: 'Syncopate', sans-serif;
--font-heading: 'Rajdhani', sans-serif;
--font-body: 'Exo 2', sans-serif;
--font-code: 'JetBrains Mono', monospace;
font-family: var(--font-body);
color: var(--text-main);
line-height: 1.7;
max-width: 900px;
margin: 40px auto;
background-color: var(--bg-card);
border: 2px solid var(--secondary-accent);
padding: 25px 40px;
box-shadow: 0 0 10px rgba(248, 96, 44, 0.4), 0 0 30px rgba(253, 228, 59, 0.2);
position: relative;
clip-path: polygon(0 0, 100% 0, 100% 100%, 15px 100%, 0 calc(100% - 15px));
}
.container .title-container {
text-align: left;
padding-bottom: 25px;
margin-bottom: 35px;
border-bottom: 2px solid var(--primary-accent);
position: relative;
}
.container .title-container::before,
.container .title-container::after {
all: unset;
}
.container .title-main {
font-family: var(--font-title);
font-size: 3rem;
font-weight: 700;
color: var(--white);
text-transform: uppercase;
letter-spacing: 5px;
margin: 0;
text-shadow: 0 0 12px rgba(253, 228, 59, 0.7);
}
.container .lemonade-text {
color: var(--primary-accent);
}
.container .subtitle-text {
font-family: var(--font-heading);
font-size: 1.2rem;
font-weight: 600;
color: var(--secondary-accent);
text-transform: uppercase;
letter-spacing: 1px;
text-shadow: 0 0 8px rgba(248, 96, 44, 0.6);
}
.container .glitchy-overlay,
.container .title-wrapper,
.container .title-prefix,
.container .title-subtitle {
all: unset;
}
.container .title-subtitle { display: block; margin-top: 5px; }
.container img {
max-width: 100%;
border: 2px solid var(--white);
margin-bottom: 30px;
box-shadow: 0 0 15px rgba(255, 255, 255, 0.2);
transform: rotate(-1deg);
}
.container .section-container {
margin-bottom: 35px;
padding-bottom: 35px;
position: relative;
border: none;
}
.container .section-container:not(:last-child)::after {
content: '';
position: absolute;
bottom: 0;
left: 5%;
right: 5%;
height: 1px;
background: linear-gradient(90deg, var(--bg-card), var(--primary-accent), var(--bg-card));
transform: skewY(-2deg);
}
.container .section-header,
.container .section-content {
all: unset;
display: block;
}
.container .section-title {
font-family: var(--font-heading);
font-size: 1.6rem;
font-weight: 600;
color: var(--text-dark);
background-color: var(--primary-accent);
margin-bottom: 25px;
text-transform: uppercase;
letter-spacing: 1.5px;
display: inline-block;
padding: 8px 30px 8px 20px;
clip-path: polygon(0 0, 100% 0, calc(100% - 20px) 100%, 0% 100%);
}
.container .subheading {
font-family: var(--font-heading);
font-size: 1.2rem;
color: var(--secondary-accent);
font-weight: 600;
text-transform: uppercase;
letter-spacing: 1px;
margin-top: 25px;
margin-bottom: 15px;
border: none;
}
.container .data-box {
background-color: var(--bg-main);
border: 1px solid var(--secondary-accent);
padding: 20px 25px;
margin-top: 15px;
clip-path: polygon(15px 0, 100% 0, 100% calc(100% - 15px), 0 100%, 0 15px);
}
.container .data-row {
display: flex;
margin-bottom: 12px;
align-items: center;
}
.container .data-row:last-child { margin-bottom: 0; }
.container .data-arrow { display: none; }
.container .data-label {
color: var(--text-main);
width: 90px;
font-weight: 600;
flex-shrink: 0;
opacity: 0.8;
}
.container a {
color: var(--secondary-accent);
text-decoration: none;
font-weight: 600;
transition: color .3s;
}
.container a:hover {
color: var(--primary-accent);
}
/* Add line hover effect only to specific text links */
.container .data-box a {
position: relative;
background-image: linear-gradient(to top, var(--primary-accent), var(--primary-accent));
background-position: 0 100%;
background-repeat: no-repeat;
background-size: 0% 2px;
transition: background-size .3s, color .3s;
}
.container .data-box a:hover {
color: var(--primary-accent);
background-size: 100% 2px;
}
.container .dropdown-container {
margin-top: 25px;
}
.container .dropdown-summary {
cursor: pointer;
color: var(--secondary-accent);
font-size: 1.2rem;
font-family: var(--font-heading);
font-weight: 600;
text-transform: uppercase;
letter-spacing: 1px;
list-style: none;
padding: 5px 0;
display: flex;
align-items: center;
}
.container .dropdown-summary::-webkit-details-marker { display: none; }
.container .dropdown-arrow {
color: var(--primary-accent);
margin-right: 15px;
transition: transform 0.2s ease;
display: inline-block;
}
.container .dropdown-container[open] .dropdown-arrow {
transform: rotate(90deg);
}
.container .dropdown-content {
margin-top: 15px;
padding: 20px;
background-color: var(--bg-main);
border-left: 3px solid var(--primary-accent);
}
.container .config-title {
color: var(--secondary-accent);
font-size: 1rem;
margin-bottom: 10px;
font-family: var(--font-heading);
text-transform: uppercase;
letter-spacing: 1px;
}
.container pre {
background-color: var(--bg-main);
padding: 15px;
border: 1px solid rgba(248, 96, 44, 0.4);
white-space: pre-wrap;
word-wrap: break-word;
color: var(--text-main);
}
.container code {
font-family: var(--font-code);
}
</style>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Painted Fantasy Visage</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Syncopate:wght@700&family=Rajdhani:wght@600&family=Exo+2:wght@400;600&family=JetBrains+Mono:wght@400;700&display=swap" rel="stylesheet">
</head>
<body>
<div class="container">
<div class="title-container">
<!-- Glitchy overlay -->
<div class="glitchy-overlay"></div>
<!-- Main title -->
<div class="title-wrapper">
<h1 class="title-main">
PAINTED <span class="lemonade-text">FANTASY</span> VISAGE
</h1>
<div class="title-subtitle">
<span class="subtitle-text">Mistral Small 3.2 Upscaled 33B</span>
</div>
</div>
</div>
<img src="https://cdn-uploads.huggingface.co/production/uploads/65b19c6c638328850e12d38c/CQeog2SHdGUdmx8vHqL71.png" alt="Painted Fantasy Visage model artwork">
<div class="section-container">
<div class="section-header">
<div class="section-indicator"></div>
<h2 class="section-title">Overview</h2>
</div>
<div class="section-content">
<p>Another experimental release. Mistral Small 3.2 24B upscaled by 18 layers to create a 33.6B model. This model then went through pretraining, SFT & DPO.</p>
<p>Can't guarantee the Mistral 3.2 repetition issues are fixed, but this model seems to be less repetitive than my previous attempt.</p>
<p>This is an uncensored creative model intended to excel at character driven RP / ERP where characters are portrayed creatively and proactively.</p>
</div>
</div>
<div class="section-container">
<div class="section-header">
<div class="section-indicator"></div>
<h2 class="section-title">SillyTavern Settings</h2>
</div>
<div class="section-content">
<h3 class="subheading">Recommended Roleplay Format</h3>
<div class="data-box">
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">Actions:</span>
<span>In plaintext</span>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">Dialogue:</span>
<span>"In quotes"</span>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">Thoughts:</span>
<span>*In asterisks*</span>
</div>
</div>
<h3 class="subheading">Recommended Samplers</h3>
<div class="data-box">
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">Temp:</span>
<span>0.6</span>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">MinP:</span>
<span>0.03 - 0.05</span>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">TopP:</span>
<span>0.95 - 1.0</span>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">Dry:</span>
<span>0.8, 1.75, 4</span>
</div>
</div>
<h3 class="subheading">Instruct</h3>
<div class="data-box">
<p style="margin: 0;">Mistral v7 Tekken</p>
</div>
</div>
</div>
<div class="section-container">
<div class="section-header">
<div class="section-indicator"></div>
<h2 class="section-title">Quantizations</h2>
</div>
<div class="section-content">
<div style="margin-bottom: 20px;">
<h3 class="subheading">GGUF</h3>
<div class="data-box">
<div class="data-row">
<a href="https://huggingface.co/mradermacher/MS3.2-PaintedFantasy-Visage-33B-GGUF">Static (mradermacher)</a>
</div>
<div class="data-row">
<a href="https://huggingface.co/mradermacher/MS3.2-PaintedFantasy-Visage-33B-i1-GGUF">iMatrix (mradermacher)</a>
</div>
</div>
</div>
<div>
<h3 class="subheading">EXL3</h3>
<div class="data-box">
<div class="data-row">
<a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-Visage-33B_exl3_3bpw">3bpw</a>
</div>
<div class="data-row">
<a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-Visage-33B_exl3_4bpw">4bpw</a>
</div>
<div class="data-row">
<a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-Visage-33B_exl3_5bpw">5bpw</a>
</div>
<div class="data-row">
<a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-Visage-33B_exl3_6bpw">6bpw</a>
</div>
</div>
</div>
</div>
</div>
<div class="section-container">
<div class="section-header">
<div class="section-indicator"></div>
<h2 class="section-title">Creation Process</h2>
</div>
<div class="section-content">
<p>Creation process: Upscale > Pretrain > SFT > DPO</p>
<p>All training was qlora (including pretrain).</p>
<p>Pretrained on 177MB of data. The dataset consisted mostly of Light Novels, NSFW stories and SFW stories, and was filled out with general corpus text from the Hugging Face FineWeb-2 dataset.</p>
<p>The model then went through SFT using a dataset of approx 3.6 million tokens, 700 RP conversations, 1000 creative writing / instruct samples and about 100 summaries. The bulk of this data has been made public.</p>
<p>Finally, DPO was used to make the model more consistent.</p>
<div class="dropdown-container">
<details>
<summary class="dropdown-summary">
<span class="dropdown-arrow">></span>
Mergekit Config
</summary>
<div class="dropdown-content">
<pre><code>base_model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
merge_method: passthrough
dtype: bfloat16
slices:
- sources:
- model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
layer_range: [0, 29]
- sources:
- model: anthracite-core/Mistral-Small-3.2-24B-Instruct-2506-Text-Only
layer_range: [10, 39]
</code></pre>
</div>
</details>
</div>
<div class="dropdown-container">
<details>
<summary class="dropdown-summary">
<span class="dropdown-arrow">></span>
Axolotl configs
</summary>
<div class="dropdown-content">
<p>Not optimized for cost / performance efficiency, YMMV.</p>
<div class="config-title">SFT 1*H100</div>
<pre><code>&#35; ====================
&#35; MODEL CONFIGURATION
&#35; ====================
base_model: ./Upscale_Mistral-PT/merged
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
chat_template: mistral_v7_tekken
<br>
&#35; ====================
&#35; DATASET CONFIGURATION
&#35; ====================
datasets:
- path: ./dataset.jsonl
type: chat_template
split: train
chat_template_strategy: tokenizer
field_messages: messages
message_property_mappings:
role: role
content: content
roles:
user: ["user"]
assistant: ["assistant"]
system: ["system"]
dataset_prepared_path:
train_on_inputs: false &#35; Only train on assistant responses
&#35; ====================
&#35; QLORA CONFIGURATION
&#35; ====================
adapter: qlora
load_in_4bit: true
lora_r: 128
lora_alpha: 128
lora_dropout: 0.1
lora_target_linear: true
&#35; lora_modules_to_save: &#35; Uncomment only if you added NEW tokens
&#35; ====================
&#35; TRAINING PARAMETERS
&#35; ====================
num_epochs: 2
micro_batch_size: 4
gradient_accumulation_steps: 2
learning_rate: 1.5e-5
optimizer: paged_adamw_8bit
lr_scheduler: rex
warmup_ratio: 0.05
weight_decay: 0.01
max_grad_norm: 1.0
&#35; ====================
&#35; SEQUENCE &amp; PACKING
&#35; ====================
sequence_len: 8192
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true
&#35; ====================
&#35; HARDWARE OPTIMIZATIONS
&#35; ====================
bf16: auto
flash_attention: true
gradient_checkpointing: true
&#35; ====================
&#35; EVALUATION &amp; CHECKPOINTING
&#35; ====================
save_strategy: steps
save_steps: 5
save_total_limit: 5 &#35; Keep best + last few checkpoints
load_best_model_at_end: true
greater_is_better: false
&#35; ====================
&#35; LOGGING &amp; OUTPUT
&#35; ====================
output_dir: ./Upscale_Mistral-PT-SFT-2
logging_steps: 2
save_safetensors: true
&#35; ====================
&#35; WANDB TRACKING
&#35; ====================
wandb_project: MS3-2-SFT
wandb_entity: your_entity
wandb_name: run_name<p></p></code></pre>
</div>
</details>
</div>
</div>
</div>
</div>
</body>
</html>