Upload folder using huggingface_hub
- .gitattributes +4 -0
- MODEL-LICENSE +96 -0
- Modelfile +13 -0
- README.md +241 -0
- chat_template.jinja +103 -0
- config.json +55 -0
- dmind-2-performance.jpeg +3 -0
- dmind-ai-logo.png +0 -0
- generation_config.json +10 -0
- model-00001-of-00005.safetensors +3 -0
- model-00002-of-00005.safetensors +3 -0
- model-00003-of-00005.safetensors +3 -0
- model-00004-of-00005.safetensors +3 -0
- model-00005-of-00005.safetensors +3 -0
- model.safetensors.index.json +0 -0
- quantization_config.json +3 -0
- special_tokens_map.json +40 -0
- tokenizer.json +3 -0
- tokenizer_config.json +326 -0
.gitattributes
CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
+dmind-2-performance-tweet.jpeg filter=lfs diff=lfs merge=lfs -text
+dmind-2-performance.jpeg filter=lfs diff=lfs merge=lfs -text
+quantization_config.json filter=lfs diff=lfs merge=lfs -text
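The four added rules route the large artifacts (tokenizer, images, quantization config) through Git LFS. As a quick sanity check (assuming `git` is available locally; the temporary repo and file name below are illustrative, not part of this model repo), `git check-attr` reports which filter a path matches:

```shell
# Illustrative check in a throwaway repo: confirm a path hits the LFS rules.
tmp="$(mktemp -d)"
cd "$tmp"
git init -q .
printf 'tokenizer.json filter=lfs diff=lfs merge=lfs -text\n' > .gitattributes
git check-attr filter -- tokenizer.json
# -> tokenizer.json: filter: lfs
```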
MODEL-LICENSE
ADDED
@@ -0,0 +1,96 @@
DMind-2-107B Model License Agreement
Version 1.0, 9 September 2025
Copyright (c) 2025 DMind

Section 1: Preamble
DMind is an open-source AGI research organization focused on next-generation digital finance. Driven by real market needs, DMind continuously releases open-source products, including large language models, benchmarks, datasets, tools, and more.

As an open, research-driven community, DMind is powered by a global collective of AI and Web3 enthusiasts, builders, and researchers. All of our work is released under permissive licenses, enabling individuals and enterprises alike to freely use, adapt, and build upon it to create new AI-native innovations.

DMind-2-107B is a crypto investment analysis large language model designed to provide real-time, professional consulting for individual and institutional investors. Built via domain-adaptive post-training, it integrates macro market trends with micro on-chain behaviors and can orchestrate complex multi-protocol on-chain tasks. The model is enhanced with advanced tool-calling capabilities for seamless integration with protocols, APIs, and trading interfaces. DMind-2-107B is derived from the GLM-4.5-Air base model and contains approximately 107B parameters.

This license is designed to enable open and responsible use of DMind-2-107B. It grants broad rights to use, modify, and redistribute the model, while also enforcing responsible-use provisions to prevent misuse.

Section 2: Definitions
2.1 License – This document outlining the terms under which you may use, modify, and distribute the Model.

2.2 Model – Refers to DMind-2-107B, including all learned weights, parameters (including optimizer states), checkpoints, and model architecture.

2.3 Derivatives of the Model – Any model, tool, or work derived from or incorporating the Model, including fine-tuned or distilled variants, or models trained using synthetic data generated by the Model.

2.4 Complementary Material – The source code, configuration files, evaluation tools, and documentation provided with the Model.

2.5 Output – Any content generated by operating the Model, including text, code, or structured data.

2.6 Distribution – Making the Model or its Derivatives available to third parties by any method, including downloads, APIs, or services.

2.7 DMind – The organization and research community releasing this Model.

2.8 You – Any individual or entity using or distributing the Model.

Section 3: Intellectual Property Rights
3.1 Grant of Copyright License
DMind grants you a perpetual, worldwide, royalty-free, non-exclusive, irrevocable license to use, reproduce, prepare derivative works of, publicly display, publicly perform, sublicense, and distribute the Model, its Derivatives, and the Complementary Material.

3.2 Grant of Patent License
Where applicable, DMind grants you a worldwide, royalty-free, non-exclusive, irrevocable license under any relevant patent claims owned or licensable by DMind to make, use, sell, offer for sale, import, and distribute the Model and its Derivatives. This license terminates if you initiate patent litigation related to the Model.

Section 4: Usage and Distribution
4.1 Permitted Use
You may use the Model and its Derivatives for any lawful purpose, including commercial use, research, deployment in applications, and further fine-tuning.

4.2 Distribution Conditions
When distributing the Model or any Derivative:

4.2.1 Include a copy of this License;

4.2.2 Retain all original attribution and legal notices;

4.2.3 Clearly mark any modifications made;

4.2.4 Pass on the use-based restrictions in Section 5 and Attachment A in a legally enforceable way to downstream recipients.

4.3 Use-Based Restrictions
You must not use the Model or its Derivatives in any manner prohibited by Attachment A. You are responsible for ensuring your users also comply with these terms.

4.4 Output Ownership
DMind claims no rights over the Output generated by your use of the Model. However, you are solely responsible for how such Output is used, and you must ensure it does not violate the responsible-use terms of this License.

Section 5: Other Provisions
5.1 Updates and Restrictions
DMind reserves the right to restrict your use of the Model (including runtime control if applicable) if you breach this License.

5.2 Trademarks
This License does not grant any rights to use DMind's name, logo, or branding. You may not suggest affiliation or endorsement without written permission.

5.3 Compliance with Data & IP Laws
The Model may have been trained on data containing protected information or content. You are responsible for complying with data protection and IP laws when using the Model or its Derivatives.

5.4 Disclaimer of Warranty
The Model is provided "AS IS" without warranties of any kind. You assume all risk associated with your use.

5.5 Limitation of Liability
DMind shall not be liable for any direct, indirect, incidental, or consequential damages arising from use of the Model.

5.6 Additional Warranties and Indemnities
If you offer support or warranty to others based on the Model, you do so at your own risk. You agree to indemnify DMind from any liability resulting from your warranties.

5.7 Severability
If any part of this License is found unenforceable, the remainder will stay in effect.

5.8 Upstream Base Model Attribution and Compliance
DMind-2-107B is derived from GLM-4.5-Air. When using or distributing the Model or any Derivative, you must also comply with the license terms applicable to GLM-4.5-Air (the "Zai License"). Nothing in this Agreement is intended to, or shall be interpreted to, limit your obligations under the upstream license.

Attachment A: Use Restrictions
You agree not to use the Model or Derivatives of the Model:

A.1 In violation of any law or regulation;
A.2 For any military, warfare, or weapons-related purpose;
A.3 To exploit or harm minors in any way;
A.4 To generate or spread false or harmful information;
A.5 To collect or reveal personal identifying information without consent;
A.6 To defame, harass, or harm individuals or communities;
A.7 For automated decision-making that affects individuals' legal or human rights without oversight;
A.8 In ways that discriminate based on race, gender, orientation, disability, or other protected characteristics;
A.9 To exploit vulnerable groups or create manipulative or abusive systems.
Modelfile
ADDED
@@ -0,0 +1,13 @@
# ollama modelfile auto-generated by llamafactory

FROM .

TEMPLATE """[gMASK]<sop>{{ if .System }}<|system|>
{{ .System }}{{ end }}{{ range .Messages }}{{ if eq .Role "user" }}<|user|>
{{ .Content }}<|assistant|>{{ else if eq .Role "assistant" }}
{{ .Content }}{{ end }}{{ end }}"""

PARAMETER stop "<|user|>"
PARAMETER stop "<|endoftext|>"
PARAMETER stop "<|observation|>"
PARAMETER num_ctx 4096
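To make the TEMPLATE concrete, here is a minimal pure-Python sketch of the same rendering logic for a sample conversation. The `render` helper is hypothetical (ollama evaluates the Go template itself); it only mirrors the prefix, optional system block, and user/assistant markers shown above:

```python
def render(system, messages):
    # Mirrors the Modelfile TEMPLATE: [gMASK]<sop> prefix, optional
    # <|system|> block, then alternating <|user|>/<|assistant|> turns.
    out = "[gMASK]<sop>"
    if system:
        out += f"<|system|>\n{system}"
    for m in messages:
        if m["role"] == "user":
            out += f"<|user|>\n{m['content']}<|assistant|>"
        elif m["role"] == "assistant":
            out += f"\n{m['content']}"
    return out

prompt = render("You are a crypto analyst.",
                [{"role": "user", "content": "Is this TVL growth sustainable?"}])
print(prompt)
```

A prompt rendered this way ends at `<|assistant|>`, which is why the `PARAMETER stop "<|user|>"` line is needed: generation halts before the model begins a new user turn.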
README.md
ADDED
@@ -0,0 +1,241 @@
---
license: apache-2.0
language:
- en
- zh
base_model:
- zai-org/GLM-4.5-Air
---

# DMind-2: Advanced Crypto Domain-Specific Large Language Models with Distribution-Preserving CoT Distillation

<div align="center">
  <img src="dmind-ai-logo.png" width="60%" alt="DMind-2" />
</div>

<hr>

<div align="center">
  <a href="https://dmind.ai/">
    <img alt="DMind Website" src="https://img.shields.io/badge/DMind-Homepage-blue?logo=data:image/svg+xml;base64,)"/>
  </a>
  <a href="https://huggingface.co/DMindAI">
    <img alt="Hugging Face" src="https://img.shields.io/badge/HuggingFace-DMind-ffd21f?color=ffd21f&logo=huggingface"/>
  </a>
  <a href="https://x.com/dmind_ai">
    <img alt="X" src="https://img.shields.io/badge/X-@dmindai-1DA1F2?logo=x"/>
  </a>
  <!-- <a href="https://huggingface.co/spaces/DMindAI/DMind-1">
    <img alt="Chat" src="https://img.shields.io/badge/🤖%20Chat-DMind-536af5?color=536af5&logoColor=white"/>
  </a> -->
  <a href="https://discord.gg/xxwmPHU3">
    <img alt="Discord" src="https://img.shields.io/badge/Discord-DMind-7289da?logo=discord&logoColor=white&color=7289da"/>
  </a>
  <a href="https://opensource.org/licenses/MIT">
    <img alt="Code License: MIT" src="https://img.shields.io/badge/Code%20License-MIT-yellow.svg"/>
  </a>
  <!-- <a href="MODEL-LICENSE">
    <img alt="Model License: Model Agreement" src="https://img.shields.io/badge/Model%20License-Model%20Agreement-yellow.svg"/>
  </a> -->
</div>

## Model Overview

DMind-2 is a series of crypto investment analysis language models designed to provide real-time, professional crypto investment consulting services for individual investors and professional institutions. Standing on the shoulders of numerous open-source pioneers, we have launched two model variants through innovative post-training techniques.

Among these, **DMind-2-107B** demonstrates exceptional depth of understanding and analytical capability when addressing complex crypto ecosystem challenges, delivering insights that span from macroeconomic trends to microscopic on-chain behaviors.

## Model Variants (DMind-2-107B)
- **Base Model**: GLM-4.5-Air
- **Parameters**: 107B
- **Training Duration**: 1 month of refined post-training
- **Hardware Requirements**:
- **Features**: Its core advantage lies in deeply integrating macro market trends with micro on-chain activities, giving it panoramic multi-chain data analysis capability. It can autonomously orchestrate and execute complex on-chain tasks spanning multiple protocols and dozens of steps; it is enhanced with advanced tool-calling capabilities for seamless integration with protocols, APIs, and trading interfaces; and it can synthesize traditional indicators with crypto-native signals such as on-chain data and social sentiment, providing investors with deep insights and intelligent decision-making support.

## Technical Innovations

### 1. Domain-Adaptive Supervised Fine-Tuning (SFT)

In building DMind-2, we recognized the uniqueness of the crypto investment domain: it requires not only profound blockchain technical understanding but also keen financial market insight, and above all the ability to reason rigorously over complex on-chain data and market signals. Our domain-adaptive fine-tuning strategy therefore accounts for these requirements from the very beginning of dataset construction. We curated 47.6K high-quality training samples, including 27.8K crypto domain-specific data points covering crypto investment scenarios from DeFi protocol analysis and NFT valuation models to DAO governance decisions. These data points are not simple Q&A pairs but contain complete investment logic chains, encompassing the entire reasoning process from market observation, data analysis, and risk assessment to investment recommendations.

To ensure the model maintains fundamental financial analysis capabilities while focusing on the crypto domain, we also incorporated 11.2K high-quality general-domain data points and 8.6K pan-financial data points. These datasets help the model establish a solid foundation in financial theory and market analysis frameworks, enabling it to apply mature methodologies from traditional finance to the emerging crypto sector. Through this multi-layered data fusion strategy, DMind-2 can act like a professional investment advisor who understands both technology and finance, providing users with comprehensive and in-depth investment analysis.

### 2. 🔥 Core Innovation: Distribution-Preserving Chain-of-Thought Distillation (DPCD)

DMind-2's greatest technical breakthrough lies in our Distribution-Preserving Chain-of-Thought Distillation method. Traditional domain fine-tuning causes catastrophic forgetting in CoT models, where the model loses reasoning coherence while gaining domain knowledge. Our DPCD method solves this through a mathematically rigorous dual-stream architecture.

#### Core Formulation

The DPCD optimization objective combines domain adaptation with reasoning preservation through the following loss function:

$$
\mathcal{L}_{\text{DPCD}} = \underbrace{\mathcal{L}_{\text{CE}}(\theta_s, \mathcal{D}_{\text{crypto}})}_{\text{Domain Learning}} + \underbrace{\lambda(t) \cdot \sum_{i=1}^{T} \alpha_i \cdot D_{\text{KL}}(P_{\theta_s}^{(i)} \| P_{\theta_t}^{(i)})}_{\text{Distribution Preservation}} + \underbrace{\beta \cdot \mathcal{L}_{\text{QS}}(\mathcal{C}_{\theta_s})}_{\text{Quality Score}}
$$

Where:

* \\(\theta_s\\) and \\(\theta_t\\) represent student (trainable) and teacher (frozen) model parameters.
* \\(P_{\theta}^{(i)}\\) denotes the probability distribution at reasoning step \\(i\\).
* \\(\lambda(t) = \lambda_0 \cdot (1 + \gamma \cdot \text{complexity}(x_t))\\) is the dynamic weight function.
* \\(\alpha_i = \exp(-\delta \cdot i/T)\\) implements exponential decay for later reasoning steps.
* \\(\mathcal{L}_{\text{QS}}\\) is the quality scoring loss ensuring reasoning coherence.

#### Dynamic Weight Adjustment Mechanism

The complexity-aware weight adjustment is formulated as:

$$
\lambda(t) = \begin{cases}
\lambda_{\text{high}} \cdot \left(1 + \tanh\left(\frac{\mathcal{H}(x_t) - \mu_{\mathcal{H}}}{\sigma_{\mathcal{H}}}\right)\right) & \text{if } \mathcal{T}(x_t) \in \{\text{DeFi Analysis, Risk Assessment}\} \\
\lambda_{\text{base}} & \text{if } \mathcal{T}(x_t) \in \{\text{Market Data, Price Query}\} \\
\lambda_{\text{base}} \cdot \left(1 + \frac{\mathcal{S}(c_t)}{|\mathcal{V}_{\text{crypto}}|}\right) & \text{otherwise}
\end{cases}
$$

Where \\(\mathcal{H}(x_t)\\) measures reasoning complexity through chain length and branching factor, \\(\mathcal{S}(c_t)\\) counts domain-specific terms, and \\(|\mathcal{V}_{\text{crypto}}|\\) is the crypto vocabulary size.

This framework ensures that DMind-2 maintains the base model's powerful reasoning capabilities while acquiring deep crypto domain expertise. The KL divergence constraint operates at each token generation step, preserving the original model's reasoning patterns. The quality scoring mechanism \\(\mathcal{L}_{\text{QS}}\\) filters out low-quality reasoning chains, keeping only those paths with coherence scores above threshold \\(\tau = 0.85\\).

Through extensive experimentation, we found optimal hyperparameters: \\(\lambda_{\text{base}} = 0.3\\), \\(\lambda_{\text{high}} = 0.7\\), \\(\beta = 0.2\\), and \\(\delta = 0.1\\). This configuration achieves 94.1% reasoning-chain completeness while improving domain-specific accuracy by 23.2% over baseline fine-tuning methods.
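The objective above can be sketched in a few lines of plain Python. This is an illustrative toy, not the training code: the per-step distributions would really come from student/teacher forward passes, the `complexity` input stands in for \\(\mathcal{H}(x_t)\\), and the "otherwise" branch omits the vocabulary-density term for brevity. Hyperparameter values follow the text.

```python
import math

# Hyperparameters as reported in the text.
LAMBDA_BASE, LAMBDA_HIGH, BETA, DELTA = 0.3, 0.7, 0.2, 0.1

def kl(p, q, eps=1e-12):
    """KL(p || q) for two discrete distributions given as lists."""
    return sum(pi * math.log((pi + eps) / (qi + eps)) for pi, qi in zip(p, q))

def dynamic_lambda(task, complexity=0.0):
    """Complexity-aware weight lambda(t): larger for analysis-heavy tasks."""
    if task in {"DeFi Analysis", "Risk Assessment"}:
        return LAMBDA_HIGH * (1 + math.tanh(complexity))  # normalized complexity
    return LAMBDA_BASE  # Market Data / Price Query / (simplified) otherwise

def dpcd_loss(ce_loss, student_steps, teacher_steps, task, quality_loss=0.0):
    """L = CE + lambda(t) * sum_i alpha_i * KL_i + beta * L_QS."""
    T = len(student_steps)
    preserve = sum(
        math.exp(-DELTA * i / T) * kl(s, t)   # alpha_i decays for later steps
        for i, (s, t) in enumerate(zip(student_steps, teacher_steps), start=1)
    )
    return ce_loss + dynamic_lambda(task) * preserve + BETA * quality_loss

# Toy check: identical student/teacher steps incur no preservation penalty.
steps = [[0.7, 0.2, 0.1], [0.5, 0.3, 0.2]]
print(dpcd_loss(1.0, steps, steps, "Market Data"))  # -> 1.0
```

The exponential decay \\(\alpha_i\\) means early reasoning steps are held closest to the teacher, while later, domain-specific steps are freer to adapt.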
### 3. Reinforcement Learning from Human Feedback (RLHF) Optimization

After completing basic domain fine-tuning, we further optimize the model using the Group Relative Policy Optimization (GRPO) algorithm. GRPO offers better stability than traditional PPO, which is particularly important for financial-domain models: we cannot tolerate dramatic performance fluctuations during optimization, as these could lead to unpredictable investment advice. During the RLHF phase, we focused on two key issues: professional output formatting and safety compliance.

For professional output formatting, we constructed 4.2K carefully designed format data points, sourced from real investment research reports, market analysis documents, and project due-diligence reports. Through RLHF training, the model learned how to organize a professional investment analysis report: starting with an executive summary that clearly articulates investment opportunities and risks; conducting in-depth technical analysis and market evaluation in the main body; and finally providing clear investment recommendations and risk warnings. This structured output improves readability and, more importantly, helps investors establish systematic analytical frameworks, avoiding impulsive decisions driven by disorganized information.

Safety alignment is another aspect we particularly emphasize. The crypto investment field is full of high-risk, high-reward opportunities, and the model must accurately identify and highlight potential risks. We use proprietary risk-case datasets for safety training, ensuring the model does not output overly optimistic investment advice or overlook obvious risk signals. For example, when analyzing an emerging DeFi protocol, the model checks key risk indicators such as smart contract audit status, team background, and total value locked, explicitly marking risk levels in its recommendations. This responsible output protects users' assets and reflects our commitment to financial compliance.

## Performance Metrics

<div align="center">
  <img src="dmind-2-performance.jpeg" width="85%" alt="DMind-2" />
</div>

| Category | Benchmark (Metric) | DeepSeek-R1-0528 | gpt-oss-120b | Qwen3-235b-a22b | GLM-4.5-Air | **DMind-2-107B (107B)** |
| :--- | :--- | :--- | :--- | :--- | :--- | :--- |
| **General** | | | | | | |
| | MMLU-Pro (EM) | 84.0 | 90.0 | 80.6 | 81.4 | 83.1 |
| | GPQA-Diamond (Pass@1) | 71.5 | 80.9 | 77.5 | 75.0 | 74.3 |
| | SimpleQA (Correct) | 30.1 | 6.7 | 54.3 | - | 51.5 |
| **Math** | | | | | | |
| | AIME 2024 (Pass@1) | 79.8 | 96.6 | 80.4 | 89.4 | 93.3 |
| | AIME 2025 (Pass@1) | 70.0 | 97.9 | 70.3 | - | 94.8 |
| | CNMO 2024 (Pass@1) | 78.8 | 86.9 | - | - | 84.1 |
| **Tools** | | | | | | |
| | BFCL_v3 | - | 67.8 | 70.3 | 76.4 | 74.5 |
| **Crypto** | | | | | | |
| | DMind Benchmark | 74.1 | 76.3 | 73.4 | 76.8 | 82.2 |

## Application Scenarios

### 🎯 Edge-Side Crypto Investment Decision Support

DMind-2 can provide real-time crypto investment analysis on users' personal devices, including DeFi yield comparisons, liquidity-mining strategy optimization, and NFT valuation analysis. All calculations and analyses are completed locally, ensuring privacy of investment strategies and position information. The model can analyze on-chain data, evaluate project fundamentals, identify market trends, and provide comprehensive support for investment decisions.

### 💼 Personalized Financial Advisory Services

Based on a user's risk preferences, investment objectives, and asset allocation needs, DMind-2 can provide customized investment advice, whether for long-term value investing or short-term arbitrage opportunities. More importantly, it can explain complex crypto concepts in plain language, helping investors understand the logic behind every investment decision.

### 📊 Comprehensive Financial Investment Computational Analysis

DMind-2 is not limited to the crypto domain; it also possesses strong pan-financial computational analysis capabilities, performing yield calculations, risk assessments, portfolio optimization, correlation analysis, and other professional financial computations. By integrating traditional financial theory with crypto-native mechanisms, the model helps investors find asset allocation strategies that bridge old and new financial systems.

### 🔍 Real-Time Market Monitoring and Alerts

Edge-deployed DMind-2 can monitor market dynamics around the clock, promptly alerting users when important market events or investment opportunities arise. Running locally ensures fast response times, providing immediate recommendations during severe market volatility.

## Usage Example

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    "zai-org/GLM-4.5-Air",
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(
    "zai-org/GLM-4.5-Air",
    trust_remote_code=True
)

# Example dialogue
prompt = """<|im_start|>user
Please analyze the following investment opportunity:
1. Project: Emerging Layer2 DEX Protocol
2. TVL: $50M, growth rate 200%/month
3. Token Economics: 70% circulating, 30% team locked for 2 years
4. My risk tolerance: Medium
Please provide investment advice and risk analysis.
<|im_end|>
<|im_start|>assistant
"""

# Generate a response (move inputs to the model's device for device_map="auto")
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(
    **inputs,
    max_length=2048,
    temperature=0.7,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id
)

response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
```

## Privacy & Security

- 🔐 **Fully Localized**: All inference computations are completed on user devices, no internet required
- 🛡️ **Data Privacy**: Investment strategies and personal information never leave local devices
- ⚡ **Real-Time Response**: No network latency, millisecond-level response speed
- 🔒 **Security Compliance**: Built-in risk warning mechanisms, compliant with financial regulations

## Limitations & Disclaimers

1. **Not Investment Advice**: Model outputs are for reference only; final investment decisions require users' own judgment
2. **Market Risk**: Crypto markets are highly volatile; please carefully assess your risk tolerance
3. **Knowledge Timeliness**: Model knowledge has temporal limitations; the latest market information requires additional verification
4. **Regulatory Compliance**: Please comply with the financial regulations in your jurisdiction when using the model

## Acknowledgments

We thank the Qwen and zai teams for providing excellent base models, and the open-source community for its continuous contributions. DMind-2's success wouldn't be possible without the collective efforts of the entire AI and Crypto community.

## License

This model follows the Apache 2.0 open-source license. Commercial use must comply with the relevant terms.

## Citation

```bibtex
@misc{dmind2025,
  title={DMind-2: Advanced Crypto Domain-Specific Large Language Models with Distribution-Preserving CoT Distillation},
  author={DMind Team},
  year={2025},
  publisher={Hugging Face}
}
```

## Contact

- 🌐 Project Homepage: [https://dmind.ai](https://dmind.ai)
- 💬 Community Discussion: [Discord](https://discord.gg/dmind)
- 🐦 Twitter: [@DMindAI](https://twitter.com/DMindAI)

---
chat_template.jinja
ADDED
@@ -0,0 +1,103 @@
[gMASK]<sop>
{%- if tools -%}
<|system|>
# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{% for tool in tools %}
{{ tool | tojson(ensure_ascii=False) }}
{% endfor %}
</tools>

For each function call, output the function name and arguments within the following XML format:
<tool_call>{function-name}
<arg_key>{arg-key-1}</arg_key>
<arg_value>{arg-value-1}</arg_value>
<arg_key>{arg-key-2}</arg_key>
<arg_value>{arg-value-2}</arg_value>
...
</tool_call>{%- endif -%}
{%- macro visible_text(content) -%}
{%- if content is string -%}
{{- content }}
{%- elif content is iterable and content is not mapping -%}
{%- for item in content -%}
{%- if item is mapping and item.type == 'text' -%}
{{- item.text }}
{%- elif item is string -%}
{{- item }}
{%- endif -%}
{%- endfor -%}
{%- else -%}
{{- content }}
{%- endif -%}
{%- endmacro -%}
{%- set ns = namespace(last_user_index=-1) %}
{%- for m in messages %}
{%- if m.role == 'user' %}
{% set ns.last_user_index = loop.index0 -%}
{%- endif %}
{%- endfor %}
{% for m in messages %}
{%- if m.role == 'user' -%}<|user|>
{{ visible_text(m.content) }}
{{- '/nothink' if (enable_thinking is defined and not enable_thinking and not visible_text(m.content).endswith("/nothink")) else '' -}}
{%- elif m.role == 'assistant' -%}
<|assistant|>
{%- set reasoning_content = '' %}
{%- set content = visible_text(m.content) %}
{%- if m.reasoning_content is string %}
{%- set reasoning_content = m.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{%- if loop.index0 > ns.last_user_index and reasoning_content -%}
{{ '\n<think>' + reasoning_content.strip() + '</think>'}}
{%- else -%}
{{ '\n<think></think>' }}
{%- endif -%}
{%- if content.strip() -%}
{{ '\n' + content.strip() }}
{%- endif -%}
{% if m.tool_calls %}
{% for tc in m.tool_calls %}
{%- if tc.function %}
{%- set tc = tc.function %}
{%- endif %}
{{ '\n<tool_call>' + tc.name }}
{% set _args = tc.arguments %}
{% for k, v in _args.items() %}
<arg_key>{{ k }}</arg_key>
<arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>
{% endfor %}
</tool_call>{% endfor %}
{% endif %}
{%- elif m.role == 'tool' -%}
{%- if m.content is string -%}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|observation|>' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- m.content }}
{{- '\n</tool_response>' }}
{%- else -%}
<|observation|>{% for tr in m.content %}

<tool_response>
{{ tr.output if tr.output is defined else tr }}
</tool_response>{% endfor -%}
{% endif -%}
{%- elif m.role == 'system' -%}
|
| 97 |
+
<|system|>
|
| 98 |
+
{{ visible_text(m.content) }}
|
| 99 |
+
{%- endif -%}
|
| 100 |
+
{%- endfor -%}
|
| 101 |
+
{%- if add_generation_prompt -%}
|
| 102 |
+
<|assistant|>{{- '\n<think></think>' if (enable_thinking is defined and not enable_thinking) else '' -}}
|
| 103 |
+
{%- endif -%}
|
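The template above serializes tool calls as a flat XML-like block: the function name on the `<tool_call>` line, followed by alternating `<arg_key>`/`<arg_value>` pairs. As a minimal illustration of consuming that format, a client-side parser might look like the sketch below (the function name `parse_tool_calls` and the sample payload are illustrative, not part of this repo):

```python
import re

def parse_tool_calls(text):
    """Extract <tool_call> blocks in the XML arg format emitted by the chat template.

    Returns a list of {"name": ..., "arguments": {...}} dicts. Argument values
    are kept as raw strings; a real client may need to JSON-decode non-string
    values, since the template serializes them with tojson.
    """
    calls = []
    for block in re.findall(r"<tool_call>(.*?)</tool_call>", text, re.DOTALL):
        # The function name is the remainder of the first line of the block.
        name = block.split("\n", 1)[0].strip()
        args = dict(re.findall(
            r"<arg_key>(.*?)</arg_key>\s*<arg_value>(.*?)</arg_value>",
            block, re.DOTALL))
        calls.append({"name": name, "arguments": args})
    return calls

sample = (
    "<tool_call>get_price\n"
    "<arg_key>symbol</arg_key>\n"
    "<arg_value>ETH</arg_value>\n"
    "</tool_call>"
)
print(parse_tool_calls(sample))
```

Streaming clients would run the same extraction incrementally, but the block structure is identical.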
config.json
ADDED
@@ -0,0 +1,55 @@
+{
+  "architectures": [
+    "Glm4MoeForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "eos_token_id": [
+    151329,
+    151336,
+    151338
+  ],
+  "first_k_dense_replace": 1,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 10944,
+  "max_position_embeddings": 131072,
+  "model_type": "glm4_moe",
+  "moe_intermediate_size": 1408,
+  "n_group": 1,
+  "n_routed_experts": 128,
+  "n_shared_experts": 1,
+  "norm_topk_prob": true,
+  "num_attention_heads": 96,
+  "num_experts_per_tok": 8,
+  "num_hidden_layers": 46,
+  "num_key_value_heads": 8,
+  "num_nextn_predict_layers": 1,
+  "pad_token_id": 151329,
+  "partial_rotary_factor": 0.5,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "routed_scaling_factor": 1.0,
+  "tie_word_embeddings": false,
+  "topk_group": 1,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.56.0.dev0",
+  "use_cache": true,
+  "use_qk_norm": false,
+  "vocab_size": 151552,
+  "quantization_config": {
+    "quant_method": "exl3",
+    "version": "0.0.11",
+    "bits": 2.91,
+    "head_bits": 6,
+    "calibration": {
+      "rows": 250,
+      "cols": 2048
+    },
+    "out_scales": "auto",
+    "codebook": "mcg"
+  }
+}
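The MoE fields in config.json determine how many experts actually execute per token. A back-of-the-envelope sketch, using only the values shown above and assuming the usual SwiGLU layout (gate, up, and down projections per expert FFN); the helper names are hypothetical:

```python
# Values copied from the config.json fields above.
config = {
    "n_routed_experts": 128,
    "num_experts_per_tok": 8,
    "n_shared_experts": 1,
    "moe_intermediate_size": 1408,
    "hidden_size": 4096,
}

def active_experts(cfg):
    # Shared experts always run; routed experts are selected top-k per token.
    return cfg["n_shared_experts"] + cfg["num_experts_per_tok"]

def moe_ffn_params_per_expert(cfg):
    # SwiGLU FFN: gate, up, and down projections, each hidden x intermediate.
    return 3 * cfg["hidden_size"] * cfg["moe_intermediate_size"]

print(active_experts(config))             # 9 experts execute per token
print(moe_ffn_params_per_expert(config))  # 17,301,504 weights per expert FFN
```

So although there are 128 routed experts per MoE layer, each token activates only 9 expert FFNs (1 shared + 8 routed), which is why the active parameter count is far below the total.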
dmind-2-performance.jpeg
ADDED
Git LFS Details

dmind-ai-logo.png
ADDED
generation_config.json
ADDED
@@ -0,0 +1,10 @@
+{
+  "_from_model_config": true,
+  "eos_token_id": [
+    151329,
+    151336,
+    151338
+  ],
+  "pad_token_id": 151329,
+  "transformers_version": "4.56.0.dev0"
+}
model-00001-of-00005.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01a3ba550548a1a6015cbd6674418765aa599dc952454083695d7ec9ad3e57db
+size 8046361584
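Each safetensors shard is stored as a Git LFS pointer file in exactly this three-line form (version URL, `oid` with hash algorithm prefix, `size` in bytes). A minimal parser for that pointer format, using the shard above as sample input (the function name `parse_lfs_pointer` is illustrative):

```python
def parse_lfs_pointer(text):
    """Parse a Git LFS pointer file (version / oid / size lines) into a dict."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    algo, digest = fields["oid"].split(":", 1)
    return {"algo": algo, "digest": digest, "size": int(fields["size"])}

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:01a3ba550548a1a6015cbd6674418765aa599dc952454083695d7ec9ad3e57db
size 8046361584"""

info = parse_lfs_pointer(pointer)
print(info["algo"], info["size"])  # sha256 8046361584
```

The same format applies to the remaining shards, tokenizer.json, and quantization_config.json below; summing the `size` fields gives the total download size of the checkpoint.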
model-00002-of-00005.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f7306ee5206cf6ddd269e3a44517244f16a56efd7ade06733ffaa2451da7c68
+size 7831519308
model-00003-of-00005.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffdd95f57f1126b610c0e8786f8dc772754a4ce95e09bb5576b7f291384eb234
+size 8148044392
model-00004-of-00005.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e14e0330e75673587c7261807a5b7994830deadf9d42a3572e2dba13697f63e
+size 8240319168
model-00005-of-00005.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89288b23eb78a7ab092d059b15937bda7d7ced38c783f2aefab8e635e4f3b5dc
+size 7836037040
model.safetensors.index.json
ADDED
The diff for this file is too large to render.
quantization_config.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c437ca8132d967ede4499394336da00a650a0ec24511174801e5801153a82c7
+size 21625732
special_tokens_map.json
ADDED
@@ -0,0 +1,40 @@
+{
+  "additional_special_tokens": [
+    "<|endoftext|>",
+    "[MASK]",
+    "[gMASK]",
+    "[sMASK]",
+    "<sop>",
+    "<eop>",
+    "<|system|>",
+    "<|user|>",
+    "<|assistant|>",
+    "<|observation|>",
+    "<|begin_of_image|>",
+    "<|end_of_image|>",
+    "<|begin_of_video|>",
+    "<|end_of_video|>",
+    "<|begin_of_audio|>",
+    "<|end_of_audio|>",
+    "<|begin_of_transcription|>",
+    "<|end_of_transcription|>",
+    "<|code_prefix|>",
+    "<|code_middle|>",
+    "<|code_suffix|>",
+    "/nothink"
+  ],
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bda8e2146c3bb7b7e0fc96dcc4f0aeff041c6c27952e3ace0665663ebff346ba
+size 19970700
tokenizer_config.json
ADDED
@@ -0,0 +1,326 @@
+{
+  "added_tokens_decoder": {
+    "151329": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151330": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151331": {
+      "content": "[gMASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151332": {
+      "content": "[sMASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151333": {
+      "content": "<sop>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151334": {
+      "content": "<eop>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151335": {
+      "content": "<|system|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151336": {
+      "content": "<|user|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151337": {
+      "content": "<|assistant|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151338": {
+      "content": "<|observation|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151339": {
+      "content": "<|begin_of_image|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151340": {
+      "content": "<|end_of_image|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151341": {
+      "content": "<|begin_of_video|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151342": {
+      "content": "<|end_of_video|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151343": {
+      "content": "<|begin_of_audio|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151344": {
+      "content": "<|end_of_audio|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151345": {
+      "content": "<|begin_of_transcription|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151346": {
+      "content": "<|end_of_transcription|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151347": {
+      "content": "<|code_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151348": {
+      "content": "<|code_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151349": {
+      "content": "<|code_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151350": {
+      "content": "<think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151351": {
+      "content": "</think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151352": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151353": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151354": {
+      "content": "<tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151355": {
+      "content": "</tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151356": {
+      "content": "<arg_key>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151357": {
+      "content": "</arg_key>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151358": {
+      "content": "<arg_value>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151359": {
+      "content": "</arg_value>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151360": {
+      "content": "/nothink",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151361": {
+      "content": "<|begin_of_box|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151362": {
+      "content": "<|end_of_box|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151363": {
+      "content": "<|image|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151364": {
+      "content": "<|video|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|endoftext|>",
+    "[MASK]",
+    "[gMASK]",
+    "[sMASK]",
+    "<sop>",
+    "<eop>",
+    "<|system|>",
+    "<|user|>",
+    "<|assistant|>",
+    "<|observation|>",
+    "<|begin_of_image|>",
+    "<|end_of_image|>",
+    "<|begin_of_video|>",
+    "<|end_of_video|>",
+    "<|begin_of_audio|>",
+    "<|end_of_audio|>",
+    "<|begin_of_transcription|>",
+    "<|end_of_transcription|>",
+    "<|code_prefix|>",
+    "<|code_middle|>",
+    "<|code_suffix|>",
+    "/nothink"
+  ],
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "eos_token": "<|endoftext|>",
+  "extra_special_tokens": {},
+  "model_max_length": 128000,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "left",
+  "remove_space": false,
+  "split_special_tokens": false,
+  "tokenizer_class": "PreTrainedTokenizerFast"
+}
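The `added_tokens_decoder` table above maps token ids to token strings, which makes it easy to cross-check the `eos_token_id` list declared in config.json and generation_config.json. A small sketch using only the id assignments shown in this commit:

```python
# Subset of added_tokens_decoder from tokenizer_config.json above.
added_tokens = {
    151329: "<|endoftext|>",
    151336: "<|user|>",
    151337: "<|assistant|>",
    151338: "<|observation|>",
}

# eos_token_id list from config.json / generation_config.json above.
eos_token_ids = [151329, 151336, 151338]

# Invert the decoder to look tokens up by string.
token_to_id = {content: tid for tid, content in added_tokens.items()}

# Resolve each eos id to its token string.
eos_tokens = [added_tokens[i] for i in eos_token_ids]
print(eos_tokens)
```

So every id in `eos_token_id` resolves to a declared special token, and the pad id (151329) is the same `<|endoftext|>` token that special_tokens_map.json names as both `eos_token` and `pad_token`.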