Commit · 07fe458
1 Parent(s): a951c04
update attn_impl for 70m and 1b models
Browse files
- 1b-model/model_config.yml +2 -1
- 70m-model/model_config.yml +2 -1
1b-model/model_config.yml
CHANGED
|
@@ -8,7 +8,8 @@ n_heads: 16
|
|
| 8 |
use_glu: false
|
| 9 |
n_layers: 24
|
| 10 |
attn_config:
|
| 11 |
-
attn_impl:
|
|
|
|
| 12 |
attn_type: grouped_query_attention
|
| 13 |
kv_nheads: 16
|
| 14 |
attn_pdrop: 0
|
|
|
|
| 8 |
use_glu: false
|
| 9 |
n_layers: 24
|
| 10 |
attn_config:
|
| 11 |
+
attn_impl: flash
|
| 12 |
+
use_attn_mask: false
|
| 13 |
attn_type: grouped_query_attention
|
| 14 |
kv_nheads: 16
|
| 15 |
attn_pdrop: 0
|
70m-model/model_config.yml
CHANGED
|
@@ -8,7 +8,8 @@ n_heads: 8
|
|
| 8 |
use_glu: false
|
| 9 |
n_layers: 12
|
| 10 |
attn_config:
|
| 11 |
-
attn_impl:
|
|
|
|
| 12 |
attn_type: grouped_query_attention
|
| 13 |
kv_nheads: 8
|
| 14 |
attn_pdrop: 0
|
|
|
|
| 8 |
use_glu: false
|
| 9 |
n_layers: 12
|
| 10 |
attn_config:
|
| 11 |
+
attn_impl: flash
|
| 12 |
+
use_attn_mask: false
|
| 13 |
attn_type: grouped_query_attention
|
| 14 |
kv_nheads: 8
|
| 15 |
attn_pdrop: 0
|