# Collapse m-a-p/neo_7b's 28 layers into 21: each group of four consecutive
# layers keeps its first three, with the group's fourth layer slerp-merged
# into each of them. layer_range uses Python-slice (end-exclusive) semantics,
# so [n, n + 1] selects exactly layer n.
slices:
  # Group 1
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [0, 1]
      - model: m-a-p/neo_7b
        layer_range: [3, 4]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [1, 2]
      - model: m-a-p/neo_7b
        layer_range: [3, 4]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [2, 3]
      - model: m-a-p/neo_7b
        layer_range: [3, 4]
  # Group 2
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [4, 5]
      - model: m-a-p/neo_7b
        layer_range: [7, 8]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [5, 6]
      - model: m-a-p/neo_7b
        layer_range: [7, 8]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [6, 7]
      - model: m-a-p/neo_7b
        layer_range: [7, 8]
  # Group 3
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [8, 9]
      - model: m-a-p/neo_7b
        layer_range: [11, 12]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [9, 10]
      - model: m-a-p/neo_7b
        layer_range: [11, 12]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [10, 11]
      - model: m-a-p/neo_7b
        layer_range: [11, 12]
  # Group 4
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [12, 13]
      - model: m-a-p/neo_7b
        layer_range: [15, 16]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [13, 14]
      - model: m-a-p/neo_7b
        layer_range: [15, 16]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [14, 15]
      - model: m-a-p/neo_7b
        layer_range: [15, 16]
  # Group 5
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [16, 17]
      - model: m-a-p/neo_7b
        layer_range: [19, 20]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [17, 18]
      - model: m-a-p/neo_7b
        layer_range: [19, 20]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [18, 19]
      - model: m-a-p/neo_7b
        layer_range: [19, 20]
  # Group 6
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [20, 21]
      - model: m-a-p/neo_7b
        layer_range: [23, 24]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [21, 22]
      - model: m-a-p/neo_7b
        layer_range: [23, 24]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [22, 23]
      - model: m-a-p/neo_7b
        layer_range: [23, 24]
  # Group 7 (last group)
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [24, 25]
      - model: m-a-p/neo_7b
        layer_range: [27, 28]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [25, 26]
      - model: m-a-p/neo_7b
        layer_range: [27, 28]
  - sources:
      - model: m-a-p/neo_7b
        layer_range: [26, 27]
      - model: m-a-p/neo_7b
        layer_range: [27, 28]
merge_method: slerp
base_model: m-a-p/neo_7b
parameters:
  t: 0.3333  # interpolate each kept layer 1/3 of the way toward its group's 4th layer
dtype: bfloat16
output_path: ./merged_redistributed_neo_7b
model_config:
  num_hidden_layers: 21
  attention_bias: false
  attention_dropout: 0.0
  hidden_act: "silu"
  hidden_size: 3072
  intermediate_size: 24576
  num_attention_heads: 16
  num_key_value_heads: 16
  rms_norm_eps: 1e-05
  rope_theta: 10000.0
  use_cache: true
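
For reference, the slerp applied to each slice pair above is a spherical linear interpolation between two weight tensors. The sketch below is illustrative only, not mergekit's internal implementation (which handles additional edge cases per tensor); torch and the example tensor shapes are assumptions, and t=0.3333 matches the parameter set above.

import torch

def slerp(w_a: torch.Tensor, w_b: torch.Tensor, t: float, eps: float = 1e-8) -> torch.Tensor:
    # Interpolate a fraction t of the way from w_a toward w_b along the arc
    # defined by their normalized directions, blending the original
    # (unnormalized) tensors with the slerp coefficients.
    a, b = w_a.flatten().float(), w_b.flatten().float()
    a_n = a / (a.norm() + eps)
    b_n = b / (b.norm() + eps)
    dot = torch.clamp(torch.dot(a_n, b_n), -1.0, 1.0)
    omega = torch.acos(dot)          # angle between the two weight directions
    if omega.abs() < eps:            # nearly parallel: fall back to plain lerp
        merged = (1.0 - t) * a + t * b
    else:
        so = torch.sin(omega)
        merged = (torch.sin((1.0 - t) * omega) / so) * a + (torch.sin(t * omega) / so) * b
    return merged.reshape(w_a.shape).to(w_a.dtype)

# Example mirroring the first slice of Group 1: move a layer-0 weight one third
# of the way toward the corresponding layer-3 weight.
layer0_w = torch.randn(3072, 3072)
layer3_w = torch.randn(3072, 3072)
merged_w = slerp(layer0_w, layer3_w, t=0.3333)

After running the merge (typically via mergekit's mergekit-yaml entry point), the resulting model should report 21 hidden layers. A quick check with transformers, assuming the output directory configured above:

from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("./merged_redistributed_neo_7b")
print(cfg.num_hidden_layers)  # expected: 21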