Wan Xinyi
committed on
Commit
·
594a8f9
1
Parent(s):
4835f75
Add requirements
Browse files
- requirements.txt +2 -0
- v_schedule.py +63 -50
requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
drawsvg
|
| 2 |
+
pathlib
|
v_schedule.py
CHANGED
|
@@ -113,51 +113,51 @@ class PipelineGraph(object):
|
|
| 113 |
pending_w[stage].append((2, chunk, _cnt))
|
| 114 |
count[stage][cat * 2 + chunk] += 1
|
| 115 |
|
| 116 |
-
for _ in range(2 * self.n_stage):
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
|
| 162 |
# init_bubble = get_max_stage_bubble()
|
| 163 |
# print(stage_bubble)
|
|
@@ -288,7 +288,7 @@ class PipelineGraph(object):
|
|
| 288 |
_str += _c
|
| 289 |
print(_str)
|
| 290 |
|
| 291 |
-
def get_v_schedule(self):
|
| 292 |
schedule, end_time, max_bubble = None, None, None
|
| 293 |
expected_time = sum(self.fbw_cost) * self.n_micro * 2
|
| 294 |
for fill_b in [True, False]:
|
|
@@ -301,10 +301,12 @@ class PipelineGraph(object):
|
|
| 301 |
max_bubble = _max_bubble
|
| 302 |
schedule = _schedule
|
| 303 |
end_time = _end_time
|
|
|
|
|
|
|
| 304 |
# self.print_details(end_time, print_scaling=1)
|
| 305 |
-
bubble_rate = max_bubble / expected_time
|
| 306 |
-
print("%2d %3d, [%5d %5d %5d], %6d -> %6.4f" % \
|
| 307 |
-
(self.n_stage, self.n_micro, *self.fbw_cost, self.max_mem // self.f_mem, bubble_rate))
|
| 308 |
local_order = [[] for _ in range(self.n_stage)]
|
| 309 |
comm_id = {}
|
| 310 |
comm_id_counter = 0
|
|
@@ -434,14 +436,24 @@ if __name__ == '__main__':
|
|
| 434 |
# (32, 96, 10419, 10207, 7715, 408, 6144, 48, 64),
|
| 435 |
# (32, 128, 10408, 10204, 7703, 408, 6144, 48, 64),
|
| 436 |
# (32, 256, 10402, 10248, 7698, 460, 6144, 48, 64),
|
| 437 |
-
(4, 8, 6, 4, 4, 1, 4096, 32, 32),
|
| 438 |
# (8, 24, 29444, 29718, 19927, 527, 4096, 32, 32),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 439 |
]
|
| 440 |
s = 1024
|
| 441 |
|
| 442 |
# h, a, s = 4096, 32, 1024
|
| 443 |
# cost_f, cost_b, cost_w, cost_c = 29718, 29444, 19927, 527
|
| 444 |
-
for p, n, f, b, w, c, h, a,
|
| 445 |
mem_f = 34 * h + 5 * a * s
|
| 446 |
mem_w = - 32 * h
|
| 447 |
mem_b = - mem_w - mem_f
|
|
@@ -459,3 +471,4 @@ if __name__ == '__main__':
|
|
| 459 |
max_mem=mem_f * (p * 2 + m_offset),
|
| 460 |
)
|
| 461 |
graph.get_v_schedule()
|
|
|
|
|
|
| 113 |
pending_w[stage].append((2, chunk, _cnt))
|
| 114 |
count[stage][cat * 2 + chunk] += 1
|
| 115 |
|
| 116 |
+
# for _ in range(2 * self.n_stage):
|
| 117 |
+
# for i in range(self.n_stage):
|
| 118 |
+
# if count[i][1] >= count[i][0]:
|
| 119 |
+
# put(0, 0, i, assert_cnt=False)
|
| 120 |
+
# continue
|
| 121 |
+
# if i == self.n_stage - 1:
|
| 122 |
+
# put(0, 1, i, assert_cnt=False)
|
| 123 |
+
# continue
|
| 124 |
+
# fa_id = self.get_id(0, 1, i + 1, count[i][1])
|
| 125 |
+
# if 0 <= end_time[fa_id] < cur_time[i + 1]: # TODO
|
| 126 |
+
# put(0, 1, i, assert_cnt=False)
|
| 127 |
+
# else:
|
| 128 |
+
# put(0, 0, i, assert_cnt=False)
|
| 129 |
|
| 130 |
+
for i in range(self.n_stage):
|
| 131 |
+
put(0, 0, i)
|
| 132 |
+
for i in range(self.n_stage - 1, -1, -1):
|
| 133 |
+
if i == self.n_stage - 1:
|
| 134 |
+
put(0, 1, i)
|
| 135 |
+
continue
|
| 136 |
+
tmp = end_time[self.get_id(0, 1, i + 1, 0)] + self.c_cost
|
| 137 |
+
while mem[i] + self.fbw_mem[0] * (2 + i * 2) <= self.max_mem and cur_time[i] + self.fbw_cost[0] <= tmp and count[i][0] < self.n_micro:
|
| 138 |
+
for j in range(i + 1):
|
| 139 |
+
put(0, 0, j)
|
| 140 |
+
put(0, 1, i)
|
| 141 |
+
iter_chunk_ = 0
|
| 142 |
+
end_tmp = 0
|
| 143 |
+
for i in range(self.n_stage):
|
| 144 |
+
if i == 0:
|
| 145 |
+
end_tmp = cur_time[0] + self.fbw_cost[1]
|
| 146 |
+
continue
|
| 147 |
+
tmp = end_tmp + self.c_cost
|
| 148 |
+
while count[i][0] + count[i][1] < count[i - 1][0] + count[i - 1][1]:
|
| 149 |
+
for j in range(self.n_stage - 1, i - 1, -1):
|
| 150 |
+
if count[j][iter_chunk_] < self.n_micro:
|
| 151 |
+
put(0, iter_chunk_, j)
|
| 152 |
+
iter_chunk_ = 1 - iter_chunk_
|
| 153 |
+
# while mem[i] + self.fbw_mem[0] <= self.max_mem and cur_time[i] + self.fbw_cost[0] <= tmp:
|
| 154 |
+
# if iter_chunk_ == 0 and count[i][0] >= count[i - 1][0]:
|
| 155 |
+
# break
|
| 156 |
+
# for j in range(self.n_stage - 1, i - 1, -1):
|
| 157 |
+
# if count[j][iter_chunk_] < self.n_micro:
|
| 158 |
+
# put(0, iter_chunk_, j)
|
| 159 |
+
# iter_chunk_ = 1 - iter_chunk_
|
| 160 |
+
# end_tmp = max(tmp, cur_time[i]) + self.fbw_cost[1]
|
| 161 |
|
| 162 |
# init_bubble = get_max_stage_bubble()
|
| 163 |
# print(stage_bubble)
|
|
|
|
| 288 |
_str += _c
|
| 289 |
print(_str)
|
| 290 |
|
| 291 |
+
def get_v_schedule(self, only_run_time=False):
|
| 292 |
schedule, end_time, max_bubble = None, None, None
|
| 293 |
expected_time = sum(self.fbw_cost) * self.n_micro * 2
|
| 294 |
for fill_b in [True, False]:
|
|
|
|
| 301 |
max_bubble = _max_bubble
|
| 302 |
schedule = _schedule
|
| 303 |
end_time = _end_time
|
| 304 |
+
if only_run_time:
|
| 305 |
+
return max_bubble + expected_time
|
| 306 |
# self.print_details(end_time, print_scaling=1)
|
| 307 |
+
bubble_rate = max_bubble / (expected_time + max_bubble)
|
| 308 |
+
print("%2d %3d, [%5d %5d %5d %5d], %6d -> %6.4f" % \
|
| 309 |
+
(self.n_stage, self.n_micro, *self.fbw_cost, self.c_cost, self.max_mem // self.f_mem, bubble_rate))
|
| 310 |
local_order = [[] for _ in range(self.n_stage)]
|
| 311 |
comm_id = {}
|
| 312 |
comm_id_counter = 0
|
|
|
|
| 436 |
# (32, 96, 10419, 10207, 7715, 408, 6144, 48, 64),
|
| 437 |
# (32, 128, 10408, 10204, 7703, 408, 6144, 48, 64),
|
| 438 |
# (32, 256, 10402, 10248, 7698, 460, 6144, 48, 64),
|
| 439 |
+
# (4, 8, 6, 4, 4, 1, 4096, 32, 32),
|
| 440 |
# (8, 24, 29444, 29718, 19927, 527, 4096, 32, 32),
|
| 441 |
+
# ( 8, 32, 16099, 16504, 7589, 540, 2304, 24, 16),
|
| 442 |
+
(16, 48, 14407, 14380, 9676, 1610, 4096, 32, 32),
|
| 443 |
+
(16, 64, 14412, 14393, 9688, 1621, 4096, 32, 32),
|
| 444 |
+
(16, 128,14316, 14306, 9639, 1619, 4096, 32, 32),
|
| 445 |
+
(24, 72, 6763, 6969, 5251, 755, 5120, 40, 48),
|
| 446 |
+
(24, 96, 6783, 6984, 5259, 758, 5120, 40, 48),
|
| 447 |
+
(24, 192, 6785, 6990, 5260, 770, 5120, 40, 48),
|
| 448 |
+
(32, 96, 9458, 9748, 7288, 879, 6144, 48, 64),
|
| 449 |
+
(32, 128, 9469, 9744, 7306, 892, 6144, 48, 64),
|
| 450 |
+
(32, 256, 9447, 9644, 7193, 887, 6144, 48, 64),
|
| 451 |
]
|
| 452 |
s = 1024
|
| 453 |
|
| 454 |
# h, a, s = 4096, 32, 1024
|
| 455 |
# cost_f, cost_b, cost_w, cost_c = 29718, 29444, 19927, 527
|
| 456 |
+
for p, n, f, b, w, c, h, a, _ in settings:
|
| 457 |
mem_f = 34 * h + 5 * a * s
|
| 458 |
mem_w = - 32 * h
|
| 459 |
mem_b = - mem_w - mem_f
|
|
|
|
| 471 |
max_mem=mem_f * (p * 2 + m_offset),
|
| 472 |
)
|
| 473 |
graph.get_v_schedule()
|
| 474 |
+
break
|