lr_scheduler
A summary of some common lr schedulers.
Environment: py3.9.13 + torch1.12.1
import torch
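The examples below also rely on the scheduler imports, a network called model, an initial learning rate initial_lr, and a TensorBoard writer, none of which are defined above. The following is a minimal placeholder setup for running the snippets; the one-layer net, initial_lr = 0.1, and the 'logs' directory are assumptions, not part of the original post.

import torch.nn as nn
from torch.optim.lr_scheduler import (LambdaLR, StepLR, MultiStepLR, ExponentialLR,
                                      CosineAnnealingLR, ReduceLROnPlateau, _LRScheduler)
from torch.utils.tensorboard import SummaryWriter

class model(nn.Module):
    # placeholder network; any module with parameters works for these demos
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(1, 1)
    def forward(self, x):
        return self.fc(x)

initial_lr = 0.1                # assumed initial learning rate
writer = SummaryWriter('logs')  # TensorBoard writer shared by all examples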
LambdaLR
"""
LambdaLR: (optimizer, lr_lambda, last_epoch=-1)
last_epoch: the index of the last epoch when resuming training from a checkpoint. Defaults to -1, i.e., training starts from scratch.
new_lr = λ(epoch) * initial_lr, where λ is lr_lambda
"""
def lambdaLR_test():
    net = model()
    optimizer = torch.optim.Adam(net.parameters(), lr=initial_lr)
    scheduler = LambdaLR(optimizer, lr_lambda=lambda epoch: 1/(epoch+1))
    print('Initial learning rate: ', optimizer.defaults['lr'])
    for epoch in range(1, 11):
        # train...
        optimizer.zero_grad()
        optimizer.step()
        print('Learning rate at epoch %d: %f' % (epoch, optimizer.param_groups[0]['lr']))
        writer.add_scalar("LambdaLR", optimizer.param_groups[0]['lr'], epoch)  # in the TensorBoard panel, set Smoothing to 0, and clear stale files under logs/LambdaLR
        scheduler.step()
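As a quick sanity check of the formula (with the assumed initial_lr = 0.1): the lr used at epoch k of the loop is 0.1 * 1/k, so the printout reads 0.1, 0.05, 0.0333, 0.025, ... for epochs 1-4. Note that lr_lambda receives the scheduler's internal epoch counter, which starts at 0 and increases by one on every scheduler.step().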
StepLR
"""
StepLR: (optimizer, step_size, gamma=0.1, last_epoch=-1)
new_lr = initial_lr * γ^(epoch // step_size)
"""
def StepLR_test():
    net = model()
    optimizer = torch.optim.Adam(net.parameters(), lr=initial_lr)
    scheduler = StepLR(optimizer, step_size=3, gamma=0.1)  # decay the lr every 3 epochs
    print('Initial learning rate: ', optimizer.defaults['lr'])
    for epoch in range(1, 11):
        # train...
        optimizer.zero_grad()
        optimizer.step()
        print('Learning rate at epoch %d: %f' % (epoch, optimizer.param_groups[0]['lr']))
        writer.add_scalar("StepLR", optimizer.param_groups[0]['lr'], epoch)
        scheduler.step()
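With step_size=3, gamma=0.1 and the assumed initial_lr = 0.1, the lr stays at 0.1 for epochs 1-3, drops to 0.01 for epochs 4-6 and to 0.001 for epochs 7-9: each block of step_size epochs multiplies the lr by γ.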
MultiStepLR
"""
MultiStepLR: (optimizer, milestones, gamma=0.1, last_epoch=-1)
new_lr = initial_lr * γ^bisect_right(milestones, epoch)
"""
def MultiStepLR_test():
    net = model()
    optimizer = torch.optim.Adam(net.parameters(), lr=initial_lr)
    scheduler = MultiStepLR(optimizer, milestones=[3, 7], gamma=0.1)  # decay the lr after epochs 3 and 7
    print('Initial learning rate: ', optimizer.defaults['lr'])
    for epoch in range(1, 11):
        # train...
        optimizer.zero_grad()
        optimizer.step()
        print('Learning rate at epoch %d: %f' % (epoch, optimizer.param_groups[0]['lr']))
        writer.add_scalar("MultiStepLR", optimizer.param_groups[0]['lr'], epoch)
        scheduler.step()
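bisect_right(milestones, epoch) in the formula simply counts how many milestones the current epoch has reached, so with milestones=[3, 7] the lr is scaled by γ once after epoch 3 and again after epoch 7. A quick illustration with the standard library:

from bisect import bisect_right
milestones = [3, 7]
print(bisect_right(milestones, 2))  # 0 -> lr = initial_lr
print(bisect_right(milestones, 3))  # 1 -> lr = initial_lr * 0.1
print(bisect_right(milestones, 8))  # 2 -> lr = initial_lr * 0.01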
ExponentialLR
"""
ExponentialLR: (optimizer, gamma, last_epoch=-1)
new_lr = initial_lr * γ^epoch
"""
def ExponentialLR_test():
    net = model()
    optimizer = torch.optim.Adam(net.parameters(), lr=initial_lr)
    scheduler = ExponentialLR(optimizer, gamma=0.1)
    print('Initial learning rate: ', optimizer.defaults['lr'])
    for epoch in range(1, 11):
        # train...
        optimizer.zero_grad()
        optimizer.step()
        print('Learning rate at epoch %d: %f' % (epoch, optimizer.param_groups[0]['lr']))
        writer.add_scalar("ExponentialLR", optimizer.param_groups[0]['lr'], epoch)
        scheduler.step()
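This is the step_size=1 special case of StepLR: with the assumed initial_lr = 0.1 and gamma=0.1, the lr printed at epoch k is 0.1 * 0.1^(k-1), i.e., 0.1, 0.01, 0.001, ..., which vanishes very quickly. In practice, gamma values much closer to 1 (e.g., 0.9-0.99) are more typical.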
CosineAnnealingLR
"""
CosineAnnealingLR: (optimizer, T_max, eta_min=0, last_epoch=-1)
The lr follows a full cosine cycle every 2*T_max epochs: it decays from initial_lr to eta_min over T_max epochs and then climbs back.
eta_min: minimum lr
new_lr = eta_min + (initial_lr - eta_min) * (1 + cos((epoch / T_max) * π)) / 2
"""
def CosineAnnealingLR_test():
    net = model()
    optimizer = torch.optim.Adam(net.parameters(), lr=initial_lr)
    scheduler = CosineAnnealingLR(optimizer, T_max=10)
    print('Initial learning rate: ', optimizer.defaults['lr'])
    for epoch in range(1, 101):
        # train...
        optimizer.zero_grad()
        optimizer.step()
        print('Learning rate at epoch %d: %f' % (epoch, optimizer.param_groups[0]['lr']))
        writer.add_scalar("CosineAnnealingLR", optimizer.param_groups[0]['lr'], epoch)
        scheduler.step()
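To convince yourself that the scheduler really follows the closed form above, here is a small check; it is a sketch assuming initial_lr = 0.1, T_max=10 and eta_min=0, and uses get_last_lr() to read the lr set by the most recent step():

import math

def cosine_check():
    optimizer = torch.optim.Adam(model().parameters(), lr=initial_lr)
    scheduler = CosineAnnealingLR(optimizer, T_max=10)
    for t in range(1, 21):
        optimizer.step()
        scheduler.step()
        # closed form: eta_min + (initial_lr - eta_min) * (1 + cos(pi * t / T_max)) / 2
        expected = 0 + (initial_lr - 0) * (1 + math.cos(math.pi * t / 10)) / 2
        assert abs(scheduler.get_last_lr()[0] - expected) < 1e-8
    print('closed form matches get_last_lr() for 20 steps')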
ReduceLROnPlateau
"""
ReduceLROnPlateau: (optimizer, mode='min', factor=0.1, patience=10, verbose=False, threshold=0.0001, threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-08)
Reduces the lr dynamically based on a metric monitored during training (e.g., validation loss).
new_lr = factor * old_lr
"""
def ReduceLROnPlateau_test():
    net = model()
    optimizer = torch.optim.Adam(net.parameters(), lr=initial_lr)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2)
    print('Initial learning rate: ', optimizer.defaults['lr'])
    for epoch in range(1, 15):
        # train...
        train_loss = 2  # constant dummy loss, so the monitored metric never improves
        optimizer.zero_grad()
        optimizer.step()
        print('Learning rate at epoch %d: %f' % (epoch, optimizer.param_groups[0]['lr']))
        writer.add_scalar("ReduceLROnPlateau", optimizer.param_groups[0]['lr'], epoch)
        scheduler.step(train_loss)
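Because train_loss is held constant at 2, the metric never improves on the best value recorded at epoch 1, so after patience=2 "bad" epochs the scheduler multiplies the lr by factor=0.1; in this toy setup the reduction happens roughly every patience+1 step() calls (and shows up in the printout one epoch later). In a real loop you would pass the actual validation loss (mode='min') or a metric to maximize (mode='max'). Note that this is the only scheduler here whose step() takes the monitored value as an argument.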
WarmUp
class WarmUpLR(_LRScheduler):
    def __init__(self, optimizer, total_iters, last_epoch=-1):
        """
        total_iters: warm the lr up (i.e., ramp it from low to high) over the first n epochs or the first n batches.
        Whether the lr changes per epoch or per batch is up to you; just call step() at the matching granularity.
        """
        self.total_iters = total_iters
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """
        self.base_lrs: the optimizer's initial lr(s), i.e., the maximum lr reached after warmup
        self.last_epoch: the current epoch index (or batch index); it is incremented by one on every warmup_scheduler.step()
        """
        return [base_lr * self.last_epoch / (self.total_iters + 1e-8) for base_lr in self.base_lrs]


def warmUpLR_test():
    net = model()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)
    # scheduler = CosineAnnealingLR(optimizer, 20)
    scheduler = LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (epoch + 1))
    warmup_epoch = 10  # here we assume the lr changes per epoch and is warmed up over the first 10 epochs
    warmup_scheduler = WarmUpLR(optimizer, warmup_epoch)  # after this line runs, optimizer.param_groups[0]['lr'] changes from 0.001 to 0.0
    print('Initial learning rate: ', optimizer.defaults['lr'])  # the lr passed to the optimizer, 0.001
    for epoch in range(1, 50):
        # train...
        optimizer.zero_grad()
        optimizer.step()
        print('Learning rate at epoch %d: %f' % (epoch, optimizer.param_groups[0]['lr']))
        writer.add_scalar("warmUpLR", optimizer.param_groups[0]['lr'], epoch)
        if epoch <= warmup_epoch:
            warmup_scheduler.step()
        else:
            scheduler.step()
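As an alternative to the hand-rolled class, torch >= 1.10 (and therefore the 1.12.1 used here) ships LinearLR and SequentialLR, which can express the same warmup-then-decay shape without a custom scheduler. The sketch below is illustrative; the start_factor and milestone values are my own choices, not from the original code:

from torch.optim.lr_scheduler import LinearLR, SequentialLR

def warmup_builtin_test():
    net = model()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)
    warmup = LinearLR(optimizer, start_factor=0.01, end_factor=1.0, total_iters=10)  # linear warmup over 10 epochs
    decay = LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (epoch + 1))             # then 1/(epoch+1) decay
    scheduler = SequentialLR(optimizer, schedulers=[warmup, decay], milestones=[10])
    for epoch in range(1, 50):
        # train...
        optimizer.zero_grad()
        optimizer.step()
        print('Learning rate at epoch %d: %f' % (epoch, optimizer.param_groups[0]['lr']))
        scheduler.step()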