import torch
import torch.nn as nn
#------------------------------------------#
# Definition of the residual block that is
# used inside the residual part of the
# DilatedEncoder. Each block consists of
# three conv + BN + ReLU stages with kernel
# sizes 1x1, 3x3 (dilated) and 1x1.
#------------------------------------------#
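#------------------------------------------#
# Note (added for clarity): a 3x3 convolution
# with dilation d samples a (2d+1) x (2d+1)
# window (span = d*(3-1)+1), so a larger
# dilation enlarges the receptive field
# without adding any parameters.
#------------------------------------------#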
class Bottleneck(nn.Module):
    def __init__(self, in_channels=512, mid_channels=128, dilation=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Sequential(  # 1x1 conv + BN + ReLU: reduce channels
            nn.Conv2d(in_channels, mid_channels, kernel_size=1, padding=0),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True)
        )
        self.conv2 = nn.Sequential(  # 3x3 dilated conv + BN + ReLU
            nn.Conv2d(mid_channels, mid_channels, kernel_size=3, padding=dilation, dilation=dilation),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True)
        )
        self.conv3 = nn.Sequential(  # 1x1 conv + BN + ReLU: restore channels
            nn.Conv2d(mid_channels, in_channels, kernel_size=1, padding=0),
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True)
        )
    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.conv3(out)
        out = out + x  # residual connection
        return out
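#--------------------------------------------------#
# Illustrative sketch (added, not part of the
# original module): a Bottleneck preserves both the
# channel count and the spatial size, which is what
# makes the residual addition `out + x` shape-valid.
# The helper name below is hypothetical.
#--------------------------------------------------#
def _check_bottleneck_shape():
    blk = Bottleneck(in_channels=512, mid_channels=128, dilation=2)
    x = torch.rand(1, 512, 13, 13)
    # padding=dilation keeps the 3x3 dilated conv size-preserving
    assert blk(x).shape == x.shape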
#------------------------------------------#
# The DilatedEncoder structure
#------------------------------------------#
class DilatedEncoder(nn.Module):
    def __init__(self, in_channels=1024, encoder_channels=512, block_mid_channels=128, num_residual_blocks=4, block_dilations=(2, 4, 6, 8)):
        super(DilatedEncoder, self).__init__()
        self.in_channels = in_channels
        self.encoder_channels = encoder_channels
        self.block_mid_channels = block_mid_channels
        self.num_residual_blocks = num_residual_blocks
        self.block_dilations = block_dilations
        # The assertion ties the length of block_dilations to the number of
        # residual blocks: each block gets its own dilation rate.
        assert len(self.block_dilations) == self.num_residual_blocks
        # Build the layers, then initialize their weights
        self._init_layers()
        self._init_weight()
    #--------------------------------------#
    # Initialization of the projection layer
    # and the residual part; the residual
    # part contains four residual blocks.
    #--------------------------------------#
    def _init_layers(self):
        #-----------------------------#
        # First, build the projection
        # layer.
        #-----------------------------#
        # 1x1 convolution + batch normalization
        self.lateral_conv = nn.Conv2d(self.in_channels, self.encoder_channels, kernel_size=1)
        self.lateral_norm = nn.BatchNorm2d(self.encoder_channels)
        # Then a 3x3 convolution + batch normalization
        self.fpn_conv = nn.Conv2d(self.encoder_channels, self.encoder_channels, kernel_size=3, padding=1)
        self.fpn_norm = nn.BatchNorm2d(self.encoder_channels)
        #-----------------------------#
        # Next, build the residual
        # part.
        #-----------------------------#
        encoder_blocks = []
        # There are four residual blocks, each made of three conv + BN + ReLU stages
        for i in range(self.num_residual_blocks):
            # Assign each block its dilation rate: from left to right, 2, 4, 6, 8
            dilation = self.block_dilations[i]
            # Instantiate the block
            encoder_blocks.append(
                Bottleneck(
                    self.encoder_channels,
                    self.block_mid_channels,
                    dilation=dilation
                )
            )
        # Chain the four blocks sequentially to obtain the residual part
        self.dilated_encoder_blocks = nn.Sequential(*encoder_blocks)
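    #--------------------------------------------------#
    # Note (added for clarity): stacking blocks with
    # dilations 2, 4, 6 and 8 gives the encoder several
    # receptive-field sizes on one feature map, since
    # each 3x3 conv with dilation d spans a
    # (2d+1) x (2d+1) window.
    #--------------------------------------------------#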
    def xavier_init(self, layer):
        if isinstance(layer, nn.Conv2d):  # initialize only 2D convolutions
            #---------------------------------------------#
            # The idea behind Xavier initialization is to
            # keep the variance of the signal the same
            # across layers, in both the forward and the
            # backward pass. The _uniform_ variant draws
            # the weights from a uniform distribution.
            #---------------------------------------------#
            nn.init.xavier_uniform_(layer.weight, gain=1)
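    #--------------------------------------------------#
    # Added detail: nn.init.xavier_uniform_ draws from
    # U(-a, a) with a = gain * sqrt(6 / (fan_in + fan_out)),
    # so gain=1 gives the standard Glorot bounds.
    #--------------------------------------------------#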
    #-------------------------------------------#
    # Weight initialization for the DilatedEncoder
    #-------------------------------------------#
    def _init_weight(self):
        # Apply Xavier initialization to the projection-layer convolutions
        self.xavier_init(self.lateral_conv)
        self.xavier_init(self.fpn_conv)
        # Initialize the projection-layer normalizations
        for m in [self.lateral_norm, self.fpn_norm]:
            nn.init.constant_(m.weight, 1)  # fill the weights with 1
            nn.init.constant_(m.bias, 0)    # fill the biases with 0, i.e. no shift
        # For the four residual blocks of the residual part
        for m in self.dilated_encoder_blocks.modules():
            # convolutions
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, mean=0, std=0.01)  # draw weights from a normal distribution
                if hasattr(m, 'bias') and m.bias is not None:  # if the conv has a bias term, zero it
                    nn.init.constant_(m.bias, 0)
            # normalization layers
            if isinstance(m, (nn.GroupNorm, nn.BatchNorm2d, nn.SyncBatchNorm)):
                nn.init.constant_(m.weight, 1)  # initialize the weight to the constant 1
                nn.init.constant_(m.bias, 0)    # initialize the bias to the constant 0
    def forward(self, feature):  # operates on a feature tensor
        out = self.lateral_norm(self.lateral_conv(feature))  # 1x1 conv followed by BN: first projection stage
        out = self.fpn_norm(self.fpn_conv(out))              # 3x3 conv followed by BN: second projection stage
        out = self.dilated_encoder_blocks(out)               # finally, pass through the residual part
        return out
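#--------------------------------------------------#
# Data flow (added summary): with the defaults, an
# input of shape (B, 1024, H, W) is projected to
# (B, 512, H, W) by the lateral 1x1 conv, kept at
# (B, 512, H, W) by the 3x3 fpn conv, and the four
# dilated residual blocks change neither the channel
# count nor the spatial size.
#--------------------------------------------------#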
if __name__ == '__main__':
    x = torch.rand(16, 1024, 13, 13)
    print('Before DilatedEncoder:')
    print(x.shape)
    encoder = DilatedEncoder()
    y = encoder(x)
    print('After DilatedEncoder:')
    print(y.shape)
    # print(encoder)
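    # Extra check (added; follows from the default arguments): the encoder
    # should only change the channel dimension, 1024 -> 512.
    assert y.shape == (16, 512, 13, 13)
    print('Parameter count:', sum(p.numel() for p in encoder.parameters()))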