维度变换法

B, H, W, C = x.shape
x = x.view(B, H // window_size, window_size, W // window_size, window_size, C)
# permute: [B, H//Mh, Mh, W//Mw, Mw, C] -> [B, H//Mh, W//Mw, Mh, Mw, C]
# view: [B, H//Mh, W//Mw, Mh, Mw, C] -> [B*num_windows, Mh, Mw, C]
windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C)
_, _, H, W = x.shape
# flatten: [B, C, H, W] -> [B, C, HW]
# transpose: [B, C, HW] -> [B, HW, C]
x = x.flatten(2).transpose(1, 2)
x0 = x[:, 0::2, 0::2, :]  # [B, H/2, W/2, C]
x1 = x[:, 1::2, 0::2, :]  # [B, H/2, W/2, C]
x2 = x[:, 0::2, 1::2, :]  # [B, H/2, W/2, C]
x3 = x[:, 1::2, 1::2, :]  # [B, H/2, W/2, C]
x = torch.cat([x0, x1, x2, x3], -1)  # [B, H/2, W/2, 4*C]
x = x.view(B, -1, 4 * C)  # [B, H/2*W/2, 4*C]
coords_h = torch.arange(self.window_size[0])
coords_w = torch.arange(self.window_size[1])
coords = torch.stack(torch.meshgrid([coords_h, coords_w], indexing="ij"))  # [2, Mh, Mw]
coords_flatten = torch.flatten(coords, 1)  # [2, Mh*Mw]
# [2, Mh*Mw, 1] - [2, 1, Mh*Mw]
relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :]  # [2, Mh*Mw, Mh*Mw]
relative_coords = relative_coords.permute(1, 2, 0).contiguous()  # [Mh*Mw, Mh*Mw, 2]
 mask_windows = window_partition(img_mask, self.window_size)  # [nW, Mh, Mw, 1]
 mask_windows = mask_windows.view(-1, self.window_size * self.window_size)  # [nW, Mh*Mw]
 attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)  # [nW, 1, Mh*Mw] - [nW, Mh*Mw, 1]
        # [nW, Mh*Mw, Mh*Mw]
x = self.norm(x)  # [B, L, C]
x = self.avgpool(x.transpose(1, 2))  # [B, C, 1]
B, N, C = x.shape

# qkv(): -> [batch_size, num_patches + 1, 3 * total_embed_dim]
# reshape: -> [batch_size, num_patches + 1, 3, num_heads, embed_dim_per_head]
# permute: -> [3, batch_size, num_heads, num_patches + 1, embed_dim_per_head]
qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
# [batch_size, num_heads, num_patches + 1, embed_dim_per_head]
q, k, v = qkv[0], qkv[1], qkv[2]  # make torchscript happy (cannot use tensor as tuple)

# transpose: -> [batch_size, num_heads, embed_dim_per_head, num_patches + 1]
 # @: multiply -> [batch_size, num_heads, num_patches + 1, num_patches + 1]
 attn = (q @ k.transpose(-2, -1)) * self.scale
# [B, C, H, W] -> [B, num_patches, embed_dim]
x = self.patch_embed(x)  # [B, 196, 768]
# [1, 1, 768] -> [B, 1, 768]
cls_token = self.cls_token.expand(x.shape[0], -1, -1)
if self.dist_token is None:
    x = torch.cat((cls_token, x), dim=1)  # [B, 197, 768]
self.norm(x.mean([-2, -1]))  # global average pooling, (N, C, H, W) -> (N, C)
img = Image.open(img_path)
plt.imshow(img)
# [N, C, H, W]
img = data_transform(img)
 # expand batch dimension
img = torch.unsqueeze(img, dim=0)
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

x = self.inception5b(x)
# N x 1024 x 7 x 7

x = self.avgpool(x)
# N x 1024 x 1 x 1
x = torch.flatten(x, 1)
# N x 1024
self.averagePool = nn.AvgPool2d(kernel_size=5, stride=3)
self.conv = BasicConv2d(in_channels, 128, kernel_size=1)  # output[batch, 128, 4, 4]



    
# aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14
x = self.averagePool(x)
# aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4
x = self.conv(x)
# N x 128 x 4 x 4
x = torch.flatten(x, 1)  # 从channel维度(dim=1)开始展平
x = F.dropout(x, 0.5, training=self.training)
 # N x 2048
import torch
import torch.nn.functional as F
 
input = torch.randn(3,4)
print(input)
tensor([[-0.5526, -0.0194,  2.1469, -0.2567],
        [-0.3337, -0.9229,  0.0376, -0.0801],
        [ 1.4721,  0.1181, -2.6214,  1.7721]])
 
b = F.softmax(input,dim=0) # 按列SoftMax,列和为1
print(b)
tensor([[0.1018, 0.3918, 0.8851, 0.1021],
        [0.1268, 0.1587, 0.1074, 0.1218],
        [0.7714, 0.4495, 0.0075, 0.7762]])
 
c = F.softmax(input,dim=1)   # 按行SoftMax,行和为1
print(c)
tensor([[0.0529, 0.0901, 0.7860, 0.0710],
        [0.2329, 0.1292, 0.3377, 0.3002],
        [0.3810, 0.0984, 0.0064, 0.5143]])
 
d = torch.max(input,dim=0)    # 按列取max,返回每列的最大值及其所在的行索引
print(d)
torch.return_types.max(
values=tensor([1.4721, 0.1181, 2.1469, 1.7721]),
indices=tensor([2, 2, 0, 2]))
 
e = torch.max(input,dim=1)   # 按行取max,返回每行的最大值及其所在的列索引
print(e)
torch.return_types.max(
values=tensor([2.1469, 0.0376, 1.7721]),
indices=tensor([2, 2, 3]))
下面看看三维tensor解释例子:

函数softmax输出的是所给矩阵的概率分布;

b输出的是在dim=0维上的概率分布,例如对下面形状为(3,16,20)的a,有b[0][5][6]+b[1][5][6]+b[2][5][6]=1
总结:对于softmax(dim=i),假设归一化后的结果为x,那么沿x的第i维求和后得到的所有值都为1.

a=torch.rand(3,16,20)
 
b=F.softmax(a,dim=0)
 
c=F.softmax(a,dim=1)
 
d=F.softmax(a,dim=2)
 
 
In [1]: import torch as t
 
 
 
In [2]: import torch.nn.functional as F
 
 
 
In [4]: a=t.Tensor(3,4,5)
 
 
 
In [5]: b=F.softmax(a,dim=0)
 
 
 
In [6]: c=F.softmax(a,dim=1)
 
 
 
In [7]: d=F.softmax(a,dim=2)
 
 
 
In [8]: a
 
Out[8]: 
 
tensor([[[-0.1581,  0.0000,  0.0000,  0.0000, -0.0344],
 
         [ 0.0000, -0.0344,  0.0000, -0.0344,  0.0000],
 
         [-0.0344,  0.0000, -0.0344,  0.0000, -0.0344],
 
         [ 0.0000, -0.0344,  0.0000, -0.0344,  0.0000]],
 
 
 
        [[-0.0344,  0.0000, -0.0344,  0.0000, -0.0344],
 
         [ 0.0000, -0.0344,  0.0000, -0.0344,  0.0000],
 
         [-0.0344,  0.0000, -0.0344,  0.0000, -0.0344],
 
         [ 0.0000, -0.0344,  0.0000, -0.0344,  0.0000]],
 
 
 
        [[-0.0344,  0.0000, -0.0344,  0.0000, -0.0344],
 
         [ 0.0000, -0.0344,  0.0000, -0.0344,  0.0000],
 
         [-0.0344,  0.0000, -0.0344,  0.0000, -0.0344],
 
         [ 0.0000, -0.0344,  0.0000, -0.0344,  0.0000]]])
 
 
 
In [9]: b
 
Out[9]: 
 
tensor([[[0.3064, 0.3333, 0.3410, 0.3333, 0.3333],
 
         [0.3333, 0.3333, 0.3333, 0.3333, 0.3333],
 
         [0.3333, 0.3333, 0.3333, 0.3333, 0.3333],
 
         [0.3333, 0.3333, 0.3333, 0.3333, 0.3333]],
 
 
 
        [[0.3468, 0.3333, 0.3295, 0.3333, 0.3333],
 
         [0.3333, 0.3333, 0.3333, 0.3333, 0.3333],
 
         [0.3333, 0.3333, 0.3333, 0.3333, 0.3333],
 
         [0.3333, 0.3333, 0.3333, 0.3333, 0.3333]],
 
 
 
        [[0.3468, 0.3333, 0.3295, 0.3333, 0.3333],
 
         [0.3333, 0.3333, 0.3333, 0.3333, 0.3333],
 
         [0.3333, 0.3333, 0.3333, 0.3333, 0.3333],
 
         [0.3333, 0.3333, 0.3333, 0.3333, 0.3333]]])
 
 
 
In [10]: b.sum()
 
Out[10]: tensor(20.0000)
 
 
 
In [11]: b[0][0][0]+b[1][0][0]+b[2][0][0]
 
Out[11]: tensor(1.0000)
 
 
 
In [12]: c.sum()
 
Out[12]: tensor(15.)
 
 
 
In [13]: c
 
Out[13]: 
 
tensor([[[0.2235, 0.2543, 0.2521, 0.2543, 0.2457],
 
         [0.2618, 0.2457, 0.2521, 0.2457, 0.2543],
 
         [0.2529, 0.2543, 0.2436, 0.2543, 0.2457],
 
         [0.2618, 0.2457, 0.2521, 0.2457, 0.2543]],
 
 
 
        [[0.2457, 0.2543, 0.2457, 0.2543, 0.2457],
 
         [0.2543, 0.2457, 0.2543, 0.2457, 0.2543],
 
         [0.2457, 0.2543, 0.2457, 0.2543, 0.2457],
 
         [0.2543, 0.2457, 0.2543, 0.2457, 0.2543]],
 
 
 
        [[0.2457, 0.2543, 0.2457, 0.2543, 0.2457],
 
         [0.2543, 0.2457, 0.2543, 0.2457, 0.2543],
 
         [0.2457, 0.2543, 0.2457, 0.2543, 0.2457],
 
         [0.2543, 0.2457, 0.2543, 0.2457, 0.2543]]])
 
 
 
In [14]: n=t.rand(3,4)
 
 
 
In [15]: n
 
Out[15]: 
 
tensor([[0.2769, 0.3475, 0.8914, 0.6845],
 
        [0.9251, 0.3976, 0.8690, 0.4510],
 
        [0.8249, 0.1157, 0.3075, 0.3799]])
 
 
 
In [16]: m=t.argmax(n,dim=0)
 
 
 
In [17]: m
 
Out[17]: tensor([1, 1, 0, 0])
 
 
 
In [18]: p=t.argmax(n,dim=1)
 
 
 
In [19]: p
 
Out[19]: tensor([2, 0, 0])
 
 
 
In [20]: d.sum()
 
Out[20]: tensor(12.0000)
 
 
 
In [22]: d
 
Out[22]: 
 
tensor([[[0.1771, 0.2075, 0.2075, 0.2075, 0.2005],
 
         [0.2027, 0.1959, 0.2027, 0.1959, 0.2027],
 
         [0.1972, 0.2041, 0.1972, 0.2041, 0.1972],
 
         [0.2027, 0.1959, 0.2027, 0.1959, 0.2027]],
 
 
 
        [[0.1972, 0.2041, 0.1972, 0.2041, 0.1972],
 
         [0.2027, 0.1959, 0.2027, 0.1959, 0.2027],
 
         [0.1972, 0.2041, 0.1972, 0.2041, 0.1972],
 
         [0.2027, 0.1959, 0.2027, 0.1959, 0.2027]],
 
 
 
        [[0.1972, 0.2041, 0.1972, 0.2041, 0.1972],
 
         [0.2027, 0.1959, 0.2027, 0.1959, 0.2027],
 
         [0.1972, 0.2041, 0.1972, 0.2041, 0.1972],
 
         [0.2027, 0.1959, 0.2027, 0.1959, 0.2027]]])
 
 
 
In [23]: d[0][0].sum()
 
Out[23]: tensor(1.)

你可能感兴趣的:(python)