Hand-written fully connected network

```python
import numpy as np

class Network(object):
    def __init__(self, input_size, hidden_size, output_size, lr):
        self.A1 = init_weights((input_size, hidden_size))
        self.b1 = init_weights((hidden_size,))
        self.A2 = init_weights((hidden_size, output_size))
        self.b2 = init_weights((output_size,))
        self.lr = lr

    def check(self, x):
        # Inference only: forward pass, return the predicted class.
        z1 = np.matmul(x, self.A1) + self.b1
        a1 = relu(z1)
        z2 = np.matmul(a1, self.A2) + self.b2
        return np.argmax(z2)

    def step(self, x_batch, y_batch):
        batch_size = len(x_batch)
        batch_loss = 0
        batch_acc = 0
        # Forward pass
        z1 = np.matmul(x_batch, self.A1) + self.b1
        a1 = relu(z1)
        z2 = np.matmul(a1, self.A2) + self.b2
        a2 = softmax(z2)
        for i in range(batch_size):
            a2[i] /= np.sum(a2[i])  # renormalize for numerical safety
            batch_loss += -np.log(a2[i][np.argmax(y_batch[i])])  # cross entropy
            batch_acc += np.argmax(a2[i]) == np.argmax(y_batch[i])
        # Backward pass: gradient of softmax + cross entropy is (a2 - y)
        d_loss = a2 - y_batch
        d_b2 = np.sum(d_loss, axis=0)
        d_A2 = np.matmul(a1.T, d_loss)
        d_a1 = np.matmul(d_loss, self.A2.T)
        d_z1 = d_a1 * relu_prime(z1)
        d_b1 = np.sum(d_z1, axis=0)
        d_A1 = np.matmul(x_batch.T, d_z1)
        # SGD update, averaged over the batch
        self.A2 -= self.lr * d_A2 / batch_size
        self.b2 -= self.lr * d_b2 / batch_size
        self.A1 -= self.lr * d_A1 / batch_size
        self.b1 -= self.lr * d_b1 / batch_size
        return [batch_loss, batch_acc]
```
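The class above relies on `init_weights`, `relu`, `relu_prime`, and `softmax`, which are not shown in the notes. A minimal sketch of what they might look like (the uniform initialization range is an assumption, not from the original):

```python
import numpy as np

def init_weights(shape):
    # Assumed helper: small random uniform initialization.
    return np.random.uniform(-0.1, 0.1, size=shape)

def relu(x):
    return np.maximum(0, x)

def relu_prime(x):
    # Derivative of ReLU: 1 where x > 0, else 0.
    return (x > 0).astype(x.dtype)

def softmax(z):
    # Row-wise softmax with max-subtraction for numerical stability.
    e = np.exp(z - np.max(z, axis=1, keepdims=True))
    return e / np.sum(e, axis=1, keepdims=True)
```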
Fully connected network with torch; dataset: MNIST
```python
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 32)
        self.fc3 = nn.Linear(32, 10)

    def forward(self, x):
        x = x.view(-1, 784)  # flatten the 28x28 images
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

model = Net()
# forward() already applies log_softmax, so use NLLLoss here;
# pairing it with CrossEntropyLoss would apply log_softmax twice.
criterion = nn.NLLLoss()
# Create the optimizer once, outside the epoch loop.
optimizer = optim.SGD(model.parameters(), lr=0.1)

for epoch in range(EPOCHS):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

    val_loss = []
    model.eval()
    with torch.no_grad():
        for data, target in val_loader:
            output = model(data)
            val_loss.append(criterion(output, target).item())
```
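The loaders and constants referenced above (`train_loader`, `val_loader`, `EPOCHS`) are not shown in the notes; a minimal torchvision sketch, with batch size and epoch count as assumptions:

```python
import torch
from torch import nn, optim
import torch.nn.functional as F
from torchvision import datasets, transforms

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),  # standard MNIST mean/std
])

train_set = datasets.MNIST("./data", train=True, download=True, transform=transform)
val_set = datasets.MNIST("./data", train=False, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=64)

EPOCHS = 10  # assumed value
```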
CNN; dataset: CIFAR-10
```python
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
])

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Block 1: 3 x 32 x 32 -> 64 x 16 x 16
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU()
        # Block 2: 64 x 16 x 16 -> 128 x 8 x 8
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.bn2 = nn.BatchNorm2d(128)
        self.relu2 = nn.ReLU()
        # Block 3: 128 x 8 x 8 -> 256 x 4 x 4
        self.conv5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.conv6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.conv7 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.pool3 = nn.MaxPool2d(2, 2)
        self.bn3 = nn.BatchNorm2d(256)
        self.relu3 = nn.ReLU()
        # Classifier head
        self.fc1 = nn.Linear(256 * 4 * 4, 512)
        self.bn4 = nn.BatchNorm1d(512)
        self.relu4 = nn.ReLU()
        self.fc2 = nn.Linear(512, 128)
        self.bn5 = nn.BatchNorm1d(128)
        self.relu5 = nn.ReLU()
        self.fc3 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.relu1(self.bn1(self.pool1(self.conv2(self.conv1(x)))))
        x = self.relu2(self.bn2(self.pool2(self.conv4(self.conv3(x)))))
        x = self.relu3(self.bn3(self.pool3(self.conv7(self.conv6(self.conv5(x))))))
        x = x.view(-1, 256 * 4 * 4)
        x = self.relu4(self.bn4(self.fc1(x)))
        x = self.relu5(self.bn5(self.fc2(x)))
        x = self.fc3(x)
        return x
```
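A test-loop sketch for measuring accuracy on the CIFAR-10 test set (the `test_loader` name and its setup are assumptions):

```python
correct, total = 0, 0
model.eval()
with torch.no_grad():
    for data, target in test_loader:  # assumed CIFAR-10 test DataLoader
        output = model(data)
        pred = output.argmax(dim=1)
        correct += (pred == target).sum().item()
        total += target.size(0)
print(f"test accuracy: {correct / total:.4f}")
```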
DCGAN (Deep Convolutional GAN); dataset: CelebA. The code can be copied directly from the official PyTorch tutorial.
Generator:
```python
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.net = nn.Sequential(
            # latent vector z (nz) -> (ngf*8) x 4 x 4
            nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # -> (ngf*4) x 8 x 8
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # -> (ngf*2) x 16 x 16
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # -> ngf x 32 x 32
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            # -> nc x 64 x 64, values in [-1, 1]
            nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
            nn.Tanh()
        )

    def forward(self, input):
        return self.net(input)
```
Discriminator:
```python
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.net = nn.Sequential(
            # nc x 64 x 64 -> ndf x 32 x 32
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # -> (ndf*2) x 16 x 16
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # -> (ndf*4) x 8 x 8
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # -> (ndf*8) x 4 x 4
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # -> a single "real" probability per image
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )

    def forward(self, input):
        return self.net(input)
```
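The training loop below also needs the hyperparameters and setup from the tutorial (the CelebA `dataloader` is assumed to exist); a sketch using the tutorial's usual defaults, with the exact values treated as assumptions:

```python
nz, ngf, ndf, nc = 100, 64, 64, 3        # latent dim, G/D feature maps, image channels
real_label, fake_label = 1.0, 0.0
num_epochs = 5                           # assumed value
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def weights_init(m):
    # DCGAN init: conv weights ~ N(0, 0.02), BatchNorm weights ~ N(1, 0.02).
    classname = m.__class__.__name__
    if classname.find("Conv") != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find("BatchNorm") != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)

netG = Generator().to(device)
netD = Discriminator().to(device)
netG.apply(weights_init)
netD.apply(weights_init)

criterion = nn.BCELoss()
optimizerD = optim.Adam(netD.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=0.0002, betas=(0.5, 0.999))
G_losses, D_losses = [], []
```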
```python
for epoch in range(num_epochs):
    for i, data in enumerate(dataloader, 0):
        # (1) Update D: maximize log(D(x)) + log(1 - D(G(z)))
        netD.zero_grad()
        real_cpu = data[0].to(device)
        b_size = real_cpu.size(0)
        label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
        output = netD(real_cpu).view(-1)
        errD_real = criterion(output, label)
        errD_real.backward()

        noise = torch.randn(b_size, nz, 1, 1, device=device)
        fake = netG(noise)
        label.fill_(fake_label)
        # detach() so gradients do not flow into G during the D update
        output = netD(fake.detach()).view(-1)
        errD_fake = criterion(output, label)
        errD_fake.backward()
        errD = errD_real + errD_fake
        optimizerD.step()

        # (2) Update G: maximize log(D(G(z))), i.e. use real labels for fake images
        netG.zero_grad()
        label.fill_(real_label)
        output = netD(fake).view(-1)
        errG = criterion(output, label)
        errG.backward()
        optimizerG.step()

        G_losses.append(errG.item())
        D_losses.append(errD.item())
```
RNN: a hand-written LSTM for sentiment classification; dataset: IMDB movie reviews
```python
import numpy as np
import torch
from torch import nn

class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(LSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        # One (U, V, b) triple per gate: forget, input, cell candidate, output.
        self.U_f = nn.Parameter(torch.Tensor(input_size, hidden_size))
        self.V_f = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
        self.b_f = nn.Parameter(torch.Tensor(hidden_size))
        self.U_i = nn.Parameter(torch.Tensor(input_size, hidden_size))
        self.V_i = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
        self.b_i = nn.Parameter(torch.Tensor(hidden_size))
        self.U_c = nn.Parameter(torch.Tensor(input_size, hidden_size))
        self.V_c = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
        self.b_c = nn.Parameter(torch.Tensor(hidden_size))
        self.U_o = nn.Parameter(torch.Tensor(input_size, hidden_size))
        self.V_o = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
        self.b_o = nn.Parameter(torch.Tensor(hidden_size))
        self.init_weights()

    def init_weights(self):
        w = 1.0 / np.sqrt(self.hidden_size)
        for weight in self.parameters():
            weight.data.uniform_(-w, w)

    def forward(self, x):
        bs, seq_len, dim = x.size()
        hidden_seq = []
        # Start from zero hidden and cell states (h_0 and c_0 were undefined in the original).
        h_t = torch.zeros(bs, self.hidden_size, device=x.device)
        c_t = torch.zeros(bs, self.hidden_size, device=x.device)
        for t in range(seq_len):
            x_t = x[:, t, :]
            i_t = torch.sigmoid(x_t @ self.U_i + h_t @ self.V_i + self.b_i)  # input gate
            f_t = torch.sigmoid(x_t @ self.U_f + h_t @ self.V_f + self.b_f)  # forget gate
            g_t = torch.tanh(x_t @ self.U_c + h_t @ self.V_c + self.b_c)     # cell candidate
            o_t = torch.sigmoid(x_t @ self.U_o + h_t @ self.V_o + self.b_o)  # output gate
            c_t = f_t * c_t + i_t * g_t
            h_t = o_t * torch.tanh(c_t)
            hidden_seq.append(h_t.unsqueeze(0))
        hidden_seq = torch.cat(hidden_seq, dim=0)
        # (seq_len, bs, hidden) -> (bs, seq_len, hidden)
        hidden_seq = hidden_seq.transpose(0, 1).contiguous()
        return hidden_seq
```
```python
class Net(nn.Module):
    def __init__(self, embedding_size=64, hidden_size=64, mlp_embedding_dim=64, num_classes=2):
        super(Net, self).__init__()
        # vocab_size is assumed to be defined globally from the IMDB vocabulary.
        self.embedding = nn.Embedding(vocab_size, embedding_size)
        self.lstm = LSTM(input_size=embedding_size, hidden_size=hidden_size)
        self.linear1 = nn.Linear(in_features=hidden_size, out_features=mlp_embedding_dim)
        self.act1 = torch.nn.ReLU()
        self.linear2 = nn.Linear(in_features=mlp_embedding_dim, out_features=num_classes)

    def forward(self, x):
        x = self.embedding(x)      # (bs, seq_len) -> (bs, seq_len, emb)
        x = self.lstm(x)           # (bs, seq_len, hidden)
        x = torch.mean(x, dim=1)   # mean-pool over time steps
        x = self.linear1(x)
        x = self.act1(x)
        x = self.linear2(x)
        return x
```
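A minimal training-loop sketch for this classifier; the IMDB loader, its batch format, and the hyperparameters are assumptions:

```python
from torch import optim

model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(5):
    model.train()
    for tokens, labels in train_loader:  # assumed DataLoader of (token ids, 0/1 labels)
        optimizer.zero_grad()
        logits = model(tokens)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
```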
Transformer: code omitted.
Final exam review notes

Object detection
IoU; NMS (non-maximum suppression) — a NumPy sketch appears after this list.
R-CNN; SPP-Net (an improvement on R-CNN); improved further by Fast R-CNN, which introduces Region of Interest (RoI) pooling and trains the classifier and bounding-box regressor jointly with the feature extractor, but still uses Selective Search; Faster R-CNN replaces Selective Search with a Region Proposal Network (RPN).
YOLO: divide the image into grid cells + NMS; v2 adds prior (anchor) boxes; SSD.
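As referenced above, a NumPy sketch of IoU and greedy NMS (the [x1, y1, x2, y2] box format is an assumption):

```python
import numpy as np

def iou(box, boxes):
    # IoU between one box and an array of boxes, format [x1, y1, x2, y2].
    x1 = np.maximum(box[0], boxes[:, 0])
    y1 = np.maximum(box[1], boxes[:, 1])
    x2 = np.minimum(box[2], boxes[:, 2])
    y2 = np.minimum(box[3], boxes[:, 3])
    inter = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
    area_a = (box[2] - box[0]) * (box[3] - box[1])
    area_b = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    return inter / (area_a + area_b - inter)

def nms(boxes, scores, iou_thresh=0.5):
    # Greedy NMS: keep the highest-scoring box, drop heavily overlapping ones, repeat.
    order = np.argsort(scores)[::-1]
    keep = []
    while len(order) > 0:
        best = order[0]
        keep.append(best)
        rest = order[1:]
        order = rest[iou(boxes[best], boxes[rest]) <= iou_thresh]
    return keep
```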
Image segmentation

Pixel-level classification; Fully Convolutional Network (FCN); skip connections.

SegNet, PSPNet, U-Net.

Pixel-wise cross entropy makes large objects contribute a disproportionately large loss; the Dice coefficient is used to address this — see the sketch below.
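A sketch of a soft Dice loss in PyTorch (the smoothing constant is an assumption):

```python
import torch

def dice_loss(pred, target, eps=1.0):
    # pred: predicted foreground probabilities, target: binary mask, both (bs, H, W).
    pred = pred.reshape(pred.size(0), -1)
    target = target.reshape(target.size(0), -1)
    inter = (pred * target).sum(dim=1)
    # Dice = 2|A∩B| / (|A| + |B|); the loss is 1 - Dice, averaged over the batch.
    dice = (2 * inter + eps) / (pred.sum(dim=1) + target.sum(dim=1) + eps)
    return 1 - dice.mean()
```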
Pose estimation

Top-down: detect person boxes first, then keypoints.

Bottom-up: detect keypoints first, then group them into people.

Convolutional Pose Machine (CPM).

OpenPose = CPM + bottom-up.

Pose Proposal Network (PPN) = YOLO + OpenPose.
GAN (Vanilla GAN)
Objective: $\min_G \max_D V(D, G) = \mathbb{E}_{x \sim p_{\text{data}}}[\log D(x)] + \mathbb{E}_{z \sim p_z}[\log(1 - D(G(z)))]$. At the optimum, $D^*(x) = \frac{1}{2}$, i.e. the discriminator labels fake and real each with probability 0.5.
(Making D optimal beforehand can cause unstable training, vanishing gradients, and G failing to capture the true data distribution.)
DCGAN — Deep Convolutional GAN
Uses convolutional layers.

Loss: using mean squared error (MSE) leaves some details indistinct (blurry outputs).
Variational Autoencoder
X is encoded into z; z is passed through the decoder G to obtain the reconstruction $\hat{X}$; then compute the L2 loss.
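A minimal sketch of the VAE forward pass with the reparameterization trick (the encoder/decoder shapes here are assumptions):

```python
import torch
from torch import nn

class VAE(nn.Module):
    def __init__(self, x_dim=784, z_dim=16):
        super().__init__()
        self.enc = nn.Linear(x_dim, 2 * z_dim)  # outputs mean and log-variance
        self.dec = nn.Linear(z_dim, x_dim)      # the "G" that reconstructs x

    def forward(self, x):
        mu, logvar = self.enc(x).chunk(2, dim=1)
        # Reparameterization: z = mu + sigma * eps keeps sampling differentiable.
        z = mu + torch.exp(0.5 * logvar) * torch.randn_like(mu)
        x_hat = self.dec(z)
        # L2 reconstruction loss plus the KL term pulling z toward N(0, I).
        recon = ((x_hat - x) ** 2).sum(dim=1).mean()
        kl = -0.5 * (1 + logvar - mu ** 2 - logvar.exp()).sum(dim=1).mean()
        return recon + kl
```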
Conditional GAN
BiGAN (with an Encoder); unsupervised.

CoGAN (face swapping, adding a beard, changing hair, ...).

CycleGAN (learns an unknown mapping between two domains, similar to CoGAN; e.g. landscape photos to oil paintings, ...).
RNN (Recurrent Neural Network)
Word Embedding
Word2Vec; Continuous Bag-of-Words (CBOW): predict the middle word from its context, with randomly sampled negative examples (Noise-Contrastive Estimation). A sketch follows.
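A sketch of CBOW with negative sampling in PyTorch; the vocabulary size, embedding dimension, and batch format are assumptions:

```python
import torch
from torch import nn
import torch.nn.functional as F

class CBOW(nn.Module):
    def __init__(self, vocab_size=10000, dim=64):
        super().__init__()
        self.in_emb = nn.Embedding(vocab_size, dim)   # context-word vectors
        self.out_emb = nn.Embedding(vocab_size, dim)  # target-word vectors

    def forward(self, context, target, negatives):
        # context: (bs, window), target: (bs,), negatives: (bs, k) sampled noise words
        h = self.in_emb(context).mean(dim=1)              # average the context embeddings
        pos = torch.sum(h * self.out_emb(target), dim=1)  # score of the true middle word
        neg = torch.bmm(self.out_emb(negatives), h.unsqueeze(2)).squeeze(2)
        # Maximize sigmoid(pos) and minimize sigmoid(neg): the negative-sampling objective.
        return -(F.logsigmoid(pos) + F.logsigmoid(-neg).sum(dim=1)).mean()
```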
LSTM
Used for sentiment classification, many-to-one; hw9.
Transformer
Machine translation, many-to-many.

Attention (multi-head); the Transformer architecture; hw10. A sketch of the attention core follows.
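A sketch of scaled dot-product attention, the core operation of the Transformer (single head, masking omitted):

```python
import math
import torch

def attention(q, k, v):
    # q, k, v: (bs, seq_len, d_k). Scores are scaled by sqrt(d_k)
    # to keep the softmax gradients well-behaved.
    d_k = q.size(-1)
    scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(d_k)
    weights = torch.softmax(scores, dim=-1)  # each query attends over all keys
    return torch.matmul(weights, v)
```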
GPT (Generative Pre-Trained Transformer): a generative language model built on Transformer pre-training that predicts the probability distribution of the next word (unidirectional prediction).

BERT: predicts from bidirectional context.

Both are pre-trained without labels; NLP = Natural Language Processing.
AI systems

Data acquisition, data preprocessing, modeling and tuning, deployment.

Python toolkits for parsing HTML: beautifulsoup4; the selenium tool (browser automation).

XPath: a language for locating information in XML documents. A scraping sketch follows.
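A minimal scraping sketch using beautifulsoup4 alongside lxml's XPath support (the URL and selectors here are hypothetical):

```python
import requests
from bs4 import BeautifulSoup
from lxml import etree

html = requests.get("https://example.com").text  # hypothetical URL

# Parse with BeautifulSoup and select by tag.
soup = BeautifulSoup(html, "html.parser")
titles = [a.get_text() for a in soup.find_all("a")]

# The same query expressed as XPath via lxml.
tree = etree.HTML(html)
titles_xpath = tree.xpath("//a/text()")
```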
Deep learning lets the network extract features automatically; for example, some CNNs are in effect learned feature extractors.
Feature selection

1) Filter

Pearson correlation coefficient.

Lasso feature selection (works because many coefficients are driven exactly to 0).
Gini index: let $p_k$ be the fraction of label $k$ among the samples at a subtree node; the Gini index is $G = 1 - \sum_k p_k^2$.

Gini decrease: $\Delta G = G_{\text{parent}} - \sum_{\text{child}} \frac{n_{\text{child}}}{n_{\text{parent}}} G_{\text{child}}$, the impurity reduction achieved by a split.

Gini importance: the total (or averaged) Gini decrease over all splits that use a given feature.

After training a random forest, these Gini importances can be read off directly — see the sketch below.
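A sketch of filter-style selection with sklearn; the synthetic dataset and the correlation threshold are assumptions:

```python
import numpy as np
from sklearn.ensemble import RandomForestClassifier

# Synthetic stand-in data: X (n_samples, n_features), y labels.
X = np.random.rand(200, 10)
y = (X[:, 0] + X[:, 3] > 1).astype(int)

# Filter: keep features whose absolute Pearson correlation with y is high enough.
corr = np.array([abs(np.corrcoef(X[:, j], y)[0, 1]) for j in range(X.shape[1])])
keep = corr > 0.1  # assumed threshold

# Gini importances from a trained random forest.
rf = RandomForestClassifier(n_estimators=100).fit(X, y)
print(rf.feature_importances_)  # mean decrease in Gini impurity per feature
```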
2) Wrapper

Search and iterate: permute the values of one feature at a time and measure the performance drop (permutation importance); SHAP. A sketch follows.
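Permutation importance via sklearn, reusing the `rf`, `X`, and `y` from the sketch above:

```python
from sklearn.inspection import permutation_importance

# Shuffle each feature column in turn and measure how much the score drops.
result = permutation_importance(rf, X, y, n_repeats=10)
print(result.importances_mean)
```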
Ensemble learning

Bagging / reduces variance

Boosting / reduces bias

Stacking / reduces variance

AutoGluon can perform model selection automatically.
Model testing

Hyperparameter optimization (HPO)

Neural architecture search (NAS)

HPO: multi-fidelity methods (train on a subsampled dataset, reduce model size, early stopping).

NAS: reinforcement learning (slow); one-shot methods (train the most promising candidates from scratch, caring only about the candidates' ranking after a few epochs). A random-search sketch follows.
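A minimal random-search HPO sketch; the search space and the stand-in objective are hypothetical:

```python
import random

def train_and_eval(lr, batch_size):
    # Stand-in objective; in practice, train a model with these
    # hyperparameters and return its validation accuracy.
    return 1.0 - abs(lr - 0.01) - 0.001 * batch_size / 128

best_cfg, best_acc = None, float("-inf")
for trial in range(20):
    cfg = {
        "lr": 10 ** random.uniform(-4, -1),         # sample lr log-uniformly
        "batch_size": random.choice([32, 64, 128]),
    }
    acc = train_and_eval(**cfg)
    if acc > best_acc:
        best_cfg, best_acc = cfg, acc
print(best_cfg, best_acc)
```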