深度学习回归任务训练代码模版 [TOC]
参数设置 超参设置:config
包含所有训练需要的超参数(便于后续的调参),以及模型需要存储的位置
# Pick the GPU when one is available, otherwise fall back to CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Every tunable hyper-parameter in one place (easy to sweep later),
# plus the path where the best checkpoint is stored.
config = {
    'seed': 5201314,        # random seed for reproducibility
    'select_all': False,    # True: use all feature columns; False: hand-picked subset
    'valid_ratio': 0.2,     # fraction of training data held out for validation
    'n_epochs': 3000,       # maximum number of epochs
    'batch_size': 256,
    'learning_rate': 1e-5,
    'early_stop': 400,      # halt after this many epochs without validation improvement
    'save_path': './models/model.ckpt',  # best-model checkpoint location
}
功能函数 导入需要的Python包
# NOTE(review): in the scraped source these import statements were fused onto
# one line with no separators (a syntax error); restored as a conventional
# grouped import block: stdlib / data stack / third-party / torch.
import csv
import math
import os

import numpy as np
import pandas as pd

from tqdm.notebook import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from torchviz import make_dot
from torch.utils.tensorboard import SummaryWriter
一些重要的方法(随机种子设置、数据拆分、模型预测)
def same_seed(seed):
    """Pin every random source (numpy, torch, CUDA) so runs are reproducible."""
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    print(f'Set Seed = {seed}')


def train_valid_split(data_set, valid_ratio, seed):
    """Split data_set into training and validation parts.

    The split is driven by a seeded torch.Generator, so the same seed always
    produces the same partition. Returns (train, valid) as numpy arrays.
    """
    n_valid = int(valid_ratio * len(data_set))
    n_train = len(data_set) - n_valid
    train_subset, valid_subset = random_split(
        data_set,
        [n_train, n_valid],
        generator=torch.Generator().manual_seed(seed),
    )
    return np.array(train_subset), np.array(valid_subset)


def predict(test_loader, model, device):
    """Run inference over test_loader and return all predictions as one numpy array."""
    model.eval()  # disable dropout / use running batch-norm statistics
    collected = []
    for x in tqdm(test_loader):
        x = x.to(device)
        with torch.no_grad():
            collected.append(model(x).detach().cpu())
    return torch.cat(collected, dim=0).numpy()
# Data loading: custom Dataset wrapper.
class COVID19Dataset(Dataset):
    """Dataset pairing a feature matrix with optional targets.

    x: np.ndarray feature matrix.
    y: np.ndarray labels, or None for an inference-only dataset, in which
       case __getitem__ yields features alone.
    """

    def __init__(self, x, y=None):
        self.x = torch.FloatTensor(x)
        self.y = None if y is None else torch.FloatTensor(y)

    def __getitem__(self, idx):
        if self.y is None:
            return self.x[idx]
        return self.x[idx], self.y[idx]

    def __len__(self):
        return len(self.x)
特征选择(可选) 观察数据,选择更有效的数据
# Peek at the training data: per-column count / mean / std / quartiles.
df = pd.read_csv('./covid.train.csv')
df.describe()
利用Pearson相关系数分析不同feature与label的相关性强弱。
# Rank features by Pearson correlation with the label, strongest first,
# to guide manual feature selection.
df.corr()['tested_positive'].sort_values(ascending=False)
def select_feat(train_data, valid_data, test_data, select_all=True):
    """Split features from labels and choose which feature columns to keep.

    Args:
        train_data: 2-D array whose last column is the label.
        valid_data: 2-D array whose last column is the label.
        test_data:  2-D array of features only (no label column).
        select_all: keep every feature column when True; otherwise use a
            hand-picked subset.

    Returns:
        (x_train, x_valid, x_test, y_train, y_valid)
    """
    # The last column ([:, -1]) of train/valid is the regression target.
    y_train, y_valid = train_data[:, -1], valid_data[:, -1]
    # Everything except the last column ([:, :-1]) are the raw features;
    # test_data carries no label, so it is taken as-is.
    raw_x_train, raw_x_valid, raw_x_test = train_data[:, :-1], valid_data[:, :-1], test_data

    if select_all:
        feat_idx = list(range(raw_x_train.shape[1]))
    else:
        # Hand-picked columns (presumably one-hot state indicators plus the
        # prior-day 'tested_positive' columns — TODO confirm against the
        # dataset schema before reusing for other data).
        feat_idx = list(range(1, 38)) + [53, 69, 85, 101]

    return raw_x_train[:, feat_idx], raw_x_valid[:, feat_idx], raw_x_test[:, feat_idx], y_train, y_valid
数据读取
从文件中读取数据pd.read_csv
数据拆分成三份 训练(training)、验证(validation)、测试(testing)
train_valid_split
: 分成训练、验证
select_feat
:拆分特征和label,并进行特征选择
COVID19Dataset
: 分别将训练、验证、测试集的特征和label组合成可以用于快速迭代训练的数据集train_dataset, valid_dataset, test_dataset
# Fix all random seeds before touching the data, for reproducibility.
same_seed(config['seed'])

# Show up to 200 columns when displaying DataFrames.
# FIX: use the exact option name 'display.max_columns'; the original
# 'display.max_column' only worked via pandas' fragile partial matching.
pd.set_option('display.max_columns', 200)

train_df, test_df = pd.read_csv('./covid.train.csv'), pd.read_csv('./covid.test.csv')
display(train_df.head(3))

# Work with plain numpy arrays from here on; free the DataFrames.
train_data, test_data = train_df.values, test_df.values
del train_df, test_df

train_data, valid_data = train_valid_split(train_data, config['valid_ratio'], config['seed'])

print(f"""train_data size: {train_data.shape}
valid_data size: {valid_data.shape}
test_data size: {test_data.shape}""")

x_train, x_valid, x_test, y_train, y_valid = select_feat(
    train_data, valid_data, test_data, config['select_all'])

print(f'number of features: {x_train.shape[1]}')

train_dataset, valid_dataset, test_dataset = COVID19Dataset(x_train, y_train), \
                                             COVID19Dataset(x_valid, y_valid), \
                                             COVID19Dataset(x_test)

# pin_memory=True speeds host-to-GPU transfer; shuffle only the train/valid
# loaders — the test loader must keep a fixed order for the submission file.
train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True)
valid_loader = DataLoader(valid_dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=False, pin_memory=True)
# Model definition.
class My_Model(nn.Module):
    """Small feed-forward regression network: input_dim -> 16 -> 8 -> 1."""

    def __init__(self, input_dim):
        super(My_Model, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 1),
        )

    def forward(self, x):
        # (B, input_dim) -> (B, 1) -> (B,): drop the trailing singleton
        # dimension so predictions line up with the (B,) label tensor.
        return self.layers(x).squeeze(1)
# Training: one train pass + one validation pass per epoch.
def trainer(train_loader, valid_loader, model, config, device):
    """Train `model`, validating every epoch and checkpointing the best one.

    Stops after config['n_epochs'] epochs, or earlier once validation loss
    has not improved for config['early_stop'] consecutive epochs. The best
    state_dict is written to config['save_path'].
    """
    criterion = nn.MSELoss(reduction='mean')
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=config['learning_rate'], momentum=0.9)

    writer = SummaryWriter()  # tensorboard logging (defaults to ./runs/)

    if not os.path.isdir('./models'):
        os.mkdir('./models')  # make sure the checkpoint directory exists

    n_epochs, best_loss, step, early_stop_count = config['n_epochs'], math.inf, 0, 0

    for epoch in range(n_epochs):
        # ---- training pass ----
        model.train()
        loss_record = []
        train_pbar = tqdm(train_loader, position=0, leave=True)
        train_pbar.set_description(f'Epoch [{epoch+1}/{n_epochs}]')

        for x, y in train_pbar:
            optimizer.zero_grad()
            x, y = x.to(device), y.to(device)
            pred = model(x)
            loss = criterion(pred, y)
            loss.backward()
            optimizer.step()
            step += 1
            loss_record.append(loss.detach().item())
            train_pbar.set_postfix({'loss': loss.detach().item()})

        mean_train_loss = sum(loss_record) / len(loss_record)
        writer.add_scalar('Loss/train', mean_train_loss, step)

        # ---- validation pass ----
        model.eval()
        loss_record = []
        for x, y in valid_loader:
            x, y = x.to(device), y.to(device)
            with torch.no_grad():
                pred = model(x)
                loss = criterion(pred, y)
            loss_record.append(loss.item())

        mean_valid_loss = sum(loss_record) / len(loss_record)
        print(f'Epoch [{epoch+1}/{n_epochs}]: Train loss: {mean_train_loss:.4f}, Valid loss: {mean_valid_loss:.4f}')
        writer.add_scalar('Loss/valid', mean_valid_loss, step)

        # ---- checkpointing / early stopping ----
        if mean_valid_loss < best_loss:
            best_loss = mean_valid_loss
            torch.save(model.state_dict(), config['save_path'])
            print('Saving model with loss {:.3f}...'.format(best_loss))
            early_stop_count = 0
        else:
            early_stop_count += 1

        if early_stop_count >= config['early_stop']:
            print('\nModel is not improving, so we halt the training session.')
            writer.close()  # FIX: flush pending scalars and release the writer
            return

    writer.close()  # FIX: also close when the epoch budget is exhausted
开始训练
# Build the model on the chosen device and start training.
model = My_Model(input_dim=x_train.shape[1]).to(device)
trainer(train_loader, valid_loader, model, config, device)
使用 tensorboard
输出模型训练过程和指标可视化(可选) tensorboard
可视化工具:可以记录并展现模型的训练过程中的各种指标,这里我们是记录模型的损失
# IPython magics: launch TensorBoard inside the notebook to inspect the
# Loss/train and Loss/valid curves logged by trainer().
%reload_ext tensorboard
%tensorboard --logdir=./runs/ --port=6007
结果预测 将测试集的预测结果保存到pred.csv。
def save_pred(preds, file):
    """Write predictions to `file` as a CSV with header (id, tested_positive)."""
    # FIX: the original docstring said this saves the *model*; it actually
    # writes the predictions for submission.
    with open(file, 'w', newline='') as fp:
        writer = csv.writer(fp)
        writer.writerow(['id', 'tested_positive'])
        for i, p in enumerate(preds):
            writer.writerow([i, p])


# Reload the best checkpoint and predict on the test set.
# FIX: map_location keeps this working on CPU-only machines even when the
# checkpoint was written during a CUDA run.
model = My_Model(input_dim=x_train.shape[1]).to(device)
model.load_state_dict(torch.load(config['save_path'], map_location=device))
preds = predict(test_loader, model, device)
save_pred(preds, 'pred.csv')
参考 完整代码见:Hongyi_Lee_dl_homeworks/HW1_Regression at master · huaiyuechusan/Hongyi_Lee_dl_homeworks (github.com)
参考文章:
【李宏毅《机器学习》2022】作业1:COVID 19 Cases Prediction (Regression)_李宏毅2022作业-CSDN博客
Hongyi_Lee_dl_homeworks/Warmup/Pytorch_Tutorial_2.pdf at master · huaiyuechusan/Hongyi_Lee_dl_homeworks (github.com)