Python气象信息系统工程 第十二章

第12章 机器学习初探

12.2 传统机器学习

12.2.2 示例数据集

1
2
3
4
5
6
from sklearn.datasets import load_iris
# Load the iris example dataset bundled with scikit-learn and report the
# shapes of its feature matrix and of its label vector.
data = load_iris()
feature_shape = data['data'].shape
label_shape = data['target'].shape

print(feature_shape)
print('-----')
print(label_shape)

12.2.5 分割数据集

1
2
3
4
5
6
7
8
9
10
11
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load iris, then separate the feature matrix (X) from the class labels (y).
data = load_iris()
X = data['data']
y = data['target']

# Hold out 30% of the samples as the test set; the remainder is the training set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Show the (features, labels) shapes of each split.
train_shapes = (X_train.shape, y_train.shape)
test_shapes = (X_test.shape, y_test.shape)
print(*train_shapes)
print('-----')
print(*test_shapes)

12.2.6 使用内建算法进行学习

1
2
3
4
5
6
7
8
9
10
11
12
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Load iris and split it 70/30 into training and test sets.
data = load_iris()
X = data['data']
y = data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Create the model instance (the constructor accepts hyperparameters that
# configure the algorithm) and train it on the training set in one step;
# fit() returns the estimator itself.
model = RandomForestClassifier().fit(X_train, y_train)

# Measure the model's accuracy on the held-out test set.
score = model.score(X_test, y_test)
print(score)

12.2.7 使用其他指标评估模型

1
2
3
4
5
6
from sklearn.metrics import f1_score

# Predict labels for the test set with the already-trained model.
y_predict = model.predict(X_test)

# Weighted F1: the true classes go in y_true, the predicted classes in y_pred.
score = f1_score(y_true=y_test, y_pred=y_predict, average='weighted')
print(score)

12.2.8 使用模型进行预测

1
2
3
# Run the trained model on the test-set features to obtain predicted labels,
# then print the predictions.
y_predict = model.predict(X_test)
print(y_predict)

12.2.9 保存/载入训练好的模型

1
2
3
4
5
6
import joblib
# Save the trained model to disk and load it back from the SAME file.
# The original snippet dumped to 'model.joblib' but loaded from an unrelated
# absolute path, so the round trip only worked in one hosting environment.
MODEL_PATH = 'model.joblib'
joblib.dump(model, MODEL_PATH)  # save the model to a file

# NOTE: joblib files are pickle-based; only load files from a trusted source.
model_load = joblib.load(MODEL_PATH)  # load the model from the file

# Optional sanity check: the reloaded model should score like the original.
print(model_load.score(X_test, y_test))

12.3 深度学习框架

12.3.2 使用

1
2
3
4
5
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
class Model(nn.Module):
    """Small convolutional classifier with two conv/pool stages and two linear layers.

    The flatten size of 4 * 4 * 50 matches single-channel 28x28 inputs
    (28 -> conv5 -> 24 -> pool -> 12 -> conv5 -> 8 -> pool -> 4, 50 channels).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)   # 1 -> 20 channels, 5x5 kernel, stride 1
        self.pool1 = nn.MaxPool2d(2, 2)       # halves height and width
        self.conv2 = nn.Conv2d(20, 50, 5, 1)  # 20 -> 50 channels, 5x5 kernel, stride 1
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(4 * 4 * 50, 500)
        self.fc2 = nn.Linear(500, 10)         # 10 output classes

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, 10).

        Log-probabilities (not plain softmax probabilities) are returned
        because ``F.nll_loss`` expects log-probability inputs; the argmax
        over classes is the same either way.
        """
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = x.view(-1, 4 * 4 * 50)  # flatten feature maps for the linear layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
1
2
3
4
5
6
7
8
9
10
# Training split of MNIST, stored under ./resource (downloaded if missing),
# with every image converted to a tensor.
train_dataset = datasets.MNIST(
    './resource',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=5000, shuffle=True)

# Test split of MNIST from the same directory; no download is requested here,
# so the files must already be present under ./resource.
test_dataset = datasets.MNIST(
    './resource',
    train=False,
    transform=transforms.ToTensor(),
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=5000, shuffle=True)

1
2
3
4
5
6
7
8
9
10
11
12
13
# Train the network for 20 epochs and print progress every second batch.
# NOTE(review): `model` and `optimizer` are not created in the visible
# snippets -- presumably a `Model()` instance and a `torch.optim` optimizer
# over `model.parameters()`; confirm they are defined before this runs.
model.train()  # put the model into training mode
for epoch in range(1, 21):  # 20 epochs, numbered from 1 for display
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()  # reset accumulated gradients
        output = model(data)  # forward pass
        # F.nll_loss expects log-probabilities as input.
        # NOTE(review): confirm the model's forward pass returns log-softmax.
        loss = F.nll_loss(output, target)  # compute the loss
        loss.backward()  # backward pass
        optimizer.step()  # update the model weights
        if batch_idx % 2 == 0:
            # Fixed: the percentage previously referenced the undefined name
            # `batch0_idx`, which raised NameError on the first report.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# Evaluate the model on the test set: accumulate the summed loss and the
# number of correct predictions, then report the average loss and accuracy.
model.eval()  # put the model into evaluation mode
test_loss = 0
correct = 0
with torch.no_grad():  # no gradients are needed while evaluating
    for data, target in test_loader:
        output = model(data)
        # Sum (not average) the per-sample losses so that dividing by the
        # dataset size afterwards yields the mean.
        # NOTE(review): F.nll_loss expects log-probabilities -- confirm the
        # model's forward pass returns log-softmax output.
        batch_loss = F.nll_loss(output, target, reduction='sum')
        test_loss += batch_loss.item()
        # The predicted class is the index of the largest output per sample.
        pred = output.argmax(dim=1, keepdim=True)
        hits = pred.eq(target.view_as(pred))
        correct += hits.sum().item()

num_samples = len(test_loader.dataset)
test_loss /= num_samples

print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, num_samples,
    100. * correct / num_samples))