import os
import re
import numpy as np
import pandas as pd
import torch
from torch import nn as nn
from matplotlib import pyplot as plt


device = 'cuda' if torch.cuda.is_available() else 'cpu'


df = pd.read_csv(r'C:\\Users\\asheo\\Desktop\\附件3-气象数据.csv')
df[['最高温度', '最低温度']] = df[['最高温度', '最低温度']].applymap(lambda x: int(x[0:-1]))
df = df.drop(df.columns[-3:], axis=1)
df = df.rename(columns = {'最高温度': '最高温度/℃', '最低温度': '最低温度/℃'})
df


whether = df['天气状况'].apply(lambda x: x.split('/')).values.tolist()
whether = sum(whether, [])
whether = set(whether)
whether

{'中到大雨',
 '中雨',
 '中雨-大雨',
 '多云',
 '大到暴雨',
 '大雨',
 '小到中雨',
 '小雨',
 '小雨-中雨',
 '局部多云',
 '晴',
 '晴间多云',
 '暴雨',
 '阴',
 '阵雨',
 '雷阵雨',
 '雾'}


# def whetherQuantify(x):
#     if any(i in x for i in ['暴', '大', '中', '雷']):
#         return 4
#     elif '雨' in x:
#         return 3
#     elif '阴' in x:
#         return 2
#     elif '雾' in x:
#         return 1
#     else:
#         return 0

# df['天气指标'] = df['天气状况'].apply(whetherQuantify)
df['月'] = df['日期'].apply(lambda x: re.search(r'(\d+)月', x).group(1))
df['日'] = df['日期'].apply(lambda x: re.search(r'(\d+)日', x).group(1))

df


dataSet = pd.DataFrame({
    'maxTemp': df['最高温度/℃'],
    'minTemp': df['最低温度/℃']
})

# dataSet = pd.concat([dataSet, pd.get_dummies(df['天气指标'], prefix='whether')], axis=1) 
dataSet = pd.concat([dataSet, pd.get_dummies(df['月'], prefix='month')], axis=1) 
dataSet = pd.concat([dataSet, pd.get_dummies(df['日'], prefix='day')], axis=1)
dataSet


dataSet.shape

(1345, 45)


# LSTM超参数
num_epochs = 30
hidden_size = 35
num_layers = 2
dropout_rate = 0.2

time_step = 1  # 这个参数比较重要的，但是这里没有使用，在训练中卡虑的最大time step
pred_length = 1  # 这个参数其实没有用上，没有意义

# 文件处理参数
batch_size = 2
p = 0.15

# LSTM参数
input_size = 12 + 31  # 输入特征数
output_size = 2  # 线性层输出数


class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.0):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, dropout=dropout, batch_first=True)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(hidden_size, 128)
        self.fc2 = nn.Linear(128, output_size)
        self.dropout = nn.Dropout(dropout)
        

    def forward(self, input):
        h0 = torch.zeros(self.num_layers, input.size(0), self.hidden_size).to(device)  # 单向，若双向第一个参数*2
        c0 = torch.zeros(self.num_layers, input.size(0), self.hidden_size).to(device)
        output, (hn, cn) = self.lstm(input, (h0, c0))
        output = self.dropout(output)
        output = self.relu(output)
        output = self.fc1(output)
        output = self.relu(output)
        output = self.fc2(output)  # 用所有的time setp做训练
        return output
    
    def save(self, name='LSTM', path='./model_state'):
        if not os.path.exists(path):
            os.makedirs(path)
        torch.save(model.state_dict(), path + '/{}.pt'.format(str(name)))
        return None
    
    def load(self, filePath):
        self.load_state_dict(torch.load(filePath))
        return None
    
    def test(self, X, y, save=False, fileName='', savePath='./model_state'):
        self.eval()
        testLoss = 0
        
        with torch.no_grad():
            for i in range(len(X)):
                outputs = model(X[i:i+1])
                loss = criterion(outputs, y[i:i+1])
                testLoss += loss.item()
        
        if save:
            self.save('LSTM_{}'.format(fileName), savePath)
        
        return testLoss/len(X)


trainSet = dataSet.iloc[:int(len(dataSet)*(1-p)), :]
testSet = dataSet.iloc[int(len(dataSet)*(1-p)):, :]
print(len(trainSet), len(testSet))

1143 202


# 在样本集上逐步滑动
def toBatchData(database, xSep, ySep):
    X = torch.zeros(len(database)-xSep-ySep, xSep, input_size)
    y = torch.zeros(len(database)-xSep-ySep, ySep, output_size)
    for i in range(len(database)-xSep-ySep):
        X[i, :, :] = torch.IntTensor(database.iloc[i:i+xSep, 2:].values)
        y[i] = torch.IntTensor(database.iloc[(i+xSep):(i+xSep+ySep), :2].values)
    return X, y

xTrain, yTrain = toBatchData(trainSet, time_step, time_step)
xTest, yTest = toBatchData(testSet, time_step, time_step)

xTrain = xTrain.to(device)
yTrain = yTrain.to(device)
xTest = xTest.to(device)
yTest = yTest.to(device)


model = LSTM(
    input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
    output_size=output_size, dropout=dropout_rate
).to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


model.train()
trainedLoss = []
testedLoss = []

for epoch in range(num_epochs):
    trainingLoss = 0
    
    model.train()
    for i in range(0, len(xTrain)-batch_size, batch_size):
        optimizer.zero_grad()
        outputs = model(xTrain[i:(i+batch_size)])
        loss = criterion(outputs, yTrain[i:(i+batch_size)])
        trainingLoss += loss.item()
        loss.backward()
        optimizer.step()
    
    trainedLoss.append(trainingLoss/len(xTrain))
    testedLoss.append(model.test(xTest, yTest, True, epoch+1))

    if (epoch+1)%(num_epochs//8) == 0:
        print('{} Epoch, Test Loss: {:.3f}'.format(epoch+1, testedLoss[epoch]))
        print("TRAING LOSS", trainedLoss[epoch], '\n', '='*20)

3 Epoch, Test Loss: 76.019
TRAING LOSS 11.348676380831144 
 ====================
6 Epoch, Test Loss: 25.767
TRAING LOSS 10.072912310934923 
 ====================
9 Epoch, Test Loss: 16.862
TRAING LOSS 7.387441865192183 
 ====================
12 Epoch, Test Loss: 10.962
TRAING LOSS 6.520972067201273 
 ====================
15 Epoch, Test Loss: 14.567
TRAING LOSS 6.684607018918243 
 ====================
18 Epoch, Test Loss: 7.327
TRAING LOSS 6.04519180411197 
 ====================
21 Epoch, Test Loss: 7.189
TRAING LOSS 5.788953600707647 
 ====================
24 Epoch, Test Loss: 6.979
TRAING LOSS 5.610944049437771 
 ====================
27 Epoch, Test Loss: 7.677
TRAING LOSS 5.587852919714217 
 ====================
30 Epoch, Test Loss: 7.786
TRAING LOSS 5.553234121065969 
 ====================


os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'

plt.plot(range(1, num_epochs+1), np.array(trainedLoss), linewidth=2.0, label='Training Loss')
plt.plot(range(1, num_epochs+1), np.array(testedLoss), linewidth=2.0, label='Testing Loss')
plt.xlabel('epoch', fontsize=14)
plt.ylabel('MSE', fontsize=14)
plt.title('Loss of each completion of a training', fontsize=18, loc='left')
plt.legend(fontsize=12)
plt.grid(True)
plt.xticks(range(1, num_epochs+1, 3))
plt.savefig('LSTM training.svg', format='svg')
plt.show()

import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.templates.default = "plotly_white"

plot_template = dict(
    layout=go.Layout({
        "font_size": 18,
        "xaxis_title_font_size": 24,
        "yaxis_title_font_size": 24})
)

fig = px.line(df, labels=dict(
    created_at="Date", value="PM2.5 (ug/m3)", variable="Sensor"
))
fig.update_layout(
  template=plot_template, legend=dict(orientation='h', y=1.02, title_text="")
)
fig.show()


df2 = pd.read_excel(r'C:\\Users\\asheo\\Desktop\\工作簿1.xlsx')
df2


temp_ = df2['气温'].str.extract(r'(\d+)~(\d+)', expand=True).astype(int)

df3 = pd.DataFrame({
    'month': df2['日期时间'].apply(lambda x: x//100-202300),
    'day': df2['日期时间'].apply(lambda x: x%100),
    'maxTemp': temp_.max(axis=1),
    'minTemp': temp_.min(axis=1)
})


def oneHotEncoder(labels, num_classes):
    encoded_labels = np.zeros((len(labels), num_classes))
    for i, label in enumerate(labels):
        encoded_labels[i, label-1] = 1
    return encoded_labels

dataSet2 = pd.concat([
    df3[['maxTemp', 'minTemp']],
    pd.DataFrame(oneHotEncoder(df3['month'], 12), dtype=int),
    pd.DataFrame(oneHotEncoder(df3['day'], 31), dtype=int)
], axis=1)

dataSet2


pred = []

model.eval()
with torch.no_grad():
    for i in range(86):
        pred.append(model(torch.FloatTensor(dataSet2.iloc[i:(i+1), 2:].values).unsqueeze(0).to(device)).cpu().numpy())

pred = np.array(pred)


plt.plot(np.array([np.arange(86), np.arange(86)]).T, pred.reshape(86, 2), linewidth=2.0, label='pred')
plt.plot(np.array([np.arange(86), np.arange(86)]).T, dataSet2.iloc[:86,:2].values, linewidth=2.0, label='real')
plt.xlabel('day', fontsize=14)
plt.ylabel('value', fontsize=14)
plt.title('Compare', fontsize=18, loc='left')
plt.legend(fontsize=12)
plt.grid(True)
plt.savefig('compare.svg', format='svg')
plt.show()

	日期	天气状况	最高温度/℃	最低温度/℃	白天风力风向	夜晚风力风向
0	2018年1月1日	多云/多云	22	12	无持续风向<3级	无持续风向<3级
1	2018年1月1日	多云/多云	22	12	无持续风向<3级	无持续风向<3级
2	2018年1月2日	多云/多云	22	15	无持续风向<3级	无持续风向<3级
3	2018年1月3日	多云/阴	23	15	无持续风向<3级	无持续风向<3级
4	2018年1月4日	多云/小雨	21	16	无持续风向<3级	无持续风向<3级
...	...	...	...	...	...	...
1340	2021年8月27日	雷阵雨/雷阵雨	35	26	北风1-2级	北风1-2级
1341	2021年8月28日	雷阵雨/多云	33	26	北风1-2级	北风1-2级
1342	2021年8月29日	雷阵雨/雷阵雨	32	25	北风1-2级	北风1-2级
1343	2021年8月30日	阵雨/阵雨	34	26	北风1-2级	北风1-2级
1344	2021年8月31日	雷阵雨/阵雨	32	26	北风1-2级	北风1-2级

	日期	天气状况	最高温度/℃	最低温度/℃	白天风力风向	夜晚风力风向	月	日
0	2018年1月1日	多云/多云	22	12	无持续风向<3级	无持续风向<3级	1	1
1	2018年1月1日	多云/多云	22	12	无持续风向<3级	无持续风向<3级	1	1
2	2018年1月2日	多云/多云	22	15	无持续风向<3级	无持续风向<3级	1	2
3	2018年1月3日	多云/阴	23	15	无持续风向<3级	无持续风向<3级	1	3
4	2018年1月4日	多云/小雨	21	16	无持续风向<3级	无持续风向<3级	1	4
...	...	...	...	...	...	...	...	...
1340	2021年8月27日	雷阵雨/雷阵雨	35	26	北风1-2级	北风1-2级	8	27
1341	2021年8月28日	雷阵雨/多云	33	26	北风1-2级	北风1-2级	8	28
1342	2021年8月29日	雷阵雨/雷阵雨	32	25	北风1-2级	北风1-2级	8	29
1343	2021年8月30日	阵雨/阵雨	34	26	北风1-2级	北风1-2级	8	30
1344	2021年8月31日	雷阵雨/阵雨	32	26	北风1-2级	北风1-2级	8	31

	maxTemp	minTemp	month_1	month_10	month_11	month_12	month_2	month_3	month_4	month_5	...	day_29	day_3	day_30	day_31	day_4	day_5	day_6	day_7	day_8	day_9
0	22	12	1	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
1	22	12	1	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
2	22	15	1	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
3	23	15	1	0	0	0	0	0	0	0	...	0	1	0	0	0	0	0	0	0	0
4	21	16	1	0	0	0	0	0	0	0	...	0	0	0	0	1	0	0	0	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1340	35	26	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
1341	33	26	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
1342	32	25	0	0	0	0	0	0	0	0	...	1	0	0	0	0	0	0	0	0	0
1343	34	26	0	0	0	0	0	0	0	0	...	0	0	1	0	0	0	0	0	0	0
1344	32	26	0	0	0	0	0	0	0	0	...	0	0	0	1	0	0	0	0	0	0

	日期时间	气温	天气情况	风向	风力	日出	日落
0	20230523	17~24℃	阴转晴	东北风	44928	05:57:00	19:44:00
1	20230522	16~22℃	阴	东北风	44989	05:58:00	19:43:00
2	20230521	18~25℃	小雨	西北风	44928	05:58:00	19:43:00
3	20230520	22~29℃	多云转中雨	东北风	44928	05:59:00	19:42:00
4	20230519	21~27℃	小雨	东北风	44928	05:59:00	19:41:00
...	...	...	...	...	...	...	...
85	20230227	12~18℃	多云转小雨	东北风	44928	07:22:00	18:51:00
86	20230226	10~20℃	晴转多云	东南风	44928	07:23:00	18:51:00
87	20230225	9~18℃	多云	东北风	44928	07:24:00	18:50:00
88	20230224	8~15℃	多云	东南风	44928	07:25:00	18:49:00
89	20230223	9~12℃	多云转阴	西北风	44928	07:26:00	18:49:00

	maxTemp	minTemp	0	1	2	3	4	5	6	7	...	21	22	23	24	25	26	27	28	29	30
0	24	17	0	0	0	0	1	0	0	0	...	0	1	0	0	0	0	0	0	0	0
1	22	16	0	0	0	0	1	0	0	0	...	1	0	0	0	0	0	0	0	0	0
2	25	18	0	0	0	0	1	0	0	0	...	0	0	0	0	0	0	0	0	0	0
3	29	22	0	0	0	0	1	0	0	0	...	0	0	0	0	0	0	0	0	0	0
4	27	21	0	0	0	0	1	0	0	0	...	0	0	0	0	0	0	0	0	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
85	18	12	0	1	0	0	0	0	0	0	...	0	0	0	0	0	1	0	0	0	0
86	20	10	0	1	0	0	0	0	0	0	...	0	0	0	0	1	0	0	0	0	0
87	18	9	0	1	0	0	0	0	0	0	...	0	0	0	1	0	0	0	0	0	0
88	15	8	0	1	0	0	0	0	0	0	...	0	0	1	0	0	0	0	0	0	0
89	12	9	0	1	0	0	0	0	0	0	...	0	1	0	0	0	0	0	0	0	0

温度预测：基于LSTM模型¶

数据预处理¶

LSTM¶

可视化¶

	maxTemp	minTemp	0	1	2	3	4	5	6	7	...	21	22	23	24	25	26	27	28	29	30
0	24	17	0	0	0	0	1	0	0	0	...	0	1	0	0	0	0	0	0	0	0
1	22	16	0	0	0	0	1	0	0	0	...	1	0	0	0	0	0	0	0	0	0
2	25	18	0	0	0	0	1	0	0	0	...	0	0	0	0	0	0	0	0	0	0
3	29	22	0	0	0	0	1	0	0	0	...	0	0	0	0	0	0	0	0	0	0
4	27	21	0	0	0	0	1	0	0	0	...	0	0	0	0	0	0	0	0	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
85	18	12	0	1	0	0	0	0	0	0	...	0	0	0	0	0	1	0	0	0	0
86	20	10	0	1	0	0	0	0	0	0	...	0	0	0	0	1	0	0	0	0	0
87	18	9	0	1	0	0	0	0	0	0	...	0	0	0	1	0	0	0	0	0	0
88	15	8	0	1	0	0	0	0	0	0	...	0	0	1	0	0	0	0	0	0	0
89	12	9	0	1	0	0	0	0	0	0	...	0	1	0	0	0	0	0	0	0	0

	maxTemp	minTemp	0	1	2	3	4	5	6	7	...	21	22	23	24	25	26	27	28	29	30
0	24	17	0	0	0	0	1	0	0	0	...	0	1	0	0	0	0	0	0	0	0
1	22	16	0	0	0	0	1	0	0	0	...	1	0	0	0	0	0	0	0	0	0
2	25	18	0	0	0	0	1	0	0	0	...	0	0	0	0	0	0	0	0	0	0
3	29	22	0	0	0	0	1	0	0	0	...	0	0	0	0	0	0	0	0	0	0
4	27	21	0	0	0	0	1	0	0	0	...	0	0	0	0	0	0	0	0	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
85	18	12	0	1	0	0	0	0	0	0	...	0	0	0	0	0	1	0	0	0	0
86	20	10	0	1	0	0	0	0	0	0	...	0	0	0	0	1	0	0	0	0	0
87	18	9	0	1	0	0	0	0	0	0	...	0	0	0	1	0	0	0	0	0	0
88	15	8	0	1	0	0	0	0	0	0	...	0	0	1	0	0	0	0	0	0	0
89	12	9	0	1	0	0	0	0	0	0	...	0	1	0	0	0	0	0	0	0	0

	maxTemp	minTemp	0	1	2	3	4	5	6	7	...	21	22	23	24	25	26	27	28	29	30
0	24	17	0	0	0	0	1	0	0	0	...	0	1	0	0	0	0	0	0	0	0
1	22	16	0	0	0	0	1	0	0	0	...	1	0	0	0	0	0	0	0	0	0
2	25	18	0	0	0	0	1	0	0	0	...	0	0	0	0	0	0	0	0	0	0
3	29	22	0	0	0	0	1	0	0	0	...	0	0	0	0	0	0	0	0	0	0
4	27	21	0	0	0	0	1	0	0	0	...	0	0	0	0	0	0	0	0	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
85	18	12	0	1	0	0	0	0	0	0	...	0	0	0	0	0	1	0	0	0	0
86	20	10	0	1	0	0	0	0	0	0	...	0	0	0	0	1	0	0	0	0	0
87	18	9	0	1	0	0	0	0	0	0	...	0	0	0	1	0	0	0	0	0	0
88	15	8	0	1	0	0	0	0	0	0	...	0	0	1	0	0	0	0	0	0	0
89	12	9	0	1	0	0	0	0	0	0	...	0	1	0	0	0	0	0	0	0	0