
Commit 86495a0

model1 1
1 parent 6b0844d commit 86495a0

File tree

3 files changed: +321, -1 lines changed


create_data.py

Lines changed: 20 additions & 1 deletion
@@ -167,4 +167,23 @@
 
 #%%
 df_full = pd.read_csv('data/daily_clean_full.csv')
-df_full
+df_full
+
+#%%
+# sample and show a one-year period
+from matplotlib import pyplot as plt
+values = df_full[:365].values
+values
+
+#%%
+# specify columns to plot
+groups = [0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12]
+i = 1
+# plot each column
+plt.figure()
+for group in groups:
+    plt.subplot(len(groups), 1, i)
+    plt.plot(values[:, group])
+    plt.title(df_full.columns[group], y=0.5, loc='right')
+    i += 1
+plt.show()
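For reference, pandas can draw the same stacked panels in a single call via DataFrame.plot; this is a hedged alternative sketch, not what the commit does:

# alternative to the subplot loop above (not in the commit)
axes = df_full.iloc[:365, groups].plot(subplots=True, legend=False, figsize=(8, 12))
plt.show()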

data/hourly_lorinc_2011-2018.xls

0 Bytes (binary file not shown)

model1.py

Lines changed: 301 additions & 0 deletions
@@ -0,0 +1,301 @@
#%% [markdown]
# # Model 1

#%%
# ### Import resources and create data
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')
import pandas as pd
from sklearn.preprocessing import StandardScaler

#%%
# load data
df_full = pd.read_csv('data/daily_clean_full.csv')
df_full.head()

#%%
# get data for the single-station forecast
# .copy() so the column assignments below don't trigger a SettingWithCopyWarning
df = df_full[['datetime',
              'temp_avr',
              'temp_max',
              'temp_min',
              'pres',
              'u',
              'v',
              'prec',
              'Teleki']].copy()
df.head()
#%%
# normalize columns with (c - mean) / std
cols = ['temp_avr', 'temp_max', 'temp_min', 'pres', 'u', 'v', 'prec', 'Teleki']
df[cols] = StandardScaler().fit_transform(df[cols])
df.head()
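Note that a single scaler standardizes the features and the target together, so the statistics fitted for 'Teleki' are not kept anywhere. If predictions later need to be reported in original units, one variant is to scale the target with its own scaler (dropping 'Teleki' from cols above); a minimal sketch, where target_scaler is my own name and not part of the commit:

# hypothetical variant: fit a separate scaler for the target so it can be inverted later
target_scaler = StandardScaler()
df[['Teleki']] = target_scaler.fit_transform(df[['Teleki']])
# predictions in standardized units could then be mapped back with
# target_scaler.inverse_transform(preds.reshape(-1, 1))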
#%%
# make the "yesterday" column
df['Teleki_ystd'] = df['Teleki'].shift(+1)
# fill the missing first value with the next day's value
df['Teleki_ystd'] = df['Teleki_ystd'].fillna(method='bfill')
df.head()
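To see what the shift and backfill do: shift(+1) moves every value down one row, so row t holds the previous day's measurement and the first row becomes NaN, which the backfill then copies from day two. A quick toy check:

# toy illustration of shift(+1) followed by a backfill
s = pd.Series([10.0, 11.0, 12.0])
print(s.shift(+1).tolist())                         # [nan, 10.0, 11.0]
print(s.shift(+1).fillna(method='bfill').tolist())  # [10.0, 10.0, 11.0]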
#%%
# build the PyTorch train, validation and test sets

# train data
mask = (df['datetime'] < '2017-01-01')
df_train = df.loc[mask].drop(columns=['datetime'])
df_train_input = df_train.drop(columns=['Teleki'])
df_train_label = df_train['Teleki']
df_train_label
# train tensors
train_input = torch.tensor(df_train_input.values)
train_label = torch.tensor(df_train_label.values)

#%%
# validation data
mask = (df['datetime'] < '2018-01-01') & (df['datetime'] >= '2017-01-01')
df_valid = df.loc[mask].drop(columns=['datetime'])
df_valid_input = df_valid.drop(columns=['Teleki'])
df_valid_label = df_valid['Teleki']
df_valid_label
# validation tensors
valid_input = torch.tensor(df_valid_input.values)
valid_label = torch.tensor(df_valid_label.values)

#%%
# test data
mask = (df['datetime'] >= '2018-01-01')
df_test = df.loc[mask].drop(columns=['datetime'])
df_test_input = df_test.drop(columns=['Teleki'])
df_test_label = df_test['Teleki']
df_test_label
# test tensors
test_input = torch.tensor(df_test_input.values)
test_label = torch.tensor(df_test_label.values)
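The tensors above are flat daily rows of shape (n_days, 8), while the RNN defined below consumes batches shaped (batch_size, seq_length, input_size). A minimal sketch of how one might window them into sequences; the make_windows helper and the window length are my own illustration, not part of the commit:

# hypothetical helper: slice (n_days, n_features) tensors into overlapping windows
def make_windows(inputs, labels, seq_length=20):
    xs, ys = [], []
    for start in range(len(inputs) - seq_length):
        xs.append(inputs[start:start + seq_length])
        ys.append(labels[start:start + seq_length])
    # shapes: (n_windows, seq_length, n_features) and (n_windows, seq_length)
    return torch.stack(xs).float(), torch.stack(ys).float()

# e.g. train_x, train_y = make_windows(train_input, train_label)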
#%%
plt.figure(figsize=(8, 5))

# how many time steps/data pts are in one batch of data
seq_length = 20

# generate evenly spaced data pts
time_steps = np.linspace(0, np.pi, seq_length + 1)
data = np.sin(time_steps)
data.resize((seq_length + 1, 1))  # size becomes (seq_length+1, 1), adds an input_size dimension

x = data[:-1]  # all but the last piece of data
y = data[1:]   # all but the first

# display the data
plt.plot(time_steps[1:], x, 'r.', label='input, x')
plt.plot(time_steps[1:], y, 'b.', label='target, y')

plt.legend(loc='best')
plt.show()

#%% [markdown]
# ---
# ## Define the RNN
#
# Next, we define an RNN in PyTorch. We'll use `nn.RNN` to create an RNN layer, then add a final fully-connected layer to get the output size we want. An RNN takes in a number of parameters:
# * **input_size** - the size of the input
# * **hidden_dim** - the number of features in the RNN output and in the hidden state
# * **n_layers** - the number of layers that make up the RNN, typically 1-3; greater than 1 means you'll create a stacked RNN
# * **batch_first** - whether or not the input/output of the RNN will have the batch_size as the first dimension (batch_size, seq_length, hidden_dim)
#
# Take a look at the [RNN documentation](https://pytorch.org/docs/stable/nn.html#rnn) to read more about recurrent layers.
#%%
class RNN(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super(RNN, self).__init__()

        self.hidden_dim = hidden_dim

        # define an RNN with specified parameters
        # batch_first means that the first dim of the input and output will be the batch_size
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)

        # last, fully-connected layer
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x, hidden):
        # x (batch_size, seq_length, input_size)
        # hidden (n_layers, batch_size, hidden_dim)
        # r_out (batch_size, seq_length, hidden_dim)
        batch_size = x.size(0)

        # get RNN outputs
        r_out, hidden = self.rnn(x, hidden)
        # shape output to be (batch_size*seq_length, hidden_dim)
        r_out = r_out.view(-1, self.hidden_dim)

        # get final output
        output = self.fc(r_out)

        return output, hidden
#%% [markdown]
# ### Check the input and output dimensions
#
# As a check that your model is working as expected, test out how it responds to input data.

#%%
# test that dimensions are as expected
test_rnn = RNN(input_size=1, output_size=1, hidden_dim=10, n_layers=2)

# generate evenly spaced test data pts
time_steps = np.linspace(0, np.pi, seq_length)
data = np.sin(time_steps)
data.resize((seq_length, 1))

# note: this reuses the name test_input, shadowing the weather test tensor built above
test_input = torch.Tensor(data).unsqueeze(0)  # give it a batch_size of 1 as the first dimension
print('Input size: ', test_input.size())

# test out rnn sizes
test_out, test_h = test_rnn(test_input, None)
print('Output size: ', test_out.size())
print('Hidden state size: ', test_h.size())
#%% [markdown]
# ---
# ## Training the RNN
#
# Next, we'll instantiate an RNN with some specified hyperparameters, train it over a series of steps, and see how it performs.

#%%
# decide on hyperparameters
input_size = 1
output_size = 1
hidden_dim = 32
n_layers = 1

# instantiate an RNN
rnn = RNN(input_size, output_size, hidden_dim, n_layers)
print(rnn)
#%% [markdown]
# ### Loss and Optimization
#
# This is a regression problem: can we train an RNN to accurately predict the next data point, given a current data point?
#
# >* The data points are coordinate values, so to compare a predicted and ground-truth point, we'll use a regression loss: the mean squared error.
# * It's typical to use an Adam optimizer for recurrent models.

#%%
# MSE loss and Adam optimizer with a learning rate of 0.01
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.01)
#%% [markdown]
# ### Defining the training function
#
# This function takes in an rnn and a number of steps to train for, and returns a trained rnn. It also displays the loss and the predictions every so often.
#
# #### Hidden State
#
# Pay close attention to the hidden state here:
# * Before looping over a batch of training data, the hidden state is initialized.
# * After a new hidden state is generated by the rnn, we take the latest hidden state and use it as input to the rnn for the following steps.

#%%
# train the RNN
def train(rnn, n_steps, print_every):

    # initialize the hidden state
    hidden = None

    for batch_i, step in enumerate(range(n_steps)):
        # defining the training data
        time_steps = np.linspace(step * np.pi, (step + 1) * np.pi, seq_length + 1)
        data = np.sin(time_steps)
        data.resize((seq_length + 1, 1))  # input_size=1

        x = data[:-1]
        y = data[1:]

        # convert data into Tensors
        x_tensor = torch.Tensor(x).unsqueeze(0)  # unsqueeze adds a batch_size of 1 as the first dimension
        y_tensor = torch.Tensor(y)

        # outputs from the rnn
        prediction, hidden = rnn(x_tensor, hidden)

        ## Representing Memory ##
        # make a new variable for hidden and detach the hidden state from its history
        # this way, we don't backpropagate through the entire history
        hidden = hidden.data

        # calculate the loss
        loss = criterion(prediction, y_tensor)
        # zero gradients
        optimizer.zero_grad()
        # perform backprop and update weights
        loss.backward()
        optimizer.step()

        # display loss and predictions
        if batch_i % print_every == 0:
            print('Loss: ', loss.item())
            plt.plot(time_steps[1:], x, 'r.')  # input
            plt.plot(time_steps[1:], prediction.data.numpy().flatten(), 'b.')  # predictions
            plt.show()

    return rnn

#%%
# train the rnn and monitor results
n_steps = 75
print_every = 15

trained_rnn = train(rnn, n_steps, print_every)

#%% [markdown]
# ### Time-Series Prediction
#
# Time-series prediction can be applied to many tasks. Think about weather forecasting or predicting the ebb and flow of stock market prices. You can even try to generate predictions much further in the future than just one time step!
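As a hedged illustration of that last point, one way to roll the trained model forward is to feed each one-step prediction back in as the newest input; the predict_ahead helper below is my own sketch, not part of the commit:

# hypothetical sketch: iterative multi-step forecasting with the trained RNN
def predict_ahead(rnn, seed_seq, n_future):
    # seed_seq: (1, seq_length, 1) tensor of known history
    rnn.eval()
    preds = []
    seq = seed_seq.clone()
    with torch.no_grad():
        for _ in range(n_future):
            out, _ = rnn(seq, None)
            next_val = out[-1].view(1, 1, 1)   # prediction for the last time step
            preds.append(next_val.item())
            seq = torch.cat([seq[:, 1:, :], next_val], dim=1)  # slide the window
    return preds

# e.g. seed = torch.Tensor(np.sin(np.linspace(0, np.pi, 20))).view(1, 20, 1)
#      future = predict_ahead(trained_rnn, seed, n_future=20)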
