使用卷积网络处理序列
问题
- 文本类
判别评论是好评还是差评(二分类问题)
代码样例
- 时间序列类
给定过去lookback个时间步之内的天气数据(气温),能否预测delay个时间步之后的数据(气温)?
代码样例
数据处理
- ①文本类:见前文的文本序列Embedding;如果只是使用Keras封装好的imdb数据集,直接load_data后再用pad_sequences对齐长度即可
- ②时间序列类:
按照时间步长划分批次选取数据
### 为耶拿数据准备更高分辨率的数据生成器
#导入数据
import os
import numpy as np
# Location of the Jena climate CSV file.
data_dir = 'D:\\Jupyter\\Keras\\jena_climate_2009_2016.csv\\'
fname = os.path.join(data_dir, 'jena_climate_2009_2016.csv')

# Read the whole file at once; the context manager closes the handle even
# if reading fails.
with open(fname) as f:
    data = f.read()

# The first row is the column header. Blank rows (e.g. the empty string that
# a trailing newline leaves after split) are dropped so they cannot break the
# float conversion below.
lines = data.split('\n')
header = lines[0].split(',')
lines = [line for line in lines[1:] if line.strip()]

# One row per record, one column per measurement. The first CSV field is
# skipped (presumably the timestamp -- confirm against the file).
float_data = np.zeros((len(lines), len(header) - 1))
for i, line in enumerate(lines):
    values = [float(x) for x in line.split(',')[1:]]
    float_data[i, :] = values

# Standardise every column with statistics taken from the first 200000 rows
# only, so later rows do not influence the normalisation constants.
mean = float_data[:200000].mean(axis=0)
float_data -= mean
std = float_data[:200000].std(axis=0)
float_data /= std
def generator(data, lookback, delay, min_index, max_index,
              shuffle=False, batch_size=128, step=6):
    """Yield (samples, targets) batches of sliding windows over `data`, forever.

    Args:
        data: 2D NumPy array, one row per timestep and one column per feature.
        lookback: how many timesteps back each input window reaches.
        delay: how many timesteps ahead of the anchor row the target lies.
        min_index: first row of `data` this generator may draw from.
        max_index: exclusive upper bound for anchor rows; None means
            `len(data) - delay - 1`.
        shuffle: draw anchor rows at random instead of in chronological order.
        batch_size: number of windows per batch.
        step: sampling period inside a window (every `step`-th timestep).

    Yields:
        samples: float array of shape (batch, ceil(lookback / step), features).
        targets: float array of shape (batch,), holding column 1 of the row
            `delay` steps after each anchor row.

    In sequential mode the cursor wraps back to `min_index + lookback` as soon
    as a full batch would reach `max_index`, so a trailing partial batch is
    never produced.
    """
    if max_index is None:
        max_index = len(data) - delay - 1

    # Offsets of the sampled timesteps relative to an anchor row. Using their
    # count for the buffer size keeps it correct even when `lookback` is not
    # a multiple of `step` (lookback // step would be one short).
    offsets = np.arange(-lookback, 0, step)

    i = min_index + lookback
    while True:
        if shuffle:
            rows = np.random.randint(
                min_index + lookback, max_index, size=batch_size)
        else:
            if i + batch_size >= max_index:
                i = min_index + lookback
            rows = np.arange(i, min(i + batch_size, max_index))
            i += len(rows)

        samples = np.zeros((len(rows), len(offsets), data.shape[-1]))
        targets = np.zeros((len(rows),))
        for j, row in enumerate(rows):
            samples[j] = data[row + offsets]
            targets[j] = data[row + delay][1]
        yield samples, targets
# Window configuration: sample every 3rd timestep (the generator's default
# is 6), which doubles the temporal resolution of each window.
step = 3
lookback = 720  # Unchanged
delay = 144  # Unchanged
# Defined here because the generators and the step counts below all need it;
# 128 matches the divisor that was previously hard-coded in the step counts.
batch_size = 128

# Rows [0, 200000) feed training; anchor rows are drawn at random.
train_gen = generator(float_data,
                      lookback=lookback,
                      delay=delay,
                      min_index=0,
                      max_index=200000,
                      shuffle=True,
                      step=step,
                      batch_size=batch_size)
# Rows [200001, 300000) feed validation, read in chronological order.
val_gen = generator(float_data,
                    lookback=lookback,
                    delay=delay,
                    min_index=200001,
                    max_index=300000,
                    step=step,
                    batch_size=batch_size)
# Everything from row 300001 on feeds testing (max_index=None lets the
# generator pick the last usable row).
test_gen = generator(float_data,
                     lookback=lookback,
                     delay=delay,
                     min_index=300001,
                     max_index=None,
                     step=step,
                     batch_size=batch_size)

# Number of batches to draw from each generator to cover its range once.
val_steps = (300000 - 200001 - lookback) // batch_size
test_steps = (len(float_data) - 300001 - lookback) // batch_size
模型构建
文本类(用了Embedding层和全局最大池化层GlobalMaxPooling1D)
Embedding输入【samples, maxlen】
Embedding输出【samples, maxlen, input_features】(input_features即词嵌入的维度output_dim)
from keras.models import Sequential
from keras import layers
from keras.optimizers import RMSprop
# 1D convnet for binary text classification.
# Shape comments assume max_len == 500; input is (None, 500) integer sequences.
model = Sequential()
model.add(layers.Embedding(max_features, 128, input_length=max_len))  # (None, 500, 128)
model.add(layers.Conv1D(32, 7, activation='relu'))  # (None, 494, 32)
model.add(layers.MaxPooling1D(5))  # (None, 98, 32)
model.add(layers.Conv1D(32, 7, activation='relu'))  # (None, 92, 32)
# Collapse the time axis by keeping the maximum of each feature map.
model.add(layers.GlobalMaxPooling1D())  # (None, 32)
# binary_crossentropy expects probabilities by default, so the output unit
# needs a sigmoid rather than a linear activation.
model.add(layers.Dense(1, activation='sigmoid'))  # (None, 1)
model.summary()

model.compile(optimizer=RMSprop(lr=1e-4),
              loss='binary_crossentropy',
              metrics=['acc'])
# Hold out the last 20% of the training data for validation.
history = model.fit(x_train, y_train,
                    epochs=10,
                    batch_size=128,
                    validation_split=0.2)
时间序列类(用了CNN和GRU)
输入数据
【batch_size, 时间步,属性dims】
输出
[batch_size,dims] or [batch_size, 时间步,dims]
结合一维卷积基和GRU层的模型
from keras.models import Sequential
from keras import layers
from keras.optimizers import RMSprop
# Conv1D front end followed by a GRU for the temperature regression task.
# The time dimension is left as None, so windows of any length are accepted.
model = Sequential()
model.add(layers.Conv1D(32, 5, activation='relu',
                        input_shape=(None, float_data.shape[-1])))  # (None, None, 32)
model.add(layers.MaxPooling1D(3))  # (None, None, 32)
model.add(layers.Conv1D(32, 5, activation='relu'))  # (None, None, 32)
# The GRU consumes the shortened feature sequence and returns its last state.
model.add(layers.GRU(32, dropout=0.1, recurrent_dropout=0.5))  # (None, 32)
# Single linear unit: the model predicts one real value per window.
model.add(layers.Dense(1))  # (None, 1)
model.summary()

model.compile(optimizer=RMSprop(), loss='mae')
history = model.fit_generator(train_gen,
                              steps_per_epoch=500,
                              epochs=20,
                              validation_data=val_gen,
                              validation_steps=val_steps)
模型评估
- 绘图,见前【acc,val_acc,loss,val_loss】