Parse the indices in the original .mat, split into training and test sets, and save a new .mat
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 1 15:55:32 2018
HSGAN dataset:
Semisupervised Hyperspectral Image Classification Based on Generative Adversarial Networks
Dataset: Indian Pines data set
IndiaP.mat
@author: FeiDong
"""
import numpy as np
import scipy.io as scio
import matplotlib.pyplot as plt
import math
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in newer scikit-learn
# =============================================================================
# read dataset and convert
dataset_path = './dataset/IndiaP.mat'
data_mat = scio.loadmat(dataset_path)
img = np.array(data_mat['img']) # read data from dict
GroundT = np.array(data_mat['GroundT']).T # read data from dict
data_G_img = np.zeros((GroundT.shape[0], img.shape[2]))  # holds the labeled pixels extracted from img
for i in range(GroundT.shape[0]):
    temp_index = GroundT[i, 0]  # MATLAB linear index: 1-based, column-major
    if temp_index % img.shape[0] != 0:
        temp_col = int(math.floor(temp_index / img.shape[0]))
        temp_row = int(math.fmod(temp_index, img.shape[0]) - 1)
    else:  # index falls on the last row of a column
        temp_col = int(math.floor(temp_index / img.shape[0])) - 1
        temp_row = int(img.shape[0] - 1)
    data_G_img[i, :] = img[temp_row, temp_col, :]  # numpy indexes [row, col]
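# Vectorized alternative (a sketch, not part of the original script):
# MATLAB linear indices are 1-based and column-major, so the whole loop
# can be replaced by one np.unravel_index call with order='F':
#   rows, cols = np.unravel_index(GroundT[:, 0].astype(int) - 1,
#                                 (img.shape[0], img.shape[1]), order='F')
#   data_G_img = img[rows, cols, :]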
# Each row is one sample: column 1 is the linear index, column 2 the label,
# followed by 200 columns of pixel (spectral) values.
data_sample = np.hstack((GroundT, data_G_img))
# Index and label only.
data_sample_label = data_sample[:, [0, 1]]
# Index plus the 200 pixel columns (label column removed).
data_sample_dataset = np.delete(data_sample, [1], axis=1)
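# Sanity check (illustrative, not in the original script):
# one index column plus the 200 spectral bands.
assert data_sample_dataset.shape[1] == img.shape[2] + 1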
# Randomly split into training and test subsets
x_trainI, x_testI, y_trainI, y_testI = train_test_split(
    data_sample_dataset, data_sample_label, test_size=0.1, random_state=1)
# Keep the original pixel indices of the training and test samples
x_train_index = x_trainI[:, 0]
x_test_index = x_testI[:, 0]
# Drop the index column from the samples
x_train = x_trainI[:, 1:201]
x_test = x_testI[:, 1:201]
y_train = y_trainI[:, 1][:, np.newaxis]
y_test = y_testI[:, 1][:, np.newaxis]
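# The saved indices keep every sample traceable to its source pixel, so a
# predicted label vector can later be written back into a classification
# map, e.g. (sketch; `predictions` is a hypothetical result array):
#   cls_map = np.zeros(img.shape[0] * img.shape[1])
#   cls_map[x_test_index.astype(int) - 1] = predictions
#   cls_map = cls_map.reshape(img.shape[0], img.shape[1], order='F')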
# save data_sample
data_sample_path = './dataset/data_sample.mat'
scio.savemat(data_sample_path, {'data': data_sample,
                                'x_train': x_train,
                                'x_test': x_test,
                                'y_train': y_train,
                                'y_test': y_test,
                                'file_information': 'Indian Pines data set. Each row of data is a sample: the first column is the index, the second column is the label, and the remaining 200 columns are the pixel spectrum.'})
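# Round-trip check (a sketch, not in the original script):
# check = scio.loadmat(data_sample_path)
# print(check['x_train'].shape, check['y_train'].shape)  # (N, 200), (N, 1)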
# end
# =============================================================================
Reading the new .mat
import numpy as np
import scipy.io as sio
import tensorflow as tf
def make_one_hot(data, num_label):
    # Labels are assumed to be 0-based here, e.g.
    # make_one_hot(np.array([[0], [2]]), 4) -> [[1,0,0,0], [0,0,1,0]];
    # 1-based labels (1..16) would need `data - 1` first.
    return (np.arange(num_label) == data).astype(np.int32)
def MaxMinNormalization(matrix):
    # Global min-max scaling over the whole matrix (not per band)
    Min = np.min(matrix)
    Max = np.max(matrix)
    mat_norm = (matrix - Min) / (Max - Min)
    return mat_norm
def get_batch_data(batch_size, x_train, y_train_onehot):
    # Cast to tf.float32
    x_train = tf.cast(x_train, tf.float32)
    y_train_onehot = tf.cast(y_train_onehot, tf.float32)
    # Build an input queue that yields one (sample, label) slice at a time,
    # sequentially here (shuffle=False); TF1 queue API
    input_queue = tf.train.slice_input_producer([x_train, y_train_onehot], shuffle=False)
    x_batch, y_batch = tf.train.batch(input_queue, batch_size=batch_size, num_threads=1, capacity=128)
    return x_batch, y_batch
def get_files():
    dataset_path = 'data_sample_new.mat'
    data_mat = sio.loadmat(dataset_path)
    data_sample = np.array(data_mat['data'])    # full sample matrix
    x_train = np.array(data_mat['x_train'])
    x_test = np.array(data_mat['x_test'])
    y_train = np.array(data_mat['y_train'])
    y_test = np.array(data_mat['y_test'])
    # Min-max normalization
    x_train = MaxMinNormalization(x_train)
    x_test = MaxMinNormalization(x_test)
    # One-hot encoding (16 classes)
    y_train_onehot = make_one_hot(y_train, 16)
    y_test_onehot = make_one_hot(y_test, 16)
    print('x_train.shape', x_train.shape)
    print(x_train[0])
    print('y_train_onehot.shape', y_train_onehot.shape)
    print(y_train_onehot[0])
    return x_train, y_train_onehot
def get_batch(x_train, y_train_onehot, image_W, image_H, batch_size, capacity):
    # image_W, image_H and capacity are unused here; kept for interface
    # compatibility with an image-based pipeline
    return get_batch_data(batch_size, x_train, y_train_onehot)

def main(argv=None):
    t1, t2 = get_files()
    t3, t4 = get_batch(t1, t2, 1, 200, 6, 126)
    print("Training data batches are ready!")

if __name__ == '__main__':
    main()
# Then in training.py just call get_files followed by get_batch.
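Note that the TF1 queue API used above only produces data once queue runners have been started inside a session. A minimal consumption sketch (variable names are illustrative, not the original training.py):

import tensorflow as tf

x_train, y_train_onehot = get_files()
x_batch, y_batch = get_batch(x_train, y_train_onehot, 1, 200, 6, 126)
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    xb, yb = sess.run([x_batch, y_batch])  # one batch of 6 samples
    coord.request_stop()
    coord.join(threads)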
Extracting neighborhood windows from the .mat file
I split the .mat file with MATLAB (it is more convenient), then read the split files in Python, merge them into a dataset .mat, and use the same get_files and get_batch as above.
MATLAB splitting:
testt.m
clc, clear, close all
% load the ground truth and the hyperspectral image
path = '.\Dataset\';
inputs = 'Salinas';
location = [path, inputs];
load(location);
%%
% estimate the size of the input image
[height, width, bands] = size(img);
% build the label map (height x width)
GroundImage = zeros(height, width);
GroundImage(GroundT(1, :)) = GroundT(2, :);
X_cell = 29;
Y_cell = 29;
totalNum = floor(height/Y_cell) * floor(width/X_cell);  % number of windows
OutData = zeros(totalNum, X_cell*Y_cell*bands);  % one flattened window per row (allocated but unused below)
for row = 1:floor(height/Y_cell)
    start = (row-1)*Y_cell;
    stop = row*Y_cell;
    dataCol = GroundImage(start+1:stop, :, :);  % take the strip of Y_cell rows
    Path_ = strcat(path, 'Label', 'row', num2str(row));  % strcat builds the output prefix, e.g. 'Labelrow1'
    testtfunc(dataCol, width, X_cell, Y_cell, Path_, row);
    row/floor(height/Y_cell)  % progress indicator
end
testtfunc.m
function [ ] = testtfunc(label, width, X_cell, Y_cell, outputPath, row)
ColNum = floor(width / X_cell);
for col = 1:ColNum
    start = (col-1)*X_cell;
    stop = col*X_cell;
    data = label(:, start+1:stop, :);
    path_ = strcat(outputPath, 'col', num2str(col));
    % average the nonzero labels inside the window
    sum = 0;
    count = 0;
    for i = 1:X_cell
        for j = 1:Y_cell
            if data(j, i) ~= 0  % compare with numeric 0, not the char '0'
                sum = sum + data(j, i);
                count = count + 1;
            end
        end
    end
    result = sum / count;  % note: a window with no labeled pixels yields NaN (0/0)
    strr = strcat(path_, '.mat');
    save(strr, 'result');
end
Python code to read the split files and build the dataset .mat (the analogous MATLAB script that saves the data windows under the key 'data' is not shown):
import numpy as np
import scipy.io as scio
from sklearn.model_selection import train_test_split

dataset_path = './Salinas切割后数据/'              # directory of split data windows
label_path = './Salinas切割后标签(有值的取均值)/'  # directory of window labels (mean of nonzero values)
#dataname = 'row1col1'
#data_mat = scio.loadmat(dataset_path + dataname)
#data = np.array(data_mat['data'])
ROW = 17
COL = 7
train_data = []
train_label = []
for i in range(ROW):
    for j in range(COL):
        dataname = 'row' + str(i+1) + 'col' + str(j+1)
        data_mat = scio.loadmat(dataset_path + dataname)
        data = data_mat['data']
        train_data.append(data)
train_data = np.array(train_data)
for i in range(ROW):
    for j in range(COL):
        dataname = '_Label_row' + str(i+1) + 'col' + str(j+1)  # e.g. _Label_row1col1
        data_mat = scio.loadmat(label_path + dataname)
        label = data_mat['result'][0]
        train_label.append(label)
train_label = np.array(train_label)
print('train_data.shape', train_data.shape)
print('train_label.shape', train_label.shape)
x_train, x_test, y_train, y_test = train_test_split(
    train_data, train_label, test_size=0.1, random_state=1)
# save data_sample
data_sample_path = './data_sample_Salinas.mat'
scio.savemat(data_sample_path, {'data': train_data,
                                'x_train': x_train,
                                'x_test': x_test,
                                'y_train': y_train,
                                'y_test': y_test,
                                'file_information': 'Salinas data set, each row is a sample. Made by Kevin'})
Saving the neighborhood windows to disk and then reading them back is inefficient and memory-hungry.
Doing the neighborhood-window split directly in Python is better, and only slightly more work than MATLAB: create an array with numpy's zeros, fill it while parsing the original .mat, then split it into blocks, or slide a neighborhood window over it.
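A minimal numpy sketch of that direct approach, under the same assumptions as the MATLAB scripts above (img is height x width x bands, ground is the height x width label map, the 29x29 window size comes from testt.m; the function name split_windows is illustrative):

import numpy as np

def split_windows(img, ground, x_cell=29, y_cell=29):
    # Non-overlapping windows; each window's label is the mean of its
    # nonzero ground-truth labels, mirroring testtfunc.
    h, w, bands = img.shape
    data, labels = [], []
    for r in range(h // y_cell):
        for c in range(w // x_cell):
            block = img[r*y_cell:(r+1)*y_cell, c*x_cell:(c+1)*x_cell, :]
            lab = ground[r*y_cell:(r+1)*y_cell, c*x_cell:(c+1)*x_cell]
            nz = lab[lab != 0]
            # unlabeled windows get 0 here (testtfunc would produce NaN)
            labels.append(nz.mean() if nz.size else 0.0)
            data.append(block.reshape(-1))  # one flattened window per row
    return np.array(data), np.array(labels)

The two returned arrays can then go straight into train_test_split and savemat as above, with no intermediate per-window .mat files.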