Parsing the original .mat's indices, splitting into training and test sets, and building a new .mat
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 1 15:55:32 2018
HSGAN dataset: Semisupervised Hyperspectral Image Classification Based on Generative Adversarial Networks
Dataset: Indian Pines data set IndiaP.mat
@author: FeiDong
"""
import numpy as np
import scipy.io as scio
import matplotlib.pyplot as plt
import math
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in newer versions

# =============================================================================
# read dataset and convert
dataset_path = './dataset/IndiaP.mat'
data_mat = scio.loadmat(dataset_path)
img = np.array(data_mat['img'])            # read data from dict
GroundT = np.array(data_mat['GroundT']).T  # read data from dict

data_G_img = np.zeros((GroundT.shape[0], img.shape[2]))  # holds the labelled pixels extracted from img
for i in range(GroundT.shape[0]):
    temp_index = GroundT[i, 0]  # 1-based, column-major MATLAB linear index
    if temp_index % img.shape[0] != 0:
        temp_col = int(math.floor(temp_index / img.shape[0]))
        temp_row = int(math.fmod(temp_index, img.shape[0]) - 1)
    else:
        temp_col = int(math.floor(temp_index / img.shape[0])) - 1
        temp_row = int(img.shape[0] - 1)
    # img is (height, width, bands), so the row index comes first
    data_G_img[i, :] = img[temp_row, temp_col, :]

# each row is a sample: column 1 is the index, column 2 the label, the remaining 200 columns the pixel spectrum
data_sample = np.hstack((GroundT, data_G_img))
# each row is a sample: column 1 is the index, column 2 the label
data_sample_label = data_sample[:, [0, 1]]
# each row is a sample: column 1 is the index, the remaining 200 columns the pixel spectrum
data_sample_dataset = np.delete(data_sample, [1], axis=1)

# randomly split into training and test subsets
x_trainI, x_testI, y_trainI, y_testI = train_test_split(
    data_sample_dataset, data_sample_label, test_size=0.1, random_state=1)

# extract the indices of the training and test samples
x_train_index = x_trainI[:, 0]
x_test_index = x_testI[:, 0]
# drop the index column from the samples
x_train = x_trainI[:, 1:201]
x_test = x_testI[:, 1:201]
y_train = y_trainI[:, 1][:, np.newaxis]
y_test = y_testI[:, 1][:, np.newaxis]

# save data_sample
data_sample_path = './dataset/data_sample.mat'
scio.savemat(data_sample_path, {'data': data_sample,
                                'x_train': x_train,
                                'x_test': x_test,
                                'y_train': y_train,
                                'y_test': y_test,
                                'file_information': 'Indian Pines data set, each row is a sample, '
                                                    'the first column is the index value, the second column '
                                                    'is the label, and the last 200 columns are pixel information.'})
# end
# =============================================================================
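The loop above mainly decodes GroundT's 1-based, column-major MATLAB linear indices into (row, col) pairs. As a cross-check, the same extraction can be done in a single vectorized call; a minimal sketch, assuming the img and GroundT arrays loaded above:

import numpy as np

# GroundT[:, 0] holds 1-based, column-major ('F'-order) MATLAB linear indices;
# np.unravel_index recovers all (row, col) pairs at once.
rows, cols = np.unravel_index(GroundT[:, 0].astype(int) - 1,
                              (img.shape[0], img.shape[1]), order='F')
data_G_img = img[rows, cols, :]  # shape: (num_labelled_pixels, bands)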
Reading the new .mat
import numpy as np
import scipy.io as sio
import tensorflow as tf


def make_one_hot(data, num_label):
    # labels in the .mat are assumed 1-based (1..16), so compare against 1..num_label
    return (np.arange(1, num_label + 1) == data).astype(np.int32)


def MaxMinNormalization(matrix):
    Min = np.min(matrix)
    Max = np.max(matrix)
    mat_norm = (matrix - Min) / (Max - Min)
    return mat_norm


def get_batch_data(batch_size, x_train, y_train_onehot):
    # cast to tf.float32
    x_train = tf.cast(x_train, tf.float32)
    y_train_onehot = tf.cast(y_train_onehot, tf.float32)
    # slice the tensor list sequentially (shuffle=False) or randomly
    input_queue = tf.train.slice_input_producer([x_train, y_train_onehot], shuffle=False)
    x_batch, y_batch = tf.train.batch(input_queue, batch_size=batch_size,
                                      num_threads=1, capacity=128)
    return x_batch, y_batch


def get_files():
    dataset_path = 'data_sample_new.mat'
    data_mat = sio.loadmat(dataset_path)
    data_sample = np.array(data_mat['data'])  # read data from dict
    x_train = np.array(data_mat['x_train'])   # read x_train data from dict
    x_test = np.array(data_mat['x_test'])     # read x_test data from dict
    y_train = np.array(data_mat['y_train'])   # read y_train data from dict
    y_test = np.array(data_mat['y_test'])     # read y_test data from dict
    # min-max normalization
    x_train = MaxMinNormalization(x_train)
    x_test = MaxMinNormalization(x_test)
    # one-hot encoding
    y_train_onehot = make_one_hot(y_train, 16)
    y_test_onehot = make_one_hot(y_test, 16)
    print('x_train.shape', x_train.shape)
    print(x_train[0])
    print('y_train_onehot.shape', y_train_onehot.shape)
    print(y_train_onehot[0])
    return x_train, y_train_onehot


def get_batch(x_train, y_train_onehot, image_W, image_H, batch_size, capacity):
    # image_W, image_H and capacity are kept for interface compatibility but are unused here
    return get_batch_data(batch_size, x_train, y_train_onehot)


def main(argv=None):
    t1, t2 = get_files()
    t3, t4 = get_batch(t1, t2, 1, 200, 6, 126)
    print("Training data is converted into images!")


if __name__ == '__main__':
    main()
# then in training.py simply call get_files followed by get_batch
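The closing comment says training.py should call get_files and then get_batch; here is a minimal sketch of that consumer (my own illustration, not from the original post), assuming the TF1 queue API used above, where tf.train.batch only yields data after queue runners are started inside a session:

import tensorflow as tf

x_train, y_train_onehot = get_files()
x_batch, y_batch = get_batch(x_train, y_train_onehot, 1, 200, 6, 126)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    xb, yb = sess.run([x_batch, y_batch])  # one batch of 6 samples
    print(xb.shape, yb.shape)              # (6, 200) and (6, 16)
    coord.request_stop()
    coord.join(threads)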
Taking neighborhood windows over the .mat file
I split the .mat file with MATLAB (it is more convenient).
Then read the split files with Python, merge them into a dataset .mat, and use the same get_files and get_batch as above.
MATLAB splitting:
testt.m
clc, clear, close all
% load the ground truth and the hyperspectral image
path = '.\Dataset\';
inputs = 'Salinas';
location = [path, inputs];
load(location);
%%
% estimate the size of the input image
[height, width, bands] = size(img);
% label map built from the ground truth
GroundImage = zeros(height, width);          % height-by-width label image
GroundImage(GroundT(1, :)) = GroundT(2, :);  % place labels at their linear indices
X_cell = 29;
Y_cell = 29;
totalNum = floor(height/Y_cell) * floor(width/X_cell); % total number of windows
OutData = zeros(totalNum, X_cell*Y_cell*bands);        % preallocated; filling it is not shown in this snippet
for row = 1:floor(height/Y_cell)
    start = (row-1)*Y_cell;
    stop = row*Y_cell;
    dataCol = GroundImage(start+1:stop, :); % take the rows belonging to this horizontal strip
    Path_ = strcat(path, 'Label', 'row', num2str(row)); % strcat concatenates strings, e.g. 'Labelrow1'
    testtfunc(dataCol, width, X_cell, Y_cell, Path_, row);
    row/floor(height/Y_cell) % no semicolon: prints the progress fraction
end
testtfunc.m
function [ ] = testtfunc(label, width, X_cell, Y_cell, outputPath, row)
ColNum = floor(width / X_cell);
for col = 1:ColNum
    start = (col-1)*X_cell;
    stop = col*X_cell;
    data = label(:, start+1:stop);
    % data(:,:,3) = dataCol(:,start+1:stop,1);
    path_ = strcat(outputPath, 'col', num2str(col));
    % inPath = strcat(outputPath, 'col', num2str(col), '.tif');
    % imwrite(uint32(data), inPath);
    sum = 0;
    count = 0;
    result = 0;
    for i = 1:X_cell
        for j = 1:Y_cell
            if data(j, i) ~= 0 % compare with the number 0, not the character '0'
                sum = sum + data(j, i);
                count = count + 1;
            end
        end
    end
    result = sum / count; % mean of the non-zero labels in the window (NaN if the window has none)
    strr = strcat(path_, '.mat');
    save(strr, 'result');
    % imwrite(data, inPath);
    % tmp = reshape(data, [1 feature]);
    % outputData(k, :) = tmp;
    % k = k+1;
end
Python reading and dataset .mat creation:
import numpy as np
import scipy.io as scio
import matplotlib.pyplot as plt
import math
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in newer versions
dataset_path = './Salinas切割后数据/'            # folder of split Salinas data windows
label_path = './Salinas切割后标签(有值的取均值)/'  # folder of split labels (mean of the non-zero values)
#dataname = 'row1col1'
#data_mat = scio.loadmat(dataset_path+dataname)
#data = np.array(data_mat['data'])
ROW = 17
COL = 7
train_data = []
train_label = []
for i in range(ROW):
    for j in range(COL):
        dataname = 'row' + str(i+1) + 'col' + str(j+1)
        data_mat = scio.loadmat(dataset_path + dataname)
        data = data_mat['data']
        train_data.append(data)
train_data = np.array(train_data)

for i in range(ROW):
    for j in range(COL):
        dataname = '_Label_row' + str(i+1) + 'col' + str(j+1)  # e.g. _Label_row1col1
        data_mat = scio.loadmat(label_path + dataname)
        label = data_mat['result'][0]
        train_label.append(label)
train_label = np.array(train_label)

print("train_data.shape", train_data.shape)
print("train_label.shape", train_label.shape)
x_train, x_test, y_train, y_test = train_test_split(
    train_data, train_label, test_size=0.1, random_state=1)
# save data_sample
data_sample_path = './data_sample_Salinas.mat'
scio.savemat(data_sample_path, {'data': train_data,
                                'x_train': x_train,
                                'x_test': x_test,
                                'y_train': y_train,
                                'y_test': y_test,
                                'file_information': 'Salinas data set, each row is a sample. Made by Kevin'})
Saving the neighborhood windows to disk first and then reading them back to build the dataset is a fairly inefficient, memory-heavy approach.
Doing the neighborhood-window split directly in Python is better, only slightly more fiddly than the MATLAB split. In Python you can create a matrix with numpy's zeros, parse the original .mat to fill it, and then split it, or slide a neighborhood window over it.
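A minimal sketch of that all-Python route (my own illustration; the 29x29 window and the mean-of-non-zero-labels rule follow the MATLAB code above, and the 'img'/'GroundT' keys follow the IndiaP.mat script):

import numpy as np
import scipy.io as scio

WIN = 29  # X_cell = Y_cell = 29, as in the MATLAB split

data_mat = scio.loadmat('./dataset/IndiaP.mat')  # assumed path and keys, as in the first script
img = np.array(data_mat['img'])
GroundT = np.array(data_mat['GroundT']).T

height, width, bands = img.shape
# create the label map with numpy zeros, then fill it from the parsed .mat
ground_image = np.zeros((height, width))
rows, cols = np.unravel_index(GroundT[:, 0].astype(int) - 1,
                              (height, width), order='F')
ground_image[rows, cols] = GroundT[:, 1]

# split into non-overlapping WIN x WIN windows
train_data, train_label = [], []
for r in range(height // WIN):
    for c in range(width // WIN):
        block = img[r*WIN:(r+1)*WIN, c*WIN:(c+1)*WIN, :]
        lab = ground_image[r*WIN:(r+1)*WIN, c*WIN:(c+1)*WIN]
        nz = lab[lab != 0]
        if nz.size == 0:
            continue                   # skip windows with no labelled pixels
        train_data.append(block.reshape(-1))
        train_label.append(nz.mean())  # mean of the non-zero labels, as in testtfunc
train_data = np.array(train_data)
train_label = np.array(train_label)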