Processing the .mat data

Parse the linear indices in the original .mat file, split the samples into training and test sets, and save them as a new .mat file.

# -*- coding: utf-8 -*-
"""
Created on Sun Jul  1 15:55:32 2018


HSGAN dataset:
Semisupervised Hyperspectral Image Classification Based on Generative Adversarial Networks

Dataset: Indian Pines data set
    IndiaP.mat

@author: FeiDong
"""
import numpy as np
import scipy.io as scio
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
# =============================================================================
# read dataset and convert
dataset_path = './dataset/IndiaP.mat'
data_mat = scio.loadmat(dataset_path)

img = np.array(data_mat['img']) # read data from dict
GroundT = np.array(data_mat['GroundT']).T # read data from dict
data_G_img = np.zeros((GroundT.shape[0], img.shape[2]))  # holds the spectra of the labeled pixels extracted from img

for i in range(GroundT.shape[0]):
    # GroundT stores 1-based MATLAB linear indices, which are column-major:
    # index = (col - 1) * n_rows + row
    temp_index = GroundT[i, 0] - 1
    temp_row = int(temp_index % img.shape[0])
    temp_col = int(temp_index // img.shape[0])
    data_G_img[i, :] = img[temp_row, temp_col, :]

# Each row is a sample: column 1 is the linear index, column 2 is the label,
# and the following 200 columns are the pixel spectrum.
data_sample = np.hstack((GroundT, data_G_img))
# Each row is a sample: column 1 is the linear index, column 2 is the label.
data_sample_label = data_sample[:, [0, 1]]
# Each row is a sample: column 1 is the linear index, followed by the 200-band spectrum.
data_sample_dataset = np.delete(data_sample, [1], axis=1)

# randomly split into training and test subsets
x_trainI, x_testI, y_trainI, y_testI = train_test_split(
        data_sample_dataset, data_sample_label, test_size=0.1, random_state=1)
# extract the indices of the training and test samples
x_train_index = x_trainI[:, 0]
x_test_index = x_testI[:, 0]
# drop the index column from the samples
x_train = x_trainI[:, 1:201]
x_test = x_testI[:, 1:201]
y_train = y_trainI[:, 1][:, np.newaxis]
y_test = y_testI[:, 1][:, np.newaxis]

# save data_sample
data_sample_path = './dataset/data_sample.mat'
scio.savemat(data_sample_path, {'data': data_sample,
                               'x_train': x_train,
                               'x_test': x_test,
                               'y_train': y_train,
                               'y_test': y_test,
                               'file_information': 'Indian Pines data set; each row is a sample: the first column is the index value, the second is the label, and the last 200 columns are the pixel spectrum.'})

# end
# =============================================================================
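
As a side note (a sketch, not part of the original script): since MATLAB linear indices are 1-based and column-major, the decoding loop above can also be written in one step with np.unravel_index:

rows, cols = np.unravel_index(GroundT[:, 0].astype(int) - 1,
                              (img.shape[0], img.shape[1]), order='F')  # Fortran order = column-major
data_G_img = img[rows, cols, :]  # fancy indexing gathers all labeled pixels at once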

Reading the new .mat file

import numpy as np
import scipy.io as sio
import tensorflow as tf


def make_one_hot(data, num_label):
    # labels run from 1 to num_label, so shift to 0-based before comparing
    return (np.arange(num_label) == data - 1).astype(np.int64)

def MaxMinNormalization(matrix):
    Min = np.min(matrix)
    Max = np.max(matrix)
    mat_norm = (matrix - Min) / (Max - Min)
    return mat_norm

def get_batch_data(batch_size, x_train, y_train_onehot):
    # cast to tf.float32
    x_train = tf.cast(x_train, tf.float32)
    y_train_onehot = tf.cast(y_train_onehot, tf.float32)
    # slice the tensors into single samples, in order or shuffled (TF1 queue API)
    input_queue = tf.train.slice_input_producer([x_train, y_train_onehot], shuffle=False)
    x_batch, y_batch = tf.train.batch(input_queue, batch_size=batch_size, num_threads=1, capacity=128)
    return x_batch, y_batch

def get_files():
    dataset_path = 'data_sample_new.mat'
    data_mat = sio.matlab.loadmat(dataset_path)
    data_sample = np.array(data_mat['data'])  # read data from dict
    x_train = np.array(data_mat['x_train'])  # read x_train data from dict
    x_test = np.array(data_mat['x_test'])  # read x_test data from dict
    y_train = np.array(data_mat['y_train'])  # read y_train data from dict
    y_test = np.array(data_mat['y_test'])  # read y_test data from dict

    # min-max normalization
    x_train = MaxMinNormalization(x_train)
    x_test = MaxMinNormalization(x_test)
    # one-hot encode the labels (16 classes)
    y_train_onehot = make_one_hot(y_train, 16)
    y_test_onehot = make_one_hot(y_test, 16)

    print('x_train.shape',x_train.shape)
    print(x_train[0])
    print('y_train_onehot.shape',y_train_onehot.shape)
    print(y_train_onehot[0])

    return x_train,y_train_onehot

def get_batch(x_train, y_train_onehot, image_W, image_H, batch_size, capacity):
    # image_W, image_H and capacity are unused here; kept to match the caller's signature
    return get_batch_data(batch_size, x_train, y_train_onehot)

def main(argv=None):
    t1,t2 = get_files()
    t3,t4 = get_batch(t1,t2, 1, 200, 6, 126)
    print("Training data is converted into images!")


if __name__ == '__main__':
    main()
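
slice_input_producer and tf.train.batch are TensorFlow 1.x queue APIs and were later deprecated. As a hedged sketch (assuming a TensorFlow version that ships the tf.data API; this helper is not part of the original script), the same batching could be written as:

def get_batch_data_tfdata(batch_size, x_train, y_train_onehot):
    # build a tf.data pipeline instead of an input queue
    dataset = tf.data.Dataset.from_tensor_slices(
        (x_train.astype(np.float32), y_train_onehot.astype(np.float32)))
    dataset = dataset.repeat().batch(batch_size)  # loop forever, emit fixed-size batches
    iterator = dataset.make_one_shot_iterator()   # TF1-style iterator
    return iterator.get_next()                    # (x_batch, y_batch) tensors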

# Then in training.py just call get_files followed by get_batch.
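
Because get_batch_data builds an input queue, the queue runners must be started before any batch can be fetched, otherwise sess.run blocks forever. A minimal, hypothetical training.py-style loop (the step count is an arbitrary choice for a smoke test):

x_train, y_train_onehot = get_files()
x_batch, y_batch = get_batch(x_train, y_train_onehot, 1, 200, 6, 126)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for step in range(10):  # pull a few batches as a smoke test
        xs, ys = sess.run([x_batch, y_batch])
        print(step, xs.shape, ys.shape)
    coord.request_stop()
    coord.join(threads)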

Extracting neighborhood windows from the .mat file

I used MATLAB to split the .mat file (it is more convenient), then used Python to read the split files and merge them into a dataset .mat; after that, get_files and get_batch work exactly as above.



MATLAB splitting:

testt.m


clc, clear, close all
% load the ground truth and the hyperspectral image
path = '.\Dataset\';
inputs = 'Salinas';
location = [path, inputs];
load(location);
%%
% estimate the size of the input image
[height, width, bands] = size(img);
% label map built from the ground truth (height x width)
GroundImage = zeros(height, width);
GroundImage(GroundT(1, :)) = GroundT(2, :);

X_cell = 29;
Y_cell = 29;

totalNum = floor(height/Y_cell) * floor(width/X_cell);  % total number of blocks
OutData = zeros(totalNum, X_cell * Y_cell * bands);
for row = 1:floor(height/Y_cell)
    start = (row-1) * Y_cell;
    stop = row * Y_cell;
    dataCol = GroundImage(start+1:stop, :);  % take the rows of this block strip
    Path_ = strcat(path, 'Label', 'row', num2str(row));  % string concatenation, e.g. '.\Dataset\Labelrow1'
    testtfunc(dataCol, width, X_cell, Y_cell, Path_, row);
    row / floor(height/Y_cell)  % unsuppressed expression prints the progress fraction
end

testtfunc.m


function [ ] = testtfunc(label, width, X_cell, Y_cell, outputPath, row)
% row is accepted but not used inside this function
ColNum = floor(width / X_cell);
for col = 1:ColNum
    start = (col-1) * X_cell;
    stop = col * X_cell;
    data = label(:, start+1:stop);
    % data(:,:,3) = dataCol(:,start+1:stop,1);

    path_ = strcat(outputPath, 'col', num2str(col));
    %inPath = strcat(outputPath, 'col', num2str(col), '.tif');
    %imwrite(uint32(data), inPath);

    % average the nonzero labels inside the block
    total = 0;
    count = 0;
    result = 0;
    for i = 1:X_cell
        for j = 1:Y_cell
            if data(j, i) ~= 0  % skip unlabeled pixels
                total = total + data(j, i);
                count = count + 1;
            end
        end
    end

    if count > 0  % guard against blocks with no labeled pixels
        result = total / count;
    end

    strr = strcat(path_, '.mat');
    save(strr, 'result');
end

Reading and building the .mat in Python:


import numpy as np
import scipy.io as scio
import matplotlib.pyplot as plt
import math
from sklearn.model_selection import train_test_split

dataset_path = './Salinas切割后数据/'             # folder with the split data blocks
label_path = './Salinas切割后标签(有值的取均值)/'  # folder with the split labels (mean of the nonzero values)

#dataname = 'row1col1'

#data_mat = scio.loadmat(dataset_path + dataname)

#data = np.array(data_mat['data'])

ROW = 17
COL = 7
train_data = []
train_label = []

for i in range(ROW):
    for j in range(COL):
        dataname = 'row' + str(i+1) + 'col' + str(j+1)
        data_mat = scio.loadmat(dataset_path + dataname)
        data = data_mat['data']
        train_data.append(data)

train_data = np.array(train_data)

for i in range(ROW):
    for j in range(COL):
        dataname = '_Label_row' + str(i+1) + 'col' + str(j+1)  # e.g. _Label_row1col1
        data_mat = scio.loadmat(label_path + dataname)
        label = data_mat['result'][0]
        train_label.append(label)

train_label = np.array(train_label)

print("train_data.shape", train_data.shape)
print("train_label.shape", train_label.shape)

x_train, x_test, y_train, y_test = train_test_split(
    train_data, train_label, test_size=0.1, random_state=1)

# save data_sample

data_sample_path = './data_sample_Salinas.mat'
scio.savemat(data_sample_path, {'data': train_data,
                                'x_train': x_train,
                                'x_test': x_test,
                                'y_train': y_train,
                                'y_test': y_test,
                                'file_information': 'Salinas data set, each row is a sample. Made by Kevin'})

Saving the neighborhood windows to disk and then reading them back, as above, is an inefficient and memory-hungry approach.
It is better to do the neighborhood-window splitting directly in Python; it is only slightly more involved than in MATLAB. In Python you can create a matrix with numpy.zeros, fill it by parsing the original .mat, and then split it into blocks, or slide a neighborhood window over it, as in the sketch below.
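
A minimal sketch of that pure-Python route (the file name, variable names, and the 29x29 block size are assumptions carried over from the MATLAB scripts above):

import numpy as np
import scipy.io as scio

# assumed inputs, matching the MATLAB scripts: 'img' (height x width x bands)
# and 'GroundT' (2 x N: 1-based column-major linear index, label)
data_mat = scio.loadmat('./Dataset/Salinas.mat')
img = np.array(data_mat['img'])
GroundT = np.array(data_mat['GroundT'])

height, width, bands = img.shape
ground_image = np.zeros((height, width))
rows, cols = np.unravel_index(GroundT[0].astype(int) - 1, (height, width), order='F')
ground_image[rows, cols] = GroundT[1]  # label map, like GroundImage in testt.m

X_cell, Y_cell = 29, 29
train_data, train_label = [], []
for r in range(height // Y_cell):
    for c in range(width // X_cell):
        block = img[r*Y_cell:(r+1)*Y_cell, c*X_cell:(c+1)*X_cell, :]
        labels = ground_image[r*Y_cell:(r+1)*Y_cell, c*X_cell:(c+1)*X_cell]
        nonzero = labels[labels != 0]
        mean_label = nonzero.mean() if nonzero.size else 0.0  # mean of the nonzero labels
        train_data.append(block)
        train_label.append(mean_label)

train_data = np.array(train_data)    # (num_blocks, 29, 29, bands)
train_label = np.array(train_label)  # (num_blocks,)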