首先我们知道iris数据集包含三种花种类,每种花对应50个样本数据,每个数据包含4个特征数据:
#Definition of columns:
#1. sepal length in cm (花萼长)
#2. sepal width in cm(花萼宽)
#3. petal length in cm (花瓣长)
#4. petal width in cm(花瓣宽)
1)对比下单独每个特征下的最大最小值,观察对应每种花是否有清晰的界限
[python]
- from sklearn import datasets
- import matplotlib.pyplot as plt
- import numpy as np
- from matplotlib import cm
# Load the iris dataset once and expose its parts under short module-level
# names that the plotting helpers below read.
iris = datasets.load_iris()
irisFeatures = iris.data  # feature matrix (one row per sample)
irisFeaturesName = iris.feature_names  # column captions
irisLabels = iris.target  # integer class label per sample
def scatter_plot(dim1, dim2):
    """Scatter-plot two iris feature columns against each other.

    Each of the three classes is drawn with its own marker and colour on
    the current matplotlib axes. The figure is not shown here; the caller
    decides when to call ``plt.show()``.

    Args:
        dim1: Column index of ``irisFeatures`` plotted on the x axis.
        dim2: Column index of ``irisFeatures`` plotted on the y axis.
    """
    # Axis captions per feature column (the flower part is "sepal").
    dim_meaning = {
        0: 'sepal length',
        1: 'sepal width',
        2: 'petal length',
        3: 'petal width',
    }

    # zip() walks the class ids, markers and colours in lockstep; every
    # class is plotted on its own so it gets a distinct marker and colour.
    for t, marker, color in zip(range(3), ">ox", "rgb"):
        in_class = irisLabels == t
        plt.scatter(
            irisFeatures[in_class, dim1],
            irisFeatures[in_class, dim2],
            marker=marker,
            c=color,
        )

    plt.xlabel(dim_meaning.get(dim1))
    plt.ylabel(dim_meaning.get(dim2))
# Max/min spread of a single feature within each of the three flower classes.
def getMax_Min(feature_No):
    """Return the max and min of one feature column for three row groups.

    The rows of ``iris.data`` are read in three consecutive groups of 50
    (rows 0-49, 50-99 and 100-149).
    NOTE(review): this presumes the dataset is ordered by class, 50 samples
    per class -- confirm against the loader.

    Args:
        feature_No: Column index into ``iris.data``.

    Returns:
        A 6-tuple ``(max_1, min_1, max_2, min_2, max_3, min_3)`` where the
        numeric suffix is the row group, in order.
    """
    extremes = []
    for first_row in (0, 50, 100):
        column = iris.data[first_row:first_row + 50, feature_No]
        extremes.extend((max(column), min(column)))
    return tuple(extremes)
def main():
    """Draw the max/min bar charts per feature, then the pairwise scatter grid.

    First, for each of the four feature columns, a horizontal bar chart of
    the six values returned by ``getMax_Min`` is shown in its own window.
    Afterwards one figure with a 2x3 grid of scatter plots covers every
    pair of feature columns.
    """
    label = ['Max-setosa', 'Min-setosa', 'Max-versicolor', 'Min-versicolor',
             'Max-virginica', 'Min-virginica']
    # One colour code per bar; max and min of the same class share a colour.
    colormap = [4, 4, 2, 2, 3, 3]
    # Force float division so the values are scaled into (0, 1] for the
    # colour map even where "/" on integers would truncate.
    color = cm.jet(np.asarray(colormap, dtype=float) / max(colormap))

    for i in range(4):  # one feature column per chart
        x = list(getMax_Min(i))
        idx = np.arange(len(x))
        # Centre the bars on idx explicitly so the tick labels placed at idx
        # line up with them regardless of the library's default alignment.
        plt.barh(idx, x, color=color, align='center')
        plt.yticks(idx, label, rotation=60)
        plt.grid(axis='x')
        plt.xlabel('value')
        plt.ylabel('Type of flower')
        plt.title(str(iris.feature_names[i]))
        # NOTE(review): the listing lost its indentation; showing one chart
        # per feature (inside the loop) is assumed -- confirm.
        plt.show()

    # Scatter plot for every unordered pair of the four feature columns.
    plt.figure(1)
    feature_pairs = [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
    for position, (dim1, dim2) in enumerate(feature_pairs, start=1):
        plt.subplot(2, 3, position)
        scatter_plot(dim1, dim2)
    plt.show()
# Run the plots only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
登录 | 立即注册