Background: a list in the data with many elements needs to be clustered according to a sub-matrix, and the clustering result then needs further processing.
Contents

1. Creating a sub-matrix
1.1 Definition of a sub-matrix
1.2 Creating the sub-matrix
2. Adding to and traversing dicts, lists, and arrays
2.1 Adding dict elements
2.2 Traversing a dict
2.3 Adding list elements
2.4 Traversing a list
2.5 np.array
2.6 Accessing array elements
3. Complete program
References:
https://zhidao.baidu.com/question/1609139445669797947.html
https://blog.csdn.net/qq_16964363/article/details/79497917

1. Creating a sub-matrix

1.1 Definition of a sub-matrix

Let the original matrix have m rows and n columns. Taking rows a1, a2, ..., ak of the original matrix (0 <= a1 < a2 < ... < ak <= m-1), and likewise a set of its columns, the sub-matrix consists of the elements at the intersection points of the selected rows and columns.

1.2 Creating the sub-matrix

Create a list holding the row and column indices to be sampled, then loop over that list: row index row_idx of the sub-matrix corresponds to row index classes_in_current_split_group[row_idx] of the original matrix (an equivalent one-step NumPy version is sketched after the loop below):
# construct sub_correlation_matrix
sub_correlation_matrix=np.zeros([len(classes_in_current_split_group),len(classes_in_current_split_group)])
# print("sub_correlation_matrix before initialize: \n",sub_correlation_matrix)
for row_idx in range(len(classes_in_current_split_group)):
    for col_idx in range(len(classes_in_current_split_group)):
        sub_correlation_matrix[row_idx, col_idx] = coco_correlation_A_B[classes_in_current_split_group[row_idx], classes_in_current_split_group[col_idx]]
# print("sub_correlation_matrix after initialize: \n",sub_correlation_matrix)
2. Adding to and traversing dicts, lists, and arrays
2.1 Adding dict elements

Simply write dict_name[key] = value: the expression inside the square brackets is the key, and the value after the equals sign is stored under that key.
split_groups={}
split_groups[1]=classes_number
Printing this dictionary gives:
{1: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
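Note that assigning to a key that already exists simply overwrites its value, and dict.update() can add or replace several keys at once. A small illustration (the group keys 2 and 3 here are made up for the example):
split_groups[1] = [0, 1, 2]                      # overwrites the value stored under key 1
split_groups.update({2: [3, 4, 5], 3: [6, 7]})   # adds keys 2 and 3 in one call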
2.2 Traversing a dict

Simply write for variable in dict_name: the loop variable is the key, and dict_name[variable] is the corresponding element of the dictionary:
for key in split_groups:
    print("\ngroup:", key,
          " group element numbers", len(split_groups[key]),
          "\ngroup_elements", split_groups[key])
2.3 Adding list elements

Simply call list_name.append(element_to_add):
# number of label classes from 0,1,2,...79
classes_number=[]
for class_idx in range(0, 80):
    classes_number.append(class_idx)
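For a plain consecutive range like this, the list can also be built directly from range() without an explicit loop (a one-line sketch):
classes_number = list(range(80))   # [0, 1, 2, ..., 79]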
2.4 Traversing a list

Note that len must be used as for index in range(len(list_name)); writing for index in len(list_name) directly raises an error. Also, list and array elements are looked up with square brackets [], and indices for different dimensions inside the brackets are separated by commas (an enumerate-based alternative is sketched after the loop below):
for row_idx in range(len(classes_in_current_split_group)):
    for col_idx in range(len(classes_in_current_split_group)):
        sub_correlation_matrix[row_idx, col_idx] = coco_correlation_A_B[classes_in_current_split_group[row_idx], classes_in_current_split_group[col_idx]]
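When both the index and the element are needed, enumerate() avoids writing range(len(...)) by hand; a small sketch over the same list:
for row_idx, class_id in enumerate(classes_in_current_split_group):
    # class_id equals classes_in_current_split_group[row_idx]
    print(row_idx, class_id)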
2.5 np.array

To create an array (see https://www.cnblogs.com/hezhefly/p/8278842.html), write array_name = np.zeros([dimensions]):
sub_correlation_matrix=np.zeros([len(classes_in_current_split_group),len(classes_in_current_split_group)])
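np.zeros accepts either a list or a tuple for the shape, and related constructors such as np.ones and np.array work the same way (a short illustrative sketch with made-up names):
a = np.zeros((3, 3))            # 3x3 array of zeros, shape given as a tuple
b = np.ones([2, 4])             # 2x4 array of ones, shape given as a list
c = np.array([[1, 2], [3, 4]])  # build an array directly from a nested list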
2.6 Accessing array elements

Elements are addressed as array_name[dim0_index, dim1_index], for example:
for row_idx in range(len(classes_in_current_split_group)):
    for col_idx in range(len(classes_in_current_split_group)):
        sub_correlation_matrix[row_idx, col_idx] = coco_correlation_A_B[classes_in_current_split_group[row_idx], classes_in_current_split_group[col_idx]]
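Beyond single elements, NumPy indexing also accepts slices, so an entire row or column of the sub-matrix can be read at once (a small sketch):
first_row = sub_correlation_matrix[0, :]   # all columns of row 0
first_col = sub_correlation_matrix[:, 0]   # all rows of column 0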
3. Complete program
#-*-coding:utf-8 -*-
"""
created by xingxinangrui on 2019.5.7
this program is to perform spectral clustering on coco dataset labels
Cluster big groups into two groups until no group has more than 10 elements
-----------------------1.----------------------------
load coco_correlations.pkl
load coco_names.pkl
in which:
names is a 80 dimension list contains label names
names : ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck',
......
'hot dog', 'pizza', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
A_B is a correlation matrix
A_B [[1.00000000e+00 8.26410144e-01 7.04392284e-01 ... 4.03311258e-01
4.45312500e-01 5.40000000e-01]
...
[8.36764511e-03 1.74901618e-03 6.97188008e-04 ... 3.97350993e-03
1.40625000e-01 1.00000000e+00]]
A_B.shape (80, 80)
notA_B.shape (80, 80)
A_notB.shape (80, 80)
notA_notB.shape (80, 80)
correlations = {}
correlations.update(pp=A_B) # P(A|B)
correlations.update(fp=notA_B) # P(not A|B)
correlations.update(pf=A_notB)
correlations.update(ff=notA_notB)
----------------------2.------------------------
cluster from one big group to two small groups until all group_element_number < max_classes_per_group
"""
import numpy as np
from sklearn import datasets
from sklearn.cluster import SpectralClustering
from sklearn import metrics
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import json
import os
import argparse
import warnings
warnings.filterwarnings("ignore")
# env/bin/python sk_spectral_cluster/coco_SpecCluster_iter2group.py --max_classes_per_group 10 --probability_filter_threshold 0.1 --show_cluster_process 0
parser = argparse.ArgumentParser(description='coco_label_spectral_clustering_iter_two_groups')
parser.add_argument('--max_classes_per_group', '-i', default=10, type=int,
                    metavar='N', help='Max classes in each group after cluster')
parser.add_argument('--probability_filter_threshold', default=0.1, type=float,
                    help='probabilities less than probability_filter_threshold are set to 0')
parser.add_argument('--show_cluster_process', default=1, type=int,
                    help='if 1 show cluster process, else just show final result')
def coco_label_spectral_clustering_iter_two_groups():
    # parse arguments
    global args
    args = parser.parse_args()
    max_classes_per_group = args.max_classes_per_group
    show_cluster_process = args.show_cluster_process

    print("-----------------------------------------------------")
    print("-----------------------------------------------------")
    print("-----------------------------------------------------")

    # ---------------- load coco_correlations.pkl and coco_names.pkl ----------------
    with open('sk_spectral_cluster/coco_correlations.pkl', 'rb') as f:
        print("loading coco_correlations.pkl ")
        correlations = pickle.load(f)
    with open('sk_spectral_cluster/coco_names.pkl', 'rb') as f:
        print("loading coco_names.pkl")
        names = pickle.load(f)

    coco_correlation_before_filter = correlations['pp']
    # print('coco label correlation matrix (80*80) : \n', coco_correlation_A_B)

    # probabilities less than probability_filter_threshold are set to 0
    coco_correlation_A_B = coco_correlation_before_filter
    probability_filter_threshold = args.probability_filter_threshold
    for row_idx in range(coco_correlation_before_filter.shape[0]):
        for col_idx in range(coco_correlation_before_filter.shape[1]):
            # print(coco_correlation_before_filter[row_idx, col_idx])
            if coco_correlation_A_B[row_idx, col_idx] < probability_filter_threshold: