ChengYue's Cat

《Boosting the Generalization Capability in Cross-Domain Few-shot Learning via Noise-enhanced Supervised Autoencoder》这篇文章提到了一个评估特征提取器泛化能力的指标 ICC，准确来说是一种对可判别性的统计分析（statistical analysis of discriminability）。The ICC is defined as the ratio of the inter-class variation to the intra-class variation.

绘制AWA2的ICC(L2归一化特征)

 import pickle as pkl
import torch as t
import numpy as np
import torch.nn.functional as F
import matplotlib.pyplot as plt
from brokenaxes import brokenaxes
from matplotlib import rcParams
from matplotlib.font_manager import FontProperties
import pylab
# pylab.rcParams['figure.figsize'] = (15.0, 10.0) 

# Global matplotlib text settings, applied to every figure created afterwards.
# NOTE(review): "font.serif" is normally consulted only when "font.family"
# resolves to 'serif' -- confirm that SimSun is actually picked up.
config = {
    'font.family': "Times New Roman",
    'font.size': 10,
    'mathtext.fontset': "stix",
    'font.serif': ["SimSun"],
}
rcParams.update(config)



def plot_bar(data_1, data_2, data_3, y_lim, y_label="Intra_Var",
             save_path="ICC_AWA_seen_intra.png",
             labels=("CoF", "VF", "Raw")):
    """Draw three curves on (optionally broken) y-axes and save the figure.

    Despite its name, this function draws marker+line plots, not bars. The
    x-axis is simply the index ``0 .. len(data_1) - 1``.

    Args:
        data_1, data_2, data_3: Equal-length 1-D sequences of values, drawn
            with square, circle and triangle markers respectively.
        y_lim: Value forwarded to ``brokenaxes(ylims=...)``, e.g.
            ``((0.4, 0.8), (2.0, 2.5))``. Any falsy value (``None``, ``()``)
            lets brokenaxes choose the y-range itself.
        y_label: Label of the y-axis. Defaults to the previously hard-coded
            ``"Intra_Var"``.
        save_path: File the figure is written to; ``None`` skips saving.
            Defaults to the previously hard-coded file name.
        labels: Legend entries for ``data_1``, ``data_2`` and ``data_3``.
    """
    # Always draw on figure number 2, but start from a clean canvas so that
    # repeated calls do not stack their axes on top of each other.
    fig = plt.figure(2)
    fig.clf()

    x = np.arange(len(data_1))

    # Only pass ``ylims`` when a range was requested, exactly as before.
    axes_kwargs = {"hspace": .05, "despine": False}
    if y_lim:
        axes_kwargs["ylims"] = y_lim
    bax = brokenaxes(**axes_kwargs)

    curves = (
        (data_1, 's-', '#FA8072'),
        (data_2, 'o-', '#20B2AA'),
        (data_3, 'v-', '#FF8C00'),
    )
    for (values, fmt, color), label in zip(curves, labels):
        bax.plot(x, values, fmt, color=color, label=label)

    bax.set_ylabel(y_label)
    bax.legend()

    if save_path is not None:
        plt.savefig(save_path, bbox_inches='tight', pad_inches=0, dpi=800)



def compute_intra_class_variation(pkl_data_path, n_episodes=50, n_classes=10,
                                  split='train_seen'):
    """Estimate the intra-class variation of L2-normalised features.

    The pickle at ``pkl_data_path`` must hold a dict whose ``split`` entry
    contains ``'fea'`` (a 2-D numpy feature array, one row per sample) and
    ``'labels'`` (the class label of every row). Each feature row is scaled
    to unit L2 norm. For every episode, ``n_classes`` distinct classes are
    drawn at random; the per-dimension variance of each drawn class is
    averaged over the feature dimensions and then over the drawn classes.

    Args:
        pkl_data_path: Path of the pickled dataset.
        n_episodes: Number of random episodes (default 50).
        n_classes: Classes sampled without replacement per episode
            (default 10).
        split: Key of the dataset split to analyse (default
            ``'train_seen'``).

    Returns:
        ``np.ndarray`` of shape ``(n_episodes,)``, one value per episode.

    Raises:
        KeyError: If ``split``, ``'fea'`` or ``'labels'`` is missing.
        ValueError: Raised by ``np.random.choice`` when ``n_classes``
            exceeds the number of distinct labels in the split.
    """
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(pkl_data_path, 'rb') as f:
        pkl_dataset = pkl.load(f, encoding='utf-8')

    features = pkl_dataset[split]['fea']
    labels = pkl_dataset[split]['labels']

    classes = np.unique(labels)
    data = F.normalize(t.from_numpy(features), dim=1, p=2).numpy()

    # The variance of a class does not depend on the episode, so compute it
    # once per class instead of once per (episode, class) pair.
    class_var = np.array([
        np.var(data[(labels == c).nonzero()[0]], axis=0).mean()
        for c in classes
    ])

    var_group = []
    for _ in range(n_episodes):
        # Sampling without replacement: a class appears at most once.
        picked = np.random.choice(len(classes), n_classes, replace=False)
        var_group.append(class_var[picked].mean())

    return np.array(var_group)

# read pkl_dataset
def compute_inter_class_variation(pkl_data_path, n_episodes=50, n_classes=10,
                                  split='train_seen'):
    """Estimate the inter-class variation of L2-normalised features.

    The pickle at ``pkl_data_path`` must hold a dict whose ``split`` entry
    contains ``'fea'`` (a 2-D numpy feature array, one row per sample) and
    ``'labels'`` (the class label of every row). Each feature row is scaled
    to unit L2 norm and a mean vector is computed per class. For every
    episode, ``n_classes`` distinct classes are drawn at random and the
    Euclidean distance between their mean vectors is averaged over all
    ordered pairs of different classes.

    Args:
        pkl_data_path: Path of the pickled dataset.
        n_episodes: Number of random episodes (default 50).
        n_classes: Classes sampled without replacement per episode
            (default 10). Must be at least 2.
        split: Key of the dataset split to analyse (default
            ``'train_seen'``).

    Returns:
        ``np.ndarray`` of shape ``(n_episodes,)``, one value per episode.

    Raises:
        ValueError: If ``n_classes < 2`` (no pair of classes to compare), or
            raised by ``np.random.choice`` when ``n_classes`` exceeds the
            number of distinct labels in the split.
        KeyError: If ``split``, ``'fea'`` or ``'labels'`` is missing.
    """
    if n_classes < 2:
        raise ValueError("n_classes must be at least 2 to form class pairs")

    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(pkl_data_path, 'rb') as f:
        pkl_dataset = pkl.load(f, encoding='utf-8')

    features = pkl_dataset[split]['fea']
    labels = pkl_dataset[split]['labels']

    classes = np.unique(labels)
    data = F.normalize(t.from_numpy(features), dim=1, p=2).numpy()

    # Class means do not depend on the episode, so compute them only once.
    class_mu = np.stack([
        np.mean(data[(labels == c).nonzero()[0]], axis=0)
        for c in classes
    ])

    # Number of ordered pairs (i, j) with i != j; the i == j entries of the
    # distance matrix are zero and therefore do not affect the sum.
    n_pairs = n_classes * (n_classes - 1)

    var_group = []
    for _ in range(n_episodes):
        # Sampling without replacement: a class appears at most once.
        picked = np.random.choice(len(classes), n_classes, replace=False)
        mu = class_mu[picked]
        pairwise = np.linalg.norm(mu[:, None, :] - mu[None, :, :], axis=-1)
        var_group.append(pairwise.sum() / n_pairs)

    return np.array(var_group)
    
 pkl_data_path_1 = r'C:/Users/ZY2006245/Desktop/fsdownload/V2S_CLS_AWA2_448_11_lr_0.0005_decay_0.001_data.pkl'
# pkl_data_path_2 = r'C:/Users/ZY2006245/Desktop/fsdownload/AWA2_448_6_data_lr_5e-4_decay_1e-3_iter_100_ck.pkl'
pkl_data_path_3 = r'C:/Users/ZY2006245/Desktop/fsdownload/AWA2_448_8_data_vanilla_ck.pkl'
pkl_data_path_4 = r'C:/Users/ZY2006245/Desktop/fsdownload/xlsa_awa2_pkl_data.pkl'


intra_var_1 = compute_intra_class_variation(pkl_data_path=pkl_data_path_1) * 10000
intra_var_2 = compute_intra_class_variation(pkl_data_path=pkl_data_path_3) * 10000
intra_var_3 = compute_intra_class_variation(pkl_data_path=pkl_data_path_4) * 10000
print(intra_var_1[0], intra_var_2[0], intra_var_3[0])

ylims=((0.4, 0.8), (2.0, 2.5))
plot_bar(intra_var_1, intra_var_2, intra_var_3, y_lim=ylims)

# inter_var_1 = compute_inter_class_variation(pkl_data_path=pkl_data_path_1)
# inter_var_2 = compute_inter_class_variation(pkl_data_path=pkl_data_path_3)
# inter_var_3 = compute_inter_class_variation(pkl_data_path=pkl_data_path_4)
# print(inter_var_1)

# ylims=((0.0, 0.1),)
# plot_bar(inter_var_1, inter_var_2, inter_var_3, y_lim=ylims)

# ICC_1 = np.array(inter_var_1)/np.array(intra_var_1)
# ICC_2 = np.array(inter_var_2)/np.array(intra_var_2)
# ICC_3 = np.array(inter_var_3)/np.array(intra_var_3)
# print(ICC_1)

# plot_bar(ICC_1, ICC_2, ICC_3, y_lim=1.0)