145 lines
4.6 KiB
Python
145 lines
4.6 KiB
Python
import numpy as np
|
|
import pandas as pd
|
|
import networkx as nx
|
|
import matplotlib.pyplot as plt
|
|
import seaborn as sns
|
|
from matplotlib import rcParams
|
|
|
|
# 全局设置中文字体
|
|
rcParams['font.sans-serif'] = ['SimHei'] # 设置中文字体为黑体
|
|
rcParams['axes.unicode_minus'] = False # 解决坐标轴负号显示问题
|
|
|
|
# 计算相关系数矩阵
|
|
def row_correlations(df):
|
|
num_rows = df.shape[0]
|
|
corr_matrix = np.zeros((num_rows, num_rows))
|
|
|
|
# 标准化数据,并处理标准差为零的情况
|
|
def standardize_column(x):
|
|
std = x.std()
|
|
return (x - x.mean()) / std if std != 0 else np.zeros_like(x)
|
|
|
|
standardized_df = df.apply(standardize_column, axis=0)
|
|
|
|
for i in range(num_rows):
|
|
for j in range(i, num_rows):
|
|
r = np.corrcoef(standardized_df.iloc[i], standardized_df.iloc[j])[0, 1]
|
|
corr_matrix[i, j] = r
|
|
corr_matrix[j, i] = r
|
|
|
|
corr_df = pd.DataFrame(corr_matrix, index=df.index, columns=df.index)
|
|
return corr_df
|
|
|
|
|
|
def row_euclidean_distances(df):
|
|
# 获取DataFrame的行数
|
|
num_rows = df.shape[0]
|
|
|
|
# 初始化一个空的对称方阵用于存储欧氏距离
|
|
distance_matrix = np.zeros((num_rows, num_rows))
|
|
|
|
# 计算每两行之间的欧氏距离
|
|
for i in range(num_rows):
|
|
for j in range(i, num_rows):
|
|
# 计算第i行和第j行的欧氏距离
|
|
distance = np.linalg.norm(df.iloc[i] - df.iloc[j])
|
|
# 存储在对称方阵中
|
|
distance_matrix[i, j] = distance
|
|
distance_matrix[j, i] = distance
|
|
|
|
# 将欧氏距离矩阵转换为DataFrame
|
|
distance_df = pd.DataFrame(distance_matrix, index=df.index, columns=df.index)
|
|
|
|
return distance_df
|
|
|
|
|
|
def calculate_similarity(correlation_df, distance_df):
|
|
# 初始化一个空的对称方阵用于存储相似度
|
|
num_rows = correlation_df.shape[0]
|
|
similarity_matrix = np.zeros((num_rows, num_rows))
|
|
|
|
# 计算相似度S
|
|
for i in range(num_rows):
|
|
for j in range(i, num_rows):
|
|
r = correlation_df.iloc[i, j]
|
|
d = distance_df.iloc[i, j]
|
|
R = (1 + r) / 2
|
|
S = 100 * R / d if d != 0 else 100 * R / 0.25
|
|
# 存储在对称方阵中
|
|
similarity_matrix[i, j] = S
|
|
similarity_matrix[j, i] = S
|
|
|
|
# 将相似度矩阵转换为DataFrame
|
|
similarity_df = pd.DataFrame(similarity_matrix, index=correlation_df.index, columns=correlation_df.index)
|
|
|
|
return similarity_df
|
|
|
|
|
|
# 绘制相似度网络图
|
|
def plot_similarity_network(similarity_df, threshold=0.5):
|
|
G = nx.Graph()
|
|
for i in range(len(similarity_df)):
|
|
for j in range(i + 1, len(similarity_df)):
|
|
if similarity_df.iloc[i, j] > threshold:
|
|
G.add_edge(similarity_df.index[i], similarity_df.columns[j], weight=similarity_df.iloc[i, j])
|
|
|
|
pos = nx.spring_layout(G)
|
|
plt.figure(figsize=(10, 8))
|
|
edges = G.edges(data=True)
|
|
|
|
# 根据相似度调整边的宽度
|
|
weights = [e['weight'] for u, v, e in edges]
|
|
|
|
# 设置颜色为浅色调
|
|
nx.draw(G, pos, with_labels=True, node_size=700, node_color='lightblue', font_size=10,
|
|
width=2, edge_color=weights, edge_cmap=plt.cm.Blues, edge_vmin=min(weights),
|
|
edge_vmax=max(weights)*0.5)
|
|
|
|
# 显示边的相似度值
|
|
edge_labels = {(u, v): f'{e["weight"]:.2f}' for u, v, e in edges}
|
|
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
|
|
|
|
plt.suptitle("相似度网络图", fontsize=16)
|
|
# plt.title('Similarity Network Graph')
|
|
plt.show()
|
|
|
|
# 绘制热力图
|
|
def plot_similarity_heatmap(similarity_df):
|
|
plt.figure(figsize=(10, 8))
|
|
sns.heatmap(similarity_df, annot=True, fmt=".2f", cmap="coolwarm", linewidths=.5)
|
|
plt.title("相关系数热力图", fontsize=16)
|
|
plt.xlabel('样品编号')
|
|
plt.ylabel('样品编号')
|
|
plt.show()
|
|
|
|
|
|
|
|
# 示例用法
|
|
if __name__ == '__main__':
|
|
# 示例矩阵,和之前的扇形图要求一样
|
|
matrix = pd.read_csv('./radartest.csv', index_col=0)
|
|
|
|
# 调用函数
|
|
correlation_result = row_correlations(matrix)
|
|
distance_result = row_euclidean_distances(matrix)
|
|
similarity_result = calculate_similarity(correlation_result, distance_result)
|
|
|
|
# 输出相关系数矩阵
|
|
print("每两行之间的相关系数矩阵:")
|
|
print(correlation_result)
|
|
|
|
|
|
# 输出欧氏距离矩阵
|
|
print("每两行之间的欧氏距离矩阵:")
|
|
print(distance_result)
|
|
|
|
# 输出相似度矩阵
|
|
print("每两行之间的相似度矩阵:")
|
|
print(similarity_result)
|
|
|
|
# 绘制相关系数热力图
|
|
plot_similarity_heatmap(correlation_result)
|
|
|
|
# 绘制相似度网络图
|
|
plot_similarity_network(similarity_result, threshold=0.5)
|