from io import BytesIO import numpy as np import pandas as pd import networkx as nx import matplotlib.pyplot as plt import seaborn as sns from matplotlib import rcParams # 全局设置中文字体 rcParams['font.sans-serif'] = ['SimHei'] # 设置中文字体为黑体 rcParams['axes.unicode_minus'] = False # 解决坐标轴负号显示问题 # 计算相关系数矩阵 def row_correlations(df): num_rows = df.shape[0] corr_matrix = np.zeros((num_rows, num_rows)) # 标准化数据,并处理标准差为零的情况 def standardize_column(x): std = x.std() return (x - x.mean()) / std if std != 0 else np.zeros_like(x) standardized_df = df.apply(standardize_column, axis=0) for i in range(num_rows): for j in range(i, num_rows): r = np.corrcoef(standardized_df.iloc[i], standardized_df.iloc[j])[0, 1] corr_matrix[i, j] = r corr_matrix[j, i] = r corr_df = pd.DataFrame(corr_matrix, index=df.index, columns=df.index) return corr_df def row_euclidean_distances(df): # 获取DataFrame的行数 num_rows = df.shape[0] # 初始化一个空的对称方阵用于存储欧氏距离 distance_matrix = np.zeros((num_rows, num_rows)) # 计算每两行之间的欧氏距离 for i in range(num_rows): for j in range(i, num_rows): # 计算第i行和第j行的欧氏距离 distance = np.linalg.norm(df.iloc[i] - df.iloc[j]) # 存储在对称方阵中 distance_matrix[i, j] = distance distance_matrix[j, i] = distance # 将欧氏距离矩阵转换为DataFrame distance_df = pd.DataFrame(distance_matrix, index=df.index, columns=df.index) return distance_df def calculate_similarity(correlation_df, distance_df): # 初始化一个空的对称方阵用于存储相似度 num_rows = correlation_df.shape[0] similarity_matrix = np.zeros((num_rows, num_rows)) # 计算相似度S for i in range(num_rows): for j in range(i, num_rows): r = correlation_df.iloc[i, j] d = distance_df.iloc[i, j] R = (1 + r) / 2 S = 100 * R / d if d != 0 else np.inf # 如果距离为0,相似度设为无穷大 # 存储在对称方阵中 similarity_matrix[i, j] = S similarity_matrix[j, i] = S # 将相似度矩阵转换为DataFrame similarity_df = pd.DataFrame(similarity_matrix, index=correlation_df.index, columns=correlation_df.index) return similarity_df # 绘制相似度网络图 def plot_similarity_network(similarity_df, threshold=0.5): G = nx.Graph() for i in range(len(similarity_df)): for j in range(i + 1, len(similarity_df)): if similarity_df.iloc[i, j] > threshold: G.add_edge(similarity_df.index[i], similarity_df.columns[j], weight=similarity_df.iloc[i, j]) pos = nx.spring_layout(G) plt.figure(figsize=(10, 8)) edges = G.edges(data=True) # 根据相似度调整边的宽度 weights = [e['weight'] for u, v, e in edges] # 设置颜色为浅色调 nx.draw(G, pos, with_labels=True, node_size=700, node_color='lightblue', font_size=10, width=2, edge_color=weights, edge_cmap=plt.cm.Blues, edge_vmin=min(weights), edge_vmax=max(weights)*0.5) # 显示边的相似度值 edge_labels = {(u, v): f'{e["weight"]:.2f}' for u, v, e in edges} nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels) plt.suptitle("相似度网络图", fontsize=16) # plt.title('Similarity Network Graph') # 将图像保存到字节流中 img_bytes = BytesIO() plt.savefig(img_bytes, format='png') # 设置字节流的位置到开始 img_bytes.seek(0) # 输出图像 plt.close() return img_bytes # 绘制热力图 def plot_similarity_heatmap(similarity_df): plt.figure(figsize=(10, 8)) sns.heatmap(similarity_df, annot=True, fmt=".2f", cmap="coolwarm", linewidths=.5) plt.title("相关系数热力图", fontsize=16) plt.xlabel('样品编号') plt.ylabel('样品编号') # 将图像保存到字节流中 img_bytes = BytesIO() plt.savefig(img_bytes, format='png') # 设置字节流的位置到开始 img_bytes.seek(0) # 输出图像 plt.close() return img_bytes # 示例用法 if __name__ == '__main__': # 示例矩阵,和之前的扇形图要求一样 matrix = pd.read_csv('./radartest.csv', index_col=0) # 调用函数 correlation_result = row_correlations(matrix) distance_result = row_euclidean_distances(matrix) similarity_result = calculate_similarity(correlation_result, distance_result) # 输出相关系数矩阵 print("每两行之间的相关系数矩阵:") print(correlation_result) # 输出欧氏距离矩阵 print("每两行之间的欧氏距离矩阵:") print(distance_result) # 输出相似度矩阵 print("每两行之间的相似度矩阵:") print(similarity_result) # 绘制相关系数热力图 plot_similarity_heatmap(correlation_result) # 绘制相似度网络图 plot_similarity_network(similarity_result, threshold=0.5)