# Source code for pynetim.evaluation.seed_quality_metrics

# -*- coding: utf-8 -*-
"""种子节点质量评估指标。

提供种子节点集合质量的评估指标函数。
"""

from typing import List, Set, Dict, Optional, Union, TYPE_CHECKING
import numpy as np

if TYPE_CHECKING:
    from ..graph import IMGraph


def neighbor_coverage(
    graph: 'IMGraph',
    seeds: Set[int]
) -> float:
    """Compute the neighbor coverage of a seed set.

    Neighbor coverage = (number of distinct out-neighbors of the seeds,
    not counting the seeds themselves) / (total number of nodes).

    Args:
        graph: Graph object. Must provide ``out_neighbors(node)`` and
            ``num_nodes``.
        seeds: Seed node ids. Any sized iterable (set, list, tuple, ...)
            is accepted.

    Returns:
        float: Neighbor coverage ratio; ``0.0`` when ``seeds`` is empty.

    Example:
        >>> from pynetim.evaluation import neighbor_coverage
        >>> coverage = neighbor_coverage(graph, seeds)
        >>> print(f"Neighbor coverage: {coverage:.2%}")
    """
    if len(seeds) == 0:
        return 0.0

    covered = set()
    for seed in seeds:
        covered.update(graph.out_neighbors(seed))

    # ``difference_update`` accepts any iterable, whereas ``-=`` raises
    # TypeError unless ``seeds`` is an actual set.
    covered.difference_update(seeds)

    return len(covered) / graph.num_nodes
def degree_statistics(
    graph: 'IMGraph',
    seeds: Set[int]
) -> Dict[str, float]:
    """Summarize the out-degrees of the seed nodes.

    Args:
        graph: Graph object. Must provide ``batch_out_degree(nodes)``.
        seeds: Seed node ids.

    Returns:
        Dict[str, float]: Mapping with the following entries:
            - mean_degree: mean out-degree
            - max_degree: largest out-degree
            - min_degree: smallest out-degree
            - std_degree: standard deviation of the out-degrees
        All entries are zero when ``seeds`` is empty.

    Example:
        >>> from pynetim.evaluation import degree_statistics
        >>> stats = degree_statistics(graph, seeds)
        >>> print(f"Mean degree: {stats['mean_degree']:.2f}")
    """
    if not len(seeds):
        # Nothing to aggregate: report an all-zero summary.
        return dict(
            mean_degree=0.0,
            max_degree=0,
            min_degree=0,
            std_degree=0.0,
        )

    seed_list = list(seeds)
    out_degrees = graph.batch_out_degree(seed_list)

    summary = dict()
    summary['mean_degree'] = float(np.mean(out_degrees))
    summary['max_degree'] = int(np.max(out_degrees))
    summary['min_degree'] = int(np.min(out_degrees))
    summary['std_degree'] = float(np.std(out_degrees))
    return summary
def degree_distribution(
    graph: 'IMGraph',
    seeds: Set[int]
) -> Dict[int, int]:
    """Build the out-degree histogram of the seed nodes.

    Args:
        graph: Graph object. Must provide ``out_degree(node)``.
        seeds: Seed node ids.

    Returns:
        Dict[int, int]: Mapping from an out-degree value to the number of
        seeds having that out-degree. Empty when ``seeds`` is empty.

    Example:
        >>> from pynetim.evaluation import degree_distribution
        >>> dist = degree_distribution(graph, seeds)
        >>> print(f"Degree 5: {dist.get(5, 0)} nodes")
    """
    histogram = {}
    for node in seeds:
        out_deg = graph.out_degree(node)
        if out_deg in histogram:
            histogram[out_deg] += 1
        else:
            histogram[out_deg] = 1
    return histogram
def mean_centrality(
    graph: 'IMGraph',
    seeds: Set[int],
    centrality_type: str = 'degree'
) -> float:
    """Compute the mean normalized degree centrality of the seed nodes.

    Each seed's degree is divided by ``graph.num_nodes - 1`` and the
    results are averaged.

    Args:
        graph: Graph object. Must provide ``num_nodes`` and the degree
            accessor required by ``centrality_type``.
        seeds: Seed node ids.
        centrality_type: Which centrality to use. One of:
            - 'degree': degree centrality (computed from ``out_degree``)
            - 'in_degree': in-degree centrality
            - 'out_degree': out-degree centrality

    Returns:
        float: Mean centrality. ``0.0`` when ``seeds`` is empty or when the
        graph has fewer than two nodes (the normalizer would be zero).

    Raises:
        ValueError: If ``centrality_type`` is not one of the values above
            (only checked when ``seeds`` is non-empty).

    Example:
        >>> from pynetim.evaluation import mean_centrality
        >>> centrality = mean_centrality(graph, seeds, centrality_type='degree')
    """
    if len(seeds) == 0:
        return 0.0

    # Name of the graph method backing each centrality type. 'degree' maps
    # to out_degree, exactly as the original implementation did.
    accessor_names = {
        'degree': 'out_degree',
        'in_degree': 'in_degree',
        'out_degree': 'out_degree',
    }
    if centrality_type not in accessor_names:
        raise ValueError(f"Unknown centrality type: {centrality_type}")

    normalizer = graph.num_nodes - 1
    if normalizer <= 0:
        # A graph with a single node has no possible neighbors; avoid
        # dividing by zero and report zero centrality.
        return 0.0

    degree_of = getattr(graph, accessor_names[centrality_type])
    centralities = [degree_of(seed) / normalizer for seed in seeds]
    return float(np.mean(centralities))
def seed_overlap(
    seeds1: Set[int],
    seeds2: Set[int]
) -> float:
    """Compute the overlap between two seed sets.

    Jaccard similarity = |S1 ∩ S2| / |S1 ∪ S2|

    Args:
        seeds1: First group of seed node ids.
        seeds2: Second group of seed node ids.
            Both arguments may be any iterable of hashable ids; duplicates
            are ignored.

    Returns:
        float: Overlap ratio in [0, 1]. Two empty groups are treated as
        identical and yield ``1.0``.

    Example:
        >>> from pynetim.evaluation import seed_overlap
        >>> overlap = seed_overlap(seeds1, seeds2)
        >>> print(f"Overlap: {overlap:.2%}")
    """
    # Coerce to sets so lists/tuples work with the set operators below.
    first = set(seeds1)
    second = set(seeds2)

    union_size = len(first | second)
    if union_size == 0:
        # Only possible when both groups are empty.
        return 1.0

    return len(first & second) / union_size
def seed_diversity(
    graph: 'IMGraph',
    seeds: Set[int]
) -> float:
    """Score how spread out the seed nodes are.

    The score is the average shortest distance between seeds (as returned
    by ``average_shortest_distance``) divided by ``graph.num_nodes - 1``
    and capped at 1.0.

    Args:
        graph: Graph object. Must provide ``num_nodes``.
        seeds: Seed node ids.

    Returns:
        float: Diversity score, at most 1.0.
            - values near 1.0 mean the seeds are widely spread
            - values near 0.0 mean the seeds are tightly clustered
        ``0.0`` is returned when fewer than two seeds are given or when the
        average distance is negative (presumably a "no finite distance"
        sentinel -- confirm against ``influence_metrics``).

    Example:
        >>> from pynetim.evaluation import seed_diversity
        >>> diversity = seed_diversity(graph, seeds)
    """
    # Resolved at call time rather than at module import time.
    from .influence_metrics import average_shortest_distance

    score = 0.0
    if len(seeds) >= 2:
        mean_dist = average_shortest_distance(graph, seeds)
        if not mean_dist < 0:
            distance_cap = graph.num_nodes - 1
            score = min(mean_dist / distance_cap, 1.0)
    return score
def weight_statistics(
    graph: 'IMGraph',
    seeds: Set[int]
) -> Dict[str, float]:
    """Summarize the weights of the edges leaving the seed nodes.

    For every seed, each out-neighbor's edge weight is looked up with
    ``graph.get_edge_weight(seed, neighbor)``; ``None`` weights are skipped.

    Args:
        graph: Graph object. Must provide ``out_neighbors(node)`` and
            ``get_edge_weight(source, target)``.
        seeds: Seed node ids.

    Returns:
        Dict[str, float]: Mapping with the following entries:
            - mean_weight: mean edge weight
            - max_weight: largest edge weight
            - min_weight: smallest edge weight
            - total_weight: sum of the edge weights
        All entries are ``0.0`` when there are no seeds or no weights.

    Example:
        >>> from pynetim.evaluation import weight_statistics
        >>> stats = weight_statistics(graph, seeds)
        >>> print(f"Mean weight: {stats['mean_weight']:.4f}")
    """
    # Shared result for both "nothing to measure" exits.
    empty_stats = {
        'mean_weight': 0.0,
        'max_weight': 0.0,
        'min_weight': 0.0,
        'total_weight': 0.0
    }
    if len(seeds) == 0:
        return empty_stats

    edge_weights = []
    for source in seeds:
        looked_up = (
            graph.get_edge_weight(source, target)
            for target in graph.out_neighbors(source)
        )
        edge_weights.extend(w for w in looked_up if w is not None)

    if not edge_weights:
        return empty_stats

    summary = {}
    summary['mean_weight'] = float(np.mean(edge_weights))
    summary['max_weight'] = float(np.max(edge_weights))
    summary['min_weight'] = float(np.min(edge_weights))
    summary['total_weight'] = float(np.sum(edge_weights))
    return summary
def clustering_coefficient(
    graph: 'IMGraph',
    seeds: Set[int]
) -> float:
    """Compute the average local clustering coefficient of the seed nodes.

    For each seed, the coefficient is the fraction of unordered pairs of
    its out-neighbors that are connected by an edge in at least one
    direction. Seeds with fewer than two out-neighbors contribute ``0.0``.

    Args:
        graph: Graph object. Must provide ``out_neighbors(node)`` and
            ``has_edge(source, target)``.
        seeds: Seed node ids.

    Returns:
        float: Mean clustering coefficient over the seeds; ``0.0`` when
        ``seeds`` is empty.

    Example:
        >>> from pynetim.evaluation import clustering_coefficient
        >>> cc = clustering_coefficient(graph, seeds)
        >>> print(f"Clustering coefficient: {cc:.4f}")
    """
    from itertools import combinations

    if len(seeds) == 0:
        return 0.0

    coefficients = []
    for seed in seeds:
        neighbors = list(graph.out_neighbors(seed))
        k = len(neighbors)
        if k < 2:
            coefficients.append(0.0)
            continue

        # Test both directions so the result does not depend on the order
        # in which neighbors are listed when has_edge is not symmetric
        # (directed graphs). For symmetric has_edge this is unchanged.
        linked_pairs = sum(
            1
            for a, b in combinations(neighbors, 2)
            if graph.has_edge(a, b) or graph.has_edge(b, a)
        )
        # k >= 2 here, so the number of unordered pairs is always positive.
        possible_pairs = k * (k - 1) / 2
        coefficients.append(linked_pairs / possible_pairs)

    return float(np.mean(coefficients))