Source code for pynetim.evaluation.seed_quality_metrics

# -*- coding: utf-8 -*-
"""种子节点质量评估指标。

提供种子节点集合质量的评估指标函数。
"""

from typing import List, Set, Dict, Optional, Union, TYPE_CHECKING
import numpy as np

if TYPE_CHECKING:
    from ..graph import IMGraph



[docs]
def neighbor_coverage(
    graph: 'IMGraph',
    seeds: Set[int]
) -> float:
    """Compute the neighbor coverage of a seed set.

    Coverage = (number of distinct out-neighbors of the seeds that are not
    themselves seeds) / (total number of nodes reported by the graph).

    Args:
        graph: Graph object; ``out_neighbors(node)`` and ``num_nodes`` are
            the only members used here.
        seeds: Seed nodes. Any iterable of node ids is accepted (set, list,
            tuple, ...); duplicates are ignored.

    Returns:
        float: Neighbor coverage. ``0.0`` when there are no seeds or when
        the graph reports no nodes.

    Example:
        >>> from pynetim.evaluation import neighbor_coverage
        >>> coverage = neighbor_coverage(graph, seeds)
        >>> print(f"Neighbor coverage: {coverage:.2%}")
    """
    # Normalise once: ``set -= list`` raises TypeError, so a list/tuple of
    # seeds would otherwise crash at the exclusion step below.
    seed_set = set(seeds)
    if not seed_set:
        return 0.0

    total_nodes = graph.num_nodes
    # Guard the denominator so a degenerate graph cannot divide by zero.
    if total_nodes <= 0:
        return 0.0

    reached = set()
    for seed in seed_set:
        reached.update(graph.out_neighbors(seed))

    # Seeds that are neighbors of other seeds do not count as coverage.
    reached -= seed_set

    return len(reached) / total_nodes




[docs]
def degree_statistics(
    graph: 'IMGraph',
    seeds: Set[int]
) -> Dict[str, float]:
    """Summarise the out-degrees of the seed nodes.

    Args:
        graph: Graph object; ``batch_out_degree(nodes)`` is the only member
            used here.
        seeds: Set of seed nodes.

    Returns:
        Dict[str, float]: Mapping with the keys:
            - mean_degree: mean out-degree (float)
            - max_degree: largest out-degree (int)
            - min_degree: smallest out-degree (int)
            - std_degree: standard deviation as computed by ``np.std``
        All values are zero when ``seeds`` is empty.

    Example:
        >>> from pynetim.evaluation import degree_statistics
        >>> stats = degree_statistics(graph, seeds)
        >>> print(f"Mean degree: {stats['mean_degree']:.2f}")
    """
    if not seeds:
        # Nothing to measure: report an all-zero summary.
        return dict(mean_degree=0.0, max_degree=0, min_degree=0, std_degree=0.0)

    seed_degrees = graph.batch_out_degree(list(seeds))

    summary = {}
    summary['mean_degree'] = float(np.mean(seed_degrees))
    summary['max_degree'] = int(np.max(seed_degrees))
    summary['min_degree'] = int(np.min(seed_degrees))
    summary['std_degree'] = float(np.std(seed_degrees))
    return summary




[docs]
def degree_distribution(
    graph: 'IMGraph',
    seeds: Set[int]
) -> Dict[int, int]:
    """Count how many seed nodes have each out-degree.

    Args:
        graph: Graph object; ``out_degree(node)`` is the only member used
            here.
        seeds: Set of seed nodes.

    Returns:
        Dict[int, int]: Mapping from out-degree value to the number of seeds
        with that out-degree. Empty when ``seeds`` is empty.

    Example:
        >>> from pynetim.evaluation import degree_distribution
        >>> dist = degree_distribution(graph, seeds)
        >>> print(f"Degree 5: {dist.get(5, 0)} nodes")
    """
    counts = {}
    for node in seeds:
        deg = graph.out_degree(node)
        if deg in counts:
            counts[deg] += 1
        else:
            counts[deg] = 1
    return counts




[docs]
def mean_centrality(
    graph: 'IMGraph',
    seeds: Set[int],
    centrality_type: str = 'degree'
) -> float:
    """Compute the mean degree centrality of the seed nodes.

    Each seed's centrality is its degree divided by ``num_nodes - 1``.

    Args:
        graph: Graph object; ``num_nodes`` plus ``out_degree(node)`` or
            ``in_degree(node)`` are the only members used here.
        seeds: Set of seed nodes.
        centrality_type: Which degree to normalise. One of:
            - 'degree': degree centrality (computed from the out-degree,
              i.e. the same value as 'out_degree')
            - 'in_degree': in-degree centrality
            - 'out_degree': out-degree centrality

    Returns:
        float: Mean centrality over the seeds. ``0.0`` when ``seeds`` is
        empty or when the graph has fewer than two nodes (the normaliser
        ``num_nodes - 1`` would not be positive).

    Raises:
        ValueError: If ``centrality_type`` is not one of the values above
            (only checked when ``seeds`` is non-empty).

    Example:
        >>> from pynetim.evaluation import mean_centrality
        >>> centrality = mean_centrality(graph, seeds, centrality_type='degree')
    """
    if len(seeds) == 0:
        return 0.0

    # 'degree' and 'out_degree' intentionally share one code path: both are
    # normalised out-degree.
    if centrality_type in ('degree', 'out_degree'):
        degree_of = graph.out_degree
    elif centrality_type == 'in_degree':
        degree_of = graph.in_degree
    else:
        raise ValueError(f"Unknown centrality type: {centrality_type}")

    normaliser = graph.num_nodes - 1
    # A graph with a single node (or none) has no meaningful normaliser;
    # return 0.0 instead of dividing by zero.
    if normaliser <= 0:
        return 0.0

    return float(np.mean([degree_of(seed) / normaliser for seed in seeds]))




[docs]
def seed_overlap(
    seeds1: Set[int],
    seeds2: Set[int]
) -> float:
    """Compute the overlap between two seed sets.

    The overlap is the Jaccard similarity |S1 ∩ S2| / |S1 ∪ S2|.

    Args:
        seeds1: First seed set.
        seeds2: Second seed set.

    Returns:
        float: Overlap in [0, 1]. Two empty sets are treated as identical
        and yield ``1.0``.

    Example:
        >>> from pynetim.evaluation import seed_overlap
        >>> overlap = seed_overlap(seeds1, seeds2)
        >>> print(f"Overlap: {overlap:.2%}")
    """
    both_empty = len(seeds1) == 0 and len(seeds2) == 0
    if both_empty:
        return 1.0

    shared_count = len(seeds1 & seeds2)
    combined_count = len(seeds1 | seeds2)

    if combined_count > 0:
        return shared_count / combined_count
    return 0.0




[docs]
def seed_diversity(
    graph: 'IMGraph',
    seeds: Set[int]
) -> float:
    """Score how spread out the seed nodes are.

    The score is the average shortest distance between seeds (as returned
    by ``influence_metrics.average_shortest_distance``) divided by
    ``num_nodes - 1`` and capped at 1.0.

    Args:
        graph: Graph object; ``num_nodes`` is read here and the graph is
            passed through to ``average_shortest_distance``.
        seeds: Set of seed nodes.

    Returns:
        float: Diversity score, at most 1.0.
            - values near 1.0 mean the seeds are widely dispersed
            - values near 0.0 mean the seeds are tightly clustered
        ``0.0`` is also returned when fewer than two seeds are given or
        when the average distance is negative (presumably a "no valid
        distance" sentinel from the helper; confirm against its
        implementation).

    Example:
        >>> from pynetim.evaluation import seed_diversity
        >>> diversity = seed_diversity(graph, seeds)
    """
    # Imported at call time, as in the rest of this function's history;
    # NOTE(review): likely to avoid a circular import, confirm.
    from .influence_metrics import average_shortest_distance

    # A single seed (or none) has no pairwise distance to measure.
    if len(seeds) < 2:
        return 0.0

    mean_dist = average_shortest_distance(graph, seeds)
    if mean_dist < 0:
        return 0.0

    # num_nodes - 1 is used as the upper bound on any distance.
    normalised = mean_dist / (graph.num_nodes - 1)
    return min(normalised, 1.0)




[docs]
def weight_statistics(
    graph: 'IMGraph',
    seeds: Set[int]
) -> Dict[str, float]:
    """Summarise the weights of the edges leaving the seed nodes.

    For every seed, each out-neighbor's edge weight is looked up via
    ``graph.get_edge_weight(seed, neighbor)``; ``None`` results are skipped.

    Args:
        graph: Graph object; ``out_neighbors(node)`` and
            ``get_edge_weight(u, v)`` are the only members used here.
        seeds: Set of seed nodes.

    Returns:
        Dict[str, float]: Mapping with the keys:
            - mean_weight: mean edge weight
            - max_weight: largest edge weight
            - min_weight: smallest edge weight
            - total_weight: sum of edge weights
        All values are ``0.0`` when ``seeds`` is empty or no weight was
        collected.

    Example:
        >>> from pynetim.evaluation import weight_statistics
        >>> stats = weight_statistics(graph, seeds)
        >>> print(f"Mean weight: {stats['mean_weight']:.4f}")
    """
    # Built per call so callers never share (and mutate) one dict.
    zero_stats = {
        'mean_weight': 0.0,
        'max_weight': 0.0,
        'min_weight': 0.0,
        'total_weight': 0.0
    }

    if not seeds:
        return zero_stats

    # Lazily walk every (seed, out-neighbor) edge, then drop missing weights.
    looked_up = (
        graph.get_edge_weight(node, nbr)
        for node in seeds
        for nbr in graph.out_neighbors(node)
    )
    edge_weights = [w for w in looked_up if w is not None]

    if not edge_weights:
        return zero_stats

    return {
        'mean_weight': float(np.mean(edge_weights)),
        'max_weight': float(np.max(edge_weights)),
        'min_weight': float(np.min(edge_weights)),
        'total_weight': float(np.sum(edge_weights))
    }




[docs]
def clustering_coefficient(
    graph: 'IMGraph',
    seeds: Set[int]
) -> float:
    """Compute the mean local clustering coefficient of the seed nodes.

    For each seed, the coefficient is the fraction of unordered pairs of its
    out-neighbors that are connected by an edge in at least one direction.

    Args:
        graph: Graph object; ``out_neighbors(node)`` and ``has_edge(u, v)``
            are the only members used here.
        seeds: Set of seed nodes.

    Returns:
        float: Mean clustering coefficient over the seeds. Seeds with fewer
        than two out-neighbors contribute ``0.0``; an empty seed set yields
        ``0.0``.

    Example:
        >>> from pynetim.evaluation import clustering_coefficient
        >>> cc = clustering_coefficient(graph, seeds)
        >>> print(f"Clustering coefficient: {cc:.4f}")
    """
    if len(seeds) == 0:
        return 0.0

    coefficients = []

    for seed in seeds:
        nbrs = list(graph.out_neighbors(seed))
        k = len(nbrs)

        # Fewer than two neighbors means there is no pair to connect.
        if k < 2:
            coefficients.append(0.0)
            continue

        linked_pairs = 0
        for idx, u in enumerate(nbrs):
            for v in nbrs[idx + 1:]:
                # Check both directions: testing only u -> v would make the
                # result depend on the order in which neighbors are listed
                # whenever has_edge is not symmetric (directed graphs).
                if graph.has_edge(u, v) or graph.has_edge(v, u):
                    linked_pairs += 1

        possible_pairs = k * (k - 1) / 2
        coefficients.append(linked_pairs / possible_pairs)

    return float(np.mean(coefficients))