#!/usr/bin/env python3
# filepath: /home/skvrnjan/hoho/find_best_results.py
# This script scans a directory for result files (text files typically starting
# with "results_vt" within subdirectories matching a given prefix).
# It parses these files to extract metrics like Mean HSS, Mean F1, Mean IoU,
# Vertex Threshold, Edge Threshold, and Only Predicted Connections.
# The script then identifies and prints the top N results (default N=10)
# for Mean HSS, Mean F1, and Mean IoU, along with their associated configuration
# parameters.
import os
import re

# Maximum number of entries kept in each per-metric ranking. Read by
# add_to_top_n (as the list size cap) and by print_top_n_results (in the
# section header).
N_TOP = 10  # Number of top results to keep for each category

def parse_result_file(filepath):
    """
    Parse one result file and return its metrics and configuration values.

    Lines are recognised by their prefix at the very start of the line:
      * "Mean HSS:", "Mean F1:", "Mean IoU:" -- the last whitespace-separated
        token on the line is converted to float.
      * "Configuration:" -- the rest of the line is searched for
        ``vertex_threshold': <number>``, ``edge_threshold': <number>`` and
        ``only_predicted_connections': True|False``.

    If a prefix occurs several times, the last occurrence wins.

    Returns a dictionary:
    {
        'Mean HSS': float,                   # 0.0 if the line is absent
        'Mean F1': float,                    # 0.0 if the line is absent
        'Mean IoU': float,                   # 0.0 if the line is absent
        'Vertex Threshold': float or None,   # None if not found
        'Edge Threshold': float or None,     # None if not found
        'Only Predicted Connections': bool or None
    }

    Raises:
        OSError: if the file cannot be opened or read.
        ValueError: if the last token of a metric line is not a number.
    """
    # A full float literal: optional sign, decimal digits, optional exponent.
    # The exponent matters because Python writes small floats as e.g. 1e-05;
    # a plain "[0-9.]+" would stop at the "e" and silently yield 1.0.
    number = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
    threshold_patterns = (
        ('Vertex Threshold', re.compile(r"vertex_threshold': (" + number + r")")),
        ('Edge Threshold', re.compile(r"edge_threshold': (" + number + r")")),
    )
    opc_pattern = re.compile(r"only_predicted_connections': (True|False)")
    metric_names = ('Mean HSS', 'Mean F1', 'Mean IoU')

    metrics = {
        'Mean HSS': 0.0,
        'Mean F1': 0.0,
        'Mean IoU': 0.0,
        'Vertex Threshold': None,
        'Edge Threshold': None,
        'Only Predicted Connections': None
    }

    with open(filepath, 'r') as f:
        # Iterate the file object directly; no need to hold all lines at once.
        for line in f:
            if line.startswith("Configuration:"):
                config_line = line.partition("Configuration:")[2].strip()
                for key, pattern in threshold_patterns:
                    match = pattern.search(config_line)
                    if match:
                        metrics[key] = float(match.group(1))
                opc_match = opc_pattern.search(config_line)
                if opc_match:
                    metrics['Only Predicted Connections'] = opc_match.group(1) == "True"
                continue

            for name in metric_names:
                if line.startswith(name + ":"):
                    metrics[name] = float(line.split()[-1])
                    break
    return metrics

def add_to_top_n(top_n_list, item_details):
    """
    Insert a candidate into a bounded ranking, mutating the list in place.

    top_n_list: list of dicts with 'value', 'file', 'metrics', expected to be
        sorted by 'value' in descending order on entry; it is left sorted the
        same way on exit.
    item_details: dict of the same shape to be considered for inclusion.

    The ranking holds at most N_TOP (module-level constant) entries. Once it
    is full, the candidate replaces the current last entry only if its
    'value' is strictly greater; otherwise the list is left untouched.
    Returns None.
    """
    if len(top_n_list) >= N_TOP:
        lowest = top_n_list[-1]  # last element is the worst, given sorted input
        if not (item_details['value'] > lowest['value']):
            return
        del top_n_list[-1]

    top_n_list.append(item_details)
    top_n_list.sort(key=lambda entry: entry['value'], reverse=True)
def print_top_n_results(category_name, top_n_list):
    """
    Print a ranked report for one metric category to standard output.

    category_name: label of the ranking metric ('Mean HSS', 'Mean F1' or
        'Mean IoU' in this script); also used to skip that metric when the
        remaining ones are listed for context.
    top_n_list: list of dicts with 'value', 'file' and 'metrics', printed in
        the order given (rank 1 first).

    If the list is empty a single "No valid results found" line is printed.
    Configuration values that are missing or None are shown as "N/A".
    """
    if not top_n_list:
        print(f"No valid results found for {category_name}.")
        return

    print(f"\n--- Top {len(top_n_list)} by {category_name} (up to {N_TOP}) ---")
    for rank, item in enumerate(top_n_list, start=1):
        metrics = item['metrics']
        print(f"  Rank {rank}:")
        print(f"    File: {item['file']}")
        print(f"    {category_name}: {item['value']:.4f}")  # Primary metric for this category

        # Display the other metrics for context (the primary one is already shown).
        for key in ('Mean HSS', 'Mean F1', 'Mean IoU'):
            if key != category_name:
                print(f"    {key}: {metrics.get(key, 0.0):.4f}")

        # The keys may be present with value None, in which case a .get()
        # default would never kick in; test for None explicitly so "N/A" is
        # shown. `is None` (not truthiness) keeps False and 0.0 printable.
        for key in ('Vertex Threshold', 'Edge Threshold', 'Only Predicted Connections'):
            value = metrics.get(key)
            shown = 'N/A' if value is None else value
            print(f"    {key}: {shown}")

def main(results_dir, folder_prefix="v4"):
    """
    Scan results_dir for result files and print the top results per metric.

    results_dir: directory to walk.
    folder_prefix: only top-level subdirectories of results_dir whose name
        starts with this prefix are entered; everything beneath such a
        directory is scanned. Files located directly in results_dir are
        scanned regardless of the prefix.

    A result file is any file named "results_vt*.txt". Each one is parsed
    with parse_result_file and offered to the Mean HSS, Mean F1 and Mean IoU
    rankings. Files that cannot be read or parsed (OSError / ValueError) are
    reported and skipped, but still count towards "Files scanned".

    Prints the report to standard output and returns None.
    """
    # One ranking per metric; each holds {'value': float, 'file': str, 'metrics': dict}.
    rankings = {"Mean HSS": [], "Mean F1": [], "Mean IoU": []}

    files_scanned = 0

    for root, dirs, files in os.walk(results_dir):
        if os.path.relpath(root, results_dir) == os.curdir:
            # Apply the prefix filter at the top level only, so that
            # subdirectories of a matching folder are still visited.
            # Assigning to dirs[:] prunes os.walk's traversal in place.
            dirs[:] = sorted(d for d in dirs if d.startswith(folder_prefix))
        else:
            # Sorted traversal makes tie-breaking between equal scores
            # independent of filesystem enumeration order.
            dirs.sort()

        for file in sorted(files):
            if not (file.endswith(".txt") and file.startswith("results_vt")):
                continue

            files_scanned += 1
            filepath = os.path.join(root, file)
            try:
                parsed_metrics = parse_result_file(filepath)
            except (OSError, ValueError) as e:
                # Best-effort scan: one unreadable or malformed file must not
                # abort the whole run.
                print(f"Error processing {filepath}: {e}")
                continue

            relative_filepath = os.path.relpath(filepath, results_dir)
            for category, top_list in rankings.items():
                add_to_top_n(top_list, {'value': parsed_metrics[category],
                                        'file': relative_filepath,
                                        'metrics': parsed_metrics})

    print(f"Files scanned: {files_scanned}")

    if files_scanned == 0:
        print(f"No result files found in {results_dir} (folders starting with '{folder_prefix}')")
        return

    for category, top_list in rankings.items():
        print_top_n_results(category, top_list)

if __name__ == "__main__":
    import sys

    # Usage: find_best_results.py [RESULTS_DIR] [FOLDER_PREFIX]
    cli_args = sys.argv[1:]

    # First argument: directory to scan; falls back to a placeholder path
    # that is meant to be edited or overridden on the command line.
    results_dir = cli_args[0] if cli_args else "/path/to/your/results/directory"

    # Second argument: folder prefix; when omitted the empty string is used,
    # which every folder name starts with.
    folder_prefix = cli_args[1] if len(cli_args) > 1 else ""

    main(results_dir, folder_prefix)