#!/usr/bin/env python3
# filepath: /home/skvrnjan/hoho/find_best_results.py
# This script scans a directory for result files (text files typically starting
# with "results_vt" within subdirectories matching a given prefix).
# It parses these files to extract metrics like Mean HSS, Mean F1, Mean IoU,
# Vertex Threshold, Edge Threshold, and Only Predicted Connections.
# The script then identifies and prints the top N results (default N=10)
# for Mean HSS, Mean F1, and Mean IoU, along with their associated configuration
# parameters.
import os
import re
import sys

N_TOP = 10  # Number of top results to keep for each category

# Score lines recognised in a result file; each name doubles as the key used
# in the parsed metrics dictionary and as the category title when printing.
_SCORE_KEYS = ('Mean HSS', 'Mean F1', 'Mean IoU')

# Patterns for the values embedded in the "Configuration:" line. Compiled once
# here because parse_result_file() is called for every result file.
_VERTEX_THRESHOLD_RE = re.compile(r"vertex_threshold': ([0-9.]+)")
_EDGE_THRESHOLD_RE = re.compile(r"edge_threshold': ([0-9.]+)")
_ONLY_PREDICTED_RE = re.compile(r"only_predicted_connections': (True|False)")


def parse_result_file(filepath):
    """
    Parse the result file and return a dictionary with the metrics:
    {
        'Mean HSS': float,
        'Mean F1': float,
        'Mean IoU': float,
        'Vertex Threshold': float,
        'Edge Threshold': float,
        'Only Predicted Connections': bool
    }

    Scores that do not appear in the file keep their default of 0.0 and
    configuration values that do not appear stay None. If a line type occurs
    more than once, the last occurrence wins.

    Raises:
        OSError: if the file cannot be opened or read.
        ValueError: if a score or threshold cannot be converted to float.
    """
    metrics = {
        'Mean HSS': 0.0,
        'Mean F1': 0.0,
        'Mean IoU': 0.0,
        'Vertex Threshold': None,
        'Edge Threshold': None,
        'Only Predicted Connections': None
    }

    with open(filepath, 'r') as f:
        for line in f:
            if line.startswith("Configuration:"):
                config_line = line.partition("Configuration:")[2].strip()
                vt_match = _VERTEX_THRESHOLD_RE.search(config_line)
                et_match = _EDGE_THRESHOLD_RE.search(config_line)
                opc_match = _ONLY_PREDICTED_RE.search(config_line)
                if vt_match:
                    metrics['Vertex Threshold'] = float(vt_match.group(1))
                if et_match:
                    metrics['Edge Threshold'] = float(et_match.group(1))
                if opc_match:
                    metrics['Only Predicted Connections'] = opc_match.group(1) == "True"
                continue

            for key in _SCORE_KEYS:
                if line.startswith(key + ":"):
                    # The score is the last whitespace-separated token.
                    metrics[key] = float(line.split()[-1])
                    break

    return metrics


def add_to_top_n(top_n_list, item_details):
    """
    Adds an item to a list maintaining the top N items sorted by 'value'.

    item_details: dict with 'value', 'file', 'metrics'.
    top_n_list: list of such dicts, kept sorted by 'value' descending.
                It is modified in place; nothing is returned.
    Uses global N_TOP. When the list is already full, the item is only
    inserted if its value is strictly greater than the current worst entry.
    """
    if N_TOP <= 0:
        # Nothing can be kept; also avoids indexing into an empty list below.
        return

    if len(top_n_list) >= N_TOP:
        # Assumes the list is sorted, so the last entry is the worst one.
        if item_details['value'] <= top_n_list[-1]['value']:
            return
        top_n_list.pop()

    top_n_list.append(item_details)
    # list.sort is stable, so entries with equal values keep insertion order.
    top_n_list.sort(key=lambda x: x['value'], reverse=True)


def _display_or_na(value):
    """Return 'N/A' for a missing (None) value, otherwise the value itself."""
    return 'N/A' if value is None else value


def print_top_n_results(category_name, top_n_list):
    """
    Prints the top N results for a given category.

    category_name: the metric the list is ranked by (e.g. 'Mean HSS').
    top_n_list: list of dicts with 'value', 'file', 'metrics', already sorted.
    """
    if not top_n_list:
        print(f"No valid results found for {category_name}.")
        return

    print(f"\n--- Top {len(top_n_list)} by {category_name} (up to {N_TOP}) ---")
    for rank, item in enumerate(top_n_list, start=1):
        metrics = item['metrics']
        print(f" Rank {rank}:")
        print(f" File: {item['file']}")
        print(f" {category_name}: {item['value']:.4f}")  # Primary metric for this category

        # Display the other metrics for context, skipping the primary one.
        for key in _SCORE_KEYS:
            if key != category_name:
                print(f" {key}: {metrics.get(key, 0.0):.4f}")

        # parse_result_file() stores None for configuration values it did not
        # find, so a dict.get() default would never be used; map None to
        # 'N/A' explicitly (a parsed False must still print as False).
        print(f" Vertex Threshold: {_display_or_na(metrics.get('Vertex Threshold'))}")
        print(f" Edge Threshold: {_display_or_na(metrics.get('Edge Threshold'))}")
        print(f" Only Predicted Connections: {_display_or_na(metrics.get('Only Predicted Connections'))}")


def _iter_result_files(results_dir, folder_prefix):
    """Yield paths of result files under results_dir, in a deterministic order.

    Files directly inside results_dir are included. Below that, only
    top-level folders whose name starts with folder_prefix are entered, and
    everything underneath such a folder is searched.
    """
    for root, dirs, files in os.walk(results_dir):
        if os.path.relpath(root, results_dir) == os.curdir:
            # Prune only at the top level so that sub-directories of a
            # matching folder are still visited, whatever their own name is.
            dirs[:] = [d for d in dirs if d.startswith(folder_prefix)]
        # Sort in place so the walk order (and tie order) is reproducible.
        dirs.sort()

        for filename in sorted(files):
            if filename.endswith(".txt") and filename.startswith("results_vt"):
                yield os.path.join(root, filename)


def main(results_dir, folder_prefix="v4"):
    """
    Scan results_dir for result files and print the top results per metric.

    results_dir: directory to search.
    folder_prefix: only top-level folders starting with this prefix are
                   searched (an empty string matches every folder).
    Files that cannot be read or parsed are reported and skipped.
    """
    # One ranking per score; each is a list of
    # {'value': float, 'file': str, 'metrics': dict}.
    top_results = {key: [] for key in _SCORE_KEYS}

    files_scanned = 0

    for filepath in _iter_result_files(results_dir, folder_prefix):
        files_scanned += 1
        try:
            parsed_metrics = parse_result_file(filepath)
        except (OSError, ValueError) as e:
            print(f"Error processing {filepath}: {e}")
            continue

        relative_filepath = os.path.relpath(filepath, results_dir)
        for key in _SCORE_KEYS:
            add_to_top_n(
                top_results[key],
                {'value': parsed_metrics[key], 'file': relative_filepath, 'metrics': parsed_metrics},
            )

    print(f"Files scanned: {files_scanned}")

    if files_scanned == 0:
        print(f"No result files found in {results_dir} (folders starting with '{folder_prefix}')")
        return

    for key in _SCORE_KEYS:
        print_top_n_results(key, top_results[key])


if __name__ == "__main__":
    if len(sys.argv) > 1:
        results_dir = sys.argv[1]
    else:
        results_dir = "/path/to/your/results/directory"  # MODIFIED: Placeholder for results directory

    # You can specify a different folder prefix as the second argument
    folder_prefix = sys.argv[2] if len(sys.argv) > 2 else ""

    main(results_dir, folder_prefix)