Spaces:
Paused
Paused
| import os | |
| import json | |
| import pandas as pd | |
| import argparse | |
def _read_first_translation(json_path):
    """Return the first entry of ``final_translations_record`` from a JSON file.

    Args:
        json_path: Path of the per-row metadata JSON file.

    Returns:
        The first element of the ``final_translations_record`` list, or
        ``None`` when the file is missing, unreadable, not valid JSON, not a
        JSON object, or when the record is absent, empty, or not a list.
        Every failure is reported on stdout; none of them raises.
    """
    # EAFP: open directly instead of checking os.path.exists() first, which
    # avoids a second filesystem lookup and the check-then-open race.
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"File not found: {json_path}")
        return None
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error reading {json_path}: {e}")
        return None

    # A top-level list/string/number has no .get(); treat it as a bad file
    # for this row rather than letting AttributeError abort the whole run.
    if not isinstance(data, dict):
        print(f"Error reading {json_path}: expected a JSON object, got {type(data).__name__}")
        return None

    final_record = data.get("final_translations_record", [])
    if isinstance(final_record, list) and final_record:
        return final_record[0]
    return None


def process_csv(num, input_csv, output_csv, column_name):
    """Add a column to a CSV, filled from per-row metadata JSON files.

    For each row index ``idx`` of the input CSV, the file
    ``<input_csv without extension>/metadata_<idx>_iter_<num>.json`` is read
    and the first element of its ``final_translations_record`` list is stored
    in ``column_name``. Rows whose JSON file is missing, unreadable, or lacks a
    usable record get an empty value. The result is written to ``output_csv``
    without the index column.

    Args:
        num: Iteration number embedded in the JSON file names.
        input_csv: Path of the CSV file to read.
        output_csv: Path of the CSV file to write.
        column_name: Name of the column that receives the extracted values
            (an existing column of that name is overwritten).

    Returns:
        None. Read and write failures are printed, not raised; if the input
        CSV cannot be read, nothing is written.
    """
    # Broad catch kept on purpose: this is the script's I/O boundary and
    # pandas can raise many unrelated exception types while parsing.
    try:
        df = pd.read_csv(input_csv)
    except Exception as e:
        print(f"Error reading {input_csv}: {e}")
        return

    # The JSON files live in a directory named after the CSV (minus extension).
    json_dir = os.path.splitext(input_csv)[0]
    values = [
        _read_first_translation(
            os.path.join(json_dir, f"metadata_{idx}_iter_{num}.json")
        )
        for idx in df.index
    ]

    # dtype=object keeps values exactly as loaded (no int -> float coercion
    # when some rows are None), matching a column initialised with None.
    df[column_name] = pd.Series(values, index=df.index, dtype=object)

    try:
        df.to_csv(output_csv, index=False)
        print(f"Saved successfully: {output_csv}")
    except Exception as e:
        print(f"Error saving {output_csv}: {e}")
| # Example command: python memory2csv.py --num 5 --input_csv valid_en_ja.csv --output_csv eval_en_ja.csv --column_name mpc | |
if __name__ == '__main__':
    # Command-line entry point: every option is required, and each one maps
    # directly onto the process_csv parameter of the same name.
    cli_options = (
        ('--num', int, 'Iteration number used in JSON filenames'),
        ('--input_csv', str, 'Path to input CSV file'),
        ('--output_csv', str, 'Path to save the output CSV file'),
        ('--column_name', str, 'Column name to store extracted values'),
    )

    arg_parser = argparse.ArgumentParser(
        description='Process CSV and extract data from JSON files.'
    )
    for flag, value_type, help_text in cli_options:
        arg_parser.add_argument(flag, type=value_type, required=True, help=help_text)

    cli_args = arg_parser.parse_args()
    process_csv(
        num=cli_args.num,
        input_csv=cli_args.input_csv,
        output_csv=cli_args.output_csv,
        column_name=cli_args.column_name,
    )