Upload 35 files
- .gitignore +9 -0
- LICENSE +7 -0
- README.md +17 -3
- data/samples/sample.jsonl +1 -0
- deployment/Dockerfile +7 -0
- deployment/api_service.py +3 -0
- deployment/gradio_service.py +3 -0
- deployment/huggingface_spaces/app.py +18 -0
- deployment/requirements_docker.txt +2 -0
- models/README.md +4 -0
- notebooks/01_data_exploration.ipynb +1 -0
- notebooks/02_preprocessing.ipynb +1 -0
- notebooks/03_model_finetuning.ipynb +1 -0
- notebooks/04_evaluation.ipynb +1 -0
- notebooks/05_inference_demo.ipynb +1 -0
- requirements.txt +12 -0
- setup.py +8 -0
- src/__init__.py +1 -0
- src/app/gradio_demo.py +19 -0
- src/app/main.py +19 -0
- src/config/model_config.yaml +11 -0
- src/config/training_args.yaml +9 -0
- src/data/data_loader.py +21 -0
- src/data/preprocessing.py +15 -0
- src/model/evaluate_model.py +11 -0
- src/model/inference.py +22 -0
- src/model/model_builder.py +12 -0
- src/model/train_model.py +14 -0
- src/utils/helper.py +3 -0
- src/utils/logger.py +4 -0
- src/utils/metrics.py +6 -0
- tests/test_api.py +15 -0
- tests/test_data.py +7 -0
- tests/test_inference.py +8 -0
- tests/test_model.py +6 -0
.gitignore
ADDED
@@ -0,0 +1,9 @@
+__pycache__/
+.venv/
+.env
+*.pyc
+*.pkl
+*.pt
+experiments/
+models/
+.DS_Store
LICENSE
ADDED
@@ -0,0 +1,7 @@
+MIT License
+
+Copyright (c) 2025 AutoCodeFix
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction... (full MIT text should be placed here)
README.md
CHANGED
@@ -1,3 +1,17 @@
-
-
-
+# AutoCodeFix (Lightweight Functional Version)
+
+This is a lightweight, fully runnable scaffold of the AutoCodeFix project. It contains example code,
+simulated (dummy) model logic, minimal FastAPI and Gradio interfaces, and tests, so you can run it
+locally and upload it to Hugging Face Spaces. The model folders are intentionally left empty for you
+to add real model weights later.
+
+## Quickstart (local)
+```bash
+python -m venv .venv
+source .venv/bin/activate  # on Windows use: .venv\Scripts\activate
+pip install -r requirements.txt
+# Run API
+uvicorn src.app.main:app --reload
+# Or run Gradio demo
+python src/app/gradio_demo.py
+```
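Once the dependencies are installed and the API is running, the `/repair` endpoint defined in `src/app/main.py` can be exercised from Python. A minimal sketch using only the standard library, assuming the default uvicorn address (http://127.0.0.1:8000):

```python
import json
from urllib.request import Request, urlopen

# The request/response shapes mirror RepairRequest and the /repair handler in src/app/main.py.
payload = {"buggy": "def add(a, b)\n    return a + b", "error_log": "SyntaxError: invalid syntax"}
req = Request(
    "http://127.0.0.1:8000/repair",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urlopen(req) as resp:
    print(json.loads(resp.read()))  # {'fixed': '...', 'explanation': '...'}
```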
data/samples/sample.jsonl
ADDED
@@ -0,0 +1 @@
+{"id": "1", "language": "python", "buggy": "def add(a, b)\n return a + b", "fixed": "def add(a, b):\n return a + b", "error_log": "SyntaxError: invalid syntax"}
deployment/Dockerfile
ADDED
@@ -0,0 +1,7 @@
+FROM python:3.10-slim
+WORKDIR /app
+COPY . /app
+RUN pip install --upgrade pip
+RUN pip install -r requirements.txt
+EXPOSE 8000
+CMD ["uvicorn", "src.app.main:app", "--host", "0.0.0.0", "--port", "8000"]
deployment/api_service.py
ADDED
@@ -0,0 +1,3 @@
+from uvicorn import run
+if __name__ == '__main__':
+    run('src.app.main:app', host='0.0.0.0', port=8000)
deployment/gradio_service.py
ADDED
@@ -0,0 +1,3 @@
+from src.app.gradio_demo import iface
+if __name__ == '__main__':
+    iface.launch(server_name='0.0.0.0', server_port=7860)
deployment/huggingface_spaces/app.py
ADDED
@@ -0,0 +1,18 @@
+import gradio as gr
+from src.model.inference import AutoCodeFixer
+
+fixer = AutoCodeFixer(model_dir=None)
+
+def predict(buggy, error_log):
+    fixed, explanation = fixer.repair(buggy, error_log)
+    return fixed, explanation
+
+iface = gr.Interface(
+    fn=predict,
+    inputs=[gr.Textbox(lines=15, placeholder='Buggy code'), gr.Textbox(lines=4, placeholder='Error log (optional)')],
+    outputs=[gr.Textbox(lines=15), gr.Textbox(lines=6)],
+    title='AutoCodeFix (Spaces - Lightweight)'
+)
+
+if __name__ == '__main__':
+    iface.launch()
deployment/requirements_docker.txt
ADDED
@@ -0,0 +1,2 @@
+-r ../requirements.txt
+uvicorn[standard]
models/README.md
ADDED
@@ -0,0 +1,4 @@
+This folder contains placeholders for real model artifacts.
+- models/starcoder2/  <-- leave empty until you upload model weights
+- models/codet5plus/  <-- leave empty until you upload model weights
+- models/tokenizer/   <-- leave empty until you upload tokenizer files
notebooks/01_data_exploration.ipynb
ADDED
@@ -0,0 +1 @@
+{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Notebook placeholder\nThis is a placeholder notebook. Replace with full notebooks for EDA/training."]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
notebooks/02_preprocessing.ipynb
ADDED
@@ -0,0 +1 @@
+{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Notebook placeholder\nThis is a placeholder notebook. Replace with full notebooks for EDA/training."]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
notebooks/03_model_finetuning.ipynb
ADDED
@@ -0,0 +1 @@
+{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Notebook placeholder\nThis is a placeholder notebook. Replace with full notebooks for EDA/training."]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
notebooks/04_evaluation.ipynb
ADDED
@@ -0,0 +1 @@
+{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Notebook placeholder\nThis is a placeholder notebook. Replace with full notebooks for EDA/training."]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
notebooks/05_inference_demo.ipynb
ADDED
@@ -0,0 +1 @@
+{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Notebook placeholder\nThis is a placeholder notebook. Replace with full notebooks for EDA/training."]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
requirements.txt
ADDED
@@ -0,0 +1,12 @@
+fastapi
+uvicorn
+gradio
+transformers
+torch
+datasets
+pandas
+numpy
+pytest
+diff-match-patch
+pyyaml
+peft
setup.py
ADDED
@@ -0,0 +1,8 @@
+from setuptools import setup, find_packages
+
+setup(
+    name='autocodefix',
+    version='0.1.0',
+    # include the top-level `src` package so `from src.model...` imports keep working after install
+    packages=find_packages(include=['src', 'src.*']),
+)
src/__init__.py
ADDED
@@ -0,0 +1 @@
+# AutoCodeFix package
src/app/gradio_demo.py
ADDED
@@ -0,0 +1,19 @@
+import gradio as gr
+from src.model.inference import AutoCodeFixer
+
+fixer = AutoCodeFixer()
+
+def run_fix(buggy, error_log):
+    fixed, explanation = fixer.repair(buggy, error_log)
+    return fixed, explanation
+
+iface = gr.Interface(
+    fn=run_fix,
+    inputs=[gr.Textbox(lines=15, placeholder='Paste buggy code here'), gr.Textbox(lines=4, placeholder='Error log (optional)')],
+    outputs=[gr.Textbox(lines=15, label='Fixed Code'), gr.Textbox(lines=6, label='Explanation')],
+    title='AutoCodeFix (Lightweight)',
+    description='Paste buggy code and optional error log — model returns a repaired version and a brief explanation.'
+)
+
+if __name__ == '__main__':
+    iface.launch()
src/app/main.py
ADDED
@@ -0,0 +1,19 @@
+from fastapi import FastAPI
+from pydantic import BaseModel
+from src.model.inference import AutoCodeFixer
+
+app = FastAPI(title='AutoCodeFix API')
+fixer = AutoCodeFixer()
+
+class RepairRequest(BaseModel):
+    buggy: str
+    error_log: str | None = None
+
+@app.post('/repair')
+async def repair_code(req: RepairRequest):
+    fixed, explanation = fixer.repair(req.buggy, req.error_log)
+    return {'fixed': fixed, 'explanation': explanation}
+
+@app.get('/')
+async def root():
+    return {'message': 'AutoCodeFix is running (lightweight demo)'}
src/config/model_config.yaml
ADDED
@@ -0,0 +1,11 @@
+model_name: "Salesforce/codet5p-small"
+task: "code-repair"
+max_input_length: 1024
+max_output_length: 512
+learning_rate: 3e-5
+batch_size: 8
+num_train_epochs: 1
+lora:
+  r: 8
+  alpha: 16
+  dropout: 0.05
src/config/training_args.yaml
ADDED
@@ -0,0 +1,9 @@
+output_dir: experiments/checkpoints
+logging_dir: experiments/logs
+save_steps: 500
+eval_steps: 500
+logging_steps: 100
+per_device_train_batch_size: 4
+per_device_eval_batch_size: 4
+warmup_steps: 100
+weight_decay: 0.01
src/data/data_loader.py
ADDED
@@ -0,0 +1,21 @@
+import os, json
+from datasets import Dataset
+
+def load_jsonl_dataset(path: str):
+    files = []
+    if os.path.isdir(path):
+        for fname in os.listdir(path):
+            if fname.endswith('.jsonl') or fname.endswith('.json'):
+                files.append(os.path.join(path, fname))
+    elif os.path.isfile(path):
+        files = [path]
+    if not files:
+        raise FileNotFoundError(f'No data files found in {path}')
+    records = []
+    for f in files:
+        with open(f, 'r', encoding='utf-8') as fh:
+            for line in fh:
+                line = line.strip()
+                if not line: continue
+                records.append(json.loads(line))
+    return Dataset.from_list(records)
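For reference, `load_jsonl_dataset` accepts either a directory of `.jsonl`/`.json` files or a single file. A minimal sketch against the bundled sample, assuming `datasets` is installed and the script is run from the repository root:

```python
from src.data.data_loader import load_jsonl_dataset

# data/samples/sample.jsonl ships with a single record.
ds = load_jsonl_dataset('data/samples/sample.jsonl')
print(len(ds))            # 1
print(ds[0]['language'])  # python
```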
src/data/preprocessing.py
ADDED
@@ -0,0 +1,15 @@
+from transformers import AutoTokenizer
+
+def preprocess_examples(examples, tokenizer: AutoTokenizer, max_input_length: int, max_output_length: int):
+    inputs = []
+    targets = []
+    for buggy, fixed, err in zip(examples.get('buggy', []), examples.get('fixed', []), examples.get('error_log', [])):
+        prompt = "### Buggy code:\n" + buggy
+        if err:
+            prompt += "\n### Error Log:\n" + err
+        inputs.append(prompt)
+        targets.append(fixed)
+    model_inputs = tokenizer(inputs, truncation=True, padding='max_length', max_length=max_input_length)
+    labels = tokenizer(targets, truncation=True, padding='max_length', max_length=max_output_length).input_ids
+    model_inputs['labels'] = labels
+    return model_inputs
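A sketch of wiring `preprocess_examples` into a `datasets` pipeline with the checkpoint named in `model_config.yaml`. Note that for real training you would typically also replace pad token ids in `labels` with -100 so padding is ignored by the loss; the scaffold skips that step:

```python
from transformers import AutoTokenizer

from src.data.data_loader import load_jsonl_dataset
from src.data.preprocessing import preprocess_examples

tokenizer = AutoTokenizer.from_pretrained('Salesforce/codet5p-small')
ds = load_jsonl_dataset('data/samples/sample.jsonl')

# map() with batched=True passes a dict of column lists, which is what preprocess_examples expects.
tokenized = ds.map(
    lambda batch: preprocess_examples(batch, tokenizer, max_input_length=1024, max_output_length=512),
    batched=True,
    remove_columns=ds.column_names,
)
print(tokenized)
```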
src/model/evaluate_model.py
ADDED
@@ -0,0 +1,11 @@
+from src.model.inference import AutoCodeFixer
+
+def main():
+    fixer = AutoCodeFixer()
+    sample = 'def add(a,b)\n return a + b'
+    fixed, explanation = fixer.repair(sample)
+    print('Sample fixed code:\n', fixed)
+    print('Explanation:\n', explanation)
+
+if __name__ == '__main__':
+    main()
src/model/inference.py
ADDED
@@ -0,0 +1,22 @@
+import re
+
+class AutoCodeFixer:
+    def __init__(self, model_dir: str = None):
+        # lightweight demo: no large model loaded
+        self.model_dir = model_dir
+
+    def _simple_fix(self, buggy: str):
+        # Fix missing colons in Python def/class; normalize the indentation of an obviously misplaced return.
+        fixed = buggy
+        fixed = re.sub(r"def (\w+\(.*\))\n\s+return", r"def \1:\n    return", fixed)
+        fixed = re.sub(r"class (\w+)\n", r"class \1:\n", fixed)
+        # ensure trailing newline
+        if not fixed.endswith('\n'):
+            fixed += '\n'
+        return fixed
+
+    def repair(self, buggy: str, error_log: str = None, max_new_tokens: int = 256):
+        # simple heuristic-based repair
+        fixed = self._simple_fix(buggy)
+        explanation = 'Applied heuristic fixes: added missing colon(s) and normalized simple indentation.'
+        return fixed, explanation
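Usage of the heuristic fixer is straightforward and needs no model weights; for example:

```python
from src.model.inference import AutoCodeFixer

fixer = AutoCodeFixer()
buggy = "def add(a, b)\n    return a + b"  # missing colon after the signature
fixed, explanation = fixer.repair(buggy, error_log="SyntaxError: invalid syntax")
print(fixed)        # def add(a, b):\n    return a + b
print(explanation)  # Applied heuristic fixes: ...
```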
src/model/model_builder.py
ADDED
@@ -0,0 +1,12 @@
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+
+def build_model_and_tokenizer(model_name: str = 'Salesforce/codet5p-small'):
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+    except Exception:
+        # Fallback for offline/demo use: load a minimal public tiny checkpoint
+        # instead of the real model so the scaffold keeps working.
+        tokenizer = AutoTokenizer.from_pretrained('sshleifer/tiny-mbart', use_fast=True)
+        model = AutoModelForSeq2SeqLM.from_pretrained('sshleifer/tiny-mbart')
+    return model, tokenizer
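A sketch of how the built model and tokenizer could be used for generation once real weights are in place. The untuned `codet5p-small` checkpoint (or the tiny fallback) is not expected to produce a meaningful repair; this only shows the call pattern:

```python
from src.model.model_builder import build_model_and_tokenizer

model, tokenizer = build_model_and_tokenizer()

# Same prompt format as src/data/preprocessing.py
prompt = "### Buggy code:\ndef add(a, b)\n    return a + b\n### Error Log:\nSyntaxError: invalid syntax"
inputs = tokenizer(prompt, return_tensors='pt', truncation=True, max_length=1024)
output_ids = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```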
src/model/train_model.py
ADDED
@@ -0,0 +1,14 @@
+import argparse, yaml
+from src.model.model_builder import build_model_and_tokenizer
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--config', type=str, default='src/config/model_config.yaml')
+    args = parser.parse_args()
+    with open(args.config, 'r') as fh:
+        cfg = yaml.safe_load(fh)
+    model, tokenizer = build_model_and_tokenizer(cfg.get('model_name'))
+    print('Loaded model and tokenizer (demo). This script is a scaffold for real training.')
+
+if __name__ == '__main__':
+    main()
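The training script stops after loading the model. A hedged sketch of how the LoRA settings from `model_config.yaml` could be applied with `peft` before handing off to a `Seq2SeqTrainer`; the `target_modules` value is an assumption for the T5-style attention layers in codet5p and should be verified, and the trainer wiring is left commented because it needs a tokenized dataset such as the one from the preprocessing sketch above:

```python
import yaml
from peft import LoraConfig, TaskType, get_peft_model
from transformers import DataCollatorForSeq2Seq, Seq2SeqTrainer, Seq2SeqTrainingArguments

from src.model.model_builder import build_model_and_tokenizer

with open('src/config/model_config.yaml') as fh:
    cfg = yaml.safe_load(fh)

model, tokenizer = build_model_and_tokenizer(cfg['model_name'])

lora = cfg['lora']
peft_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    r=lora['r'],
    lora_alpha=lora['alpha'],
    lora_dropout=lora['dropout'],
    target_modules=["q", "v"],  # assumption: T5-style attention projection names
)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

# trainer = Seq2SeqTrainer(
#     model=model,
#     args=Seq2SeqTrainingArguments(
#         output_dir='experiments/checkpoints',
#         per_device_train_batch_size=cfg['batch_size'],
#         learning_rate=float(cfg['learning_rate']),
#         num_train_epochs=cfg['num_train_epochs'],
#     ),
#     train_dataset=tokenized_train,  # e.g. the `tokenized` dataset from the preprocessing sketch
#     data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),
# )
# trainer.train()
```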
src/utils/helper.py
ADDED
@@ -0,0 +1,3 @@
+import difflib
+def unified_diff(a, b):
+    return '\n'.join(difflib.unified_diff(a.splitlines(), b.splitlines(), lineterm=''))
src/utils/logger.py
ADDED
@@ -0,0 +1,4 @@
+import logging
+def get_logger(name=__name__):
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+    return logging.getLogger(name)
src/utils/metrics.py
ADDED
@@ -0,0 +1,6 @@
+def exact_match_score(preds, refs):
+    correct = 0
+    for p, r in zip(preds, refs):
+        if p.strip() == r.strip():
+            correct += 1
+    return correct / max(1, len(preds))
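`unified_diff` from `src/utils/helper.py` and `exact_match_score` combine naturally when inspecting model output; a small example:

```python
from src.utils.helper import unified_diff
from src.utils.metrics import exact_match_score

pred = "def add(a, b):\n    return a + b\n"
ref  = "def add(a, b):\n    return a + b"

print(exact_match_score([pred], [ref]))        # 1.0 -- surrounding whitespace is stripped before comparing
print(unified_diff(ref, pred) or '<no diff>')  # identical after splitlines(), so the diff is empty
```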
tests/test_api.py
ADDED
@@ -0,0 +1,15 @@
+from fastapi.testclient import TestClient
+from src.app.main import app
+
+client = TestClient(app)
+
+def test_root():
+    r = client.get('/')
+    assert r.status_code == 200
+
+def test_repair_endpoint():
+    payload = {"buggy": "def add(a, b)\n return a + b"}
+    r = client.post('/repair', json=payload)
+    assert r.status_code == 200
+    data = r.json()
+    assert 'fixed' in data and 'explanation' in data
tests/test_data.py
ADDED
@@ -0,0 +1,7 @@
+import pytest
+from src.data.data_loader import load_jsonl_dataset
+import os
+
+def test_load_no_files_raises(tmp_path):
+    with pytest.raises(FileNotFoundError):
+        load_jsonl_dataset(str(tmp_path))
tests/test_inference.py
ADDED
@@ -0,0 +1,8 @@
+from src.model.inference import AutoCodeFixer
+
+def test_repair_returns_tuple():
+    fixer = AutoCodeFixer()
+    buggy = 'def add(a, b)\n return a + b'
+    fixed, explanation = fixer.repair(buggy)
+    assert isinstance(fixed, str)
+    assert isinstance(explanation, str)
tests/test_model.py
ADDED
@@ -0,0 +1,6 @@
+from src.model.model_builder import build_model_and_tokenizer
+
+def test_build_model_tokenizer():
+    model, tokenizer = build_model_and_tokenizer('sshleifer/tiny-mbart')
+    assert model is not None
+    assert tokenizer is not None