# estudo-agente / app.py
# natalieac's picture
# Update app.py
# aba5616 verified
import gradio as gr
import shutil
from typing import Any
from pathlib import Path
from backend.settings import settings
from backend.ingest import ingest_all
from backend.rag import retriever
from backend.llm_local import generate
from backend.summarizer import extractive_sents, map_reduce
# Create the raw-data, index and model directories up front so the rest of
# the module can assume they exist.
for directory in (settings.RAW_DIR, settings.INDEX_DIR, settings.MODEL_DIR):
    directory.mkdir(parents=True, exist_ok=True)
def _index_files_exist() -> bool:
    """Report whether an index file is already present on disk.

    Returns:
        True when either ``retriever.index_path`` exists (only checked if the
        retriever exposes that attribute) or ``faiss.index`` is present in
        ``settings.INDEX_DIR``; False otherwise.
    """
    # The module-level ``retriever`` import is used; no local re-import needed.
    index_path = getattr(retriever, "index_path", None)
    if index_path is not None and index_path.exists():
        return True
    return (settings.INDEX_DIR / "faiss.index").exists()
def _raw_has_files() -> bool:
    """Return True when ``settings.RAW_DIR`` holds at least one regular file.

    The search is recursive. Only real files count: directories are ignored,
    and files without an extension (such as the ``arquivo_subido`` fallback
    name used for uploads) are included, which matches what
    ``list_uploaded_files`` reports.
    """
    return any(entry.is_file() for entry in settings.RAW_DIR.rglob("*"))
def try_autoload_index() -> str | None:
    """Load the search index, building it first when only raw files exist.

    Side effects:
        Sets the module-level ``STATE_INDEXED`` flag to True on success and
        to False on every failure path.

    Returns:
        ``None`` when the index was loaded; otherwise a user-facing message
        explaining why the index is not available.
    """
    global STATE_INDEXED
    try:
        if not _index_files_exist():
            if not _raw_has_files():
                STATE_INDEXED = False
                return "Nenhum arquivo em backend/data/raw/. Envie arquivos (ou comite nessa pasta) e clique em Indexar."
            # Raw files are present but no index yet: parse and build it
            # before loading.
            ingest_all()
            retriever.build()
        retriever.load()
    except Exception as e:
        STATE_INDEXED = False
        # Separate the exception type from its message, as the other error
        # messages in this module do.
        return f"Falha ao preparar índice automaticamente: {type(e).__name__}: {e}"
    STATE_INDEXED = True
    return None
# Module-level flag: True once the retriever index has been loaded in this
# process. Updated by try_autoload_index() and build_index().
STATE_INDEXED: bool = False
def list_uploaded_files():
    """Return the sorted paths of all files under RAW_DIR, relative to it."""
    root = settings.RAW_DIR
    return sorted(
        str(entry.relative_to(root))
        for entry in root.glob("**/*")
        if entry.is_file()
    )
def _upload_basename(file_obj: Any, fallback: str) -> str:
    """Return a directory-free file name for an uploaded object.

    The name is taken from ``orig_name``, then ``name``, then ``fallback``.
    Only the final path component is kept so that an absolute path or
    ``../`` segments in a client-supplied name cannot redirect the write
    outside the destination directory.
    """
    raw_name = (
        getattr(file_obj, "orig_name", None)
        or getattr(file_obj, "name", None)
        or fallback
    )
    # Treat Windows separators as separators too before taking the basename.
    base = Path(str(raw_name).replace("\\", "/")).name
    if base in ("", ".", ".."):
        base = Path(fallback).name
    return base


def _save_one(file_obj: Any) -> str:
    """Store one item coming from the ``gr.File`` component in RAW_DIR.

    Accepted shapes, tried in this order:
      - ``str`` / ``Path``: a path on disk, copied into RAW_DIR;
      - object with a truthy ``.path``: that path is copied;
      - object with a callable ``.save``: called with the destination path;
      - object with a callable ``.read``: its content is written out.

    Args:
        file_obj: The uploaded item in any of the shapes above.

    Returns:
        The file name (no directory) under which the item was stored.

    Raises:
        ValueError: If ``file_obj`` matches none of the supported shapes.
    """
    dest_dir = settings.RAW_DIR
    dest_dir.mkdir(parents=True, exist_ok=True)

    if isinstance(file_obj, (str, Path)):
        src = Path(file_obj)
        dest = dest_dir / src.name
        shutil.copy(src, dest)
        return dest.name

    path = getattr(file_obj, "path", None)
    if path:
        src = Path(path)
        dest = dest_dir / _upload_basename(file_obj, src.name)
        shutil.copy(src, dest)
        return dest.name

    saver = getattr(file_obj, "save", None)
    if callable(saver):
        dest = dest_dir / _upload_basename(file_obj, "arquivo_subido")
        saver(str(dest))
        return dest.name

    reader = getattr(file_obj, "read", None)
    if callable(reader):
        data = reader()
        # Without the basename step, an absolute ``.name`` (e.g. a temp-file
        # path) would make ``dest_dir / name`` resolve to that absolute path
        # and overwrite the source instead of writing into RAW_DIR.
        dest = dest_dir / _upload_basename(file_obj, "arquivo_subido")
        with open(dest, "wb") as f:
            f.write(data)
        return dest.name

    raise ValueError("Formato de arquivo não reconhecido pelo upload.")
def upload_files(files):
    """Save every uploaded item and report the outcome to the UI.

    Args:
        files: Items from the file-upload component; ``None`` or an empty
            list means nothing was selected.

    Returns:
        A pair ``(dropdown_update, status_message)``: the dropdown update
        refreshes the choices with the files currently in RAW_DIR, and the
        message describes success or the error that occurred.
    """
    try:
        stored = [_save_one(entry) for entry in (files or [])]
        if stored:
            listing = ", ".join(stored)
        else:
            listing = "nenhum arquivo"
        feedback = "Upload concluído: " + listing
        refreshed = gr.update(choices=list_uploaded_files(), value=None)
        return refreshed, feedback + ". Agora clique em **Indexar**."
    except Exception as e:
        refreshed = gr.update(choices=list_uploaded_files(), value=None)
        return refreshed, f"Falha no upload: {type(e).__name__}: {e}"
def build_index():
    """Ingest raw files if needed, then build and load the index.

    Sets the module-level ``STATE_INDEXED`` flag and returns a Markdown
    status message for the UI; failures are reported in the message rather
    than raised.
    """
    global STATE_INDEXED
    try:
        if not _raw_has_files():
            STATE_INDEXED = False
            return "Nenhum arquivo encontrado em backend/data/raw/. Adicione arquivos ou faça upload."
        meta_file = settings.INDEX_DIR / "meta.jsonl"
        # NOTE(review): ingestion is skipped whenever meta.jsonl already
        # exists, so files uploaded after a first run may not be parsed —
        # confirm against ingest_all()/retriever.build().
        if not meta_file.exists():
            ingest_all()
        retriever.build()
        retriever.load()
    except Exception as e:
        STATE_INDEXED = False
        return f"Falha ao indexar/carregar: **{type(e).__name__}** — {e}"
    else:
        STATE_INDEXED = True
        return "Índice criado/carregado. Vá para **Conversar** ou **Resumir**."
def chat_answer(history, message):
    """Answer a chat message using retrieved context.

    Args:
        history: Current chat history as a list of ``(user, bot)`` pairs, or
            a falsy value when the chat is empty.
        message: The text typed by the user.

    Returns:
        A pair ``(updated_history, "")``; the empty string clears the input
        box. Errors and index warnings are appended to the history as a
        ``("system", text)`` entry instead of being raised.
    """
    history = history or []

    # A blank message has nothing to search for: leave the chat untouched
    # instead of querying the retriever and the LLM with empty input.
    if not (message or "").strip():
        return history, ""

    if not STATE_INDEXED:
        warn = try_autoload_index()
        if warn:
            return history + [("system", warn + "\nVá na aba **Upload & Indexar** e clique em **Indexar**.")], ""

    try:
        hits = retriever.search(message, top_k=settings.TOP_K)
        if not hits:
            ans = "Não encontrei trechos relevantes. Verifique se você **indexou** os arquivos."
        else:
            # Only the first TOP_K_RERANK hits are passed on as context.
            ctx = hits[:settings.TOP_K_RERANK]
            ans = generate(message, ctx)
        return history + [(message, ans)], ""
    except Exception as e:
        return history + [("system", f"Falha na busca/resposta: **{type(e).__name__}** — {e}")], ""
def summarize_run(filename, pages, chapter, query, style, length):
    """Produce a summary of the indexed material for the "Resumir" tab.

    Args:
        filename: Optional file to restrict the search to.
        pages: Optional page range written as ``"start-end"`` (e.g. "10-30").
        chapter: Optional chapter name; when given together with
            ``filename`` its page range replaces ``pages``.
        query: Optional focus of the summary; "resumo" is used for the
            search and the map-reduce step when it is empty.
        style: Summary style forwarded to ``map_reduce``.
        length: Summary length forwarded to ``map_reduce``.

    Returns:
        The summary text, or a user-facing message when the index is not
        ready, the page range is malformed, nothing is found, or the search
        or summarisation fails.
    """
    if not STATE_INDEXED:
        warn = try_autoload_index()
        if warn:
            return warn + "\nVá na aba **Upload & Indexar** e clique em **Indexar**."

    page_start = page_end = None
    # Treat a whitespace-only field the same as an empty one.
    pages = (pages or "").strip()
    if pages:
        try:
            first, last = pages.split("-")
            page_start, page_end = int(first), int(last)
        except ValueError:
            # Raised both by a wrong number of "-"-separated parts and by
            # non-numeric parts.
            return "Formato de páginas inválido. Use '10-30'."

    if chapter and filename:
        rng = retriever.find_chapter_range(filename, chapter)
        if not rng:
            return "NÃO ENCONTRADO"
        # A resolved chapter range overrides any manually typed page range.
        page_start, page_end = rng["start"], rng["end"]

    try:
        hits = retriever.search(
            query or "resumo",
            top_k=16,
            filename=filename or None,
            page_start=page_start,
            page_end=page_end,
        )
    except Exception as e:
        return f"Falha na busca: **{type(e).__name__}** — {e}"

    if not hits:
        return "NÃO ENCONTRADO"

    try:
        sents = extractive_sents(hits, query, max_sents=30)
        if sents:
            # Wrap each (sentence, page) pair in the same dict layout that is
            # passed to map_reduce for raw hits.
            synth = [{"text": s, "meta": {"page_num": p}} for s, p in sents]
            final = map_reduce(synth, focus=query or "resumo", style=style, length=length)
        else:
            # No sentences extracted: summarise the first eight hits instead.
            final = map_reduce(hits[:8], focus=query or "resumo", style=style, length=length)
        return final
    except Exception as e:
        return f"Falha ao resumir: **{type(e).__name__}** — {e}"
# ---------------------------------------------------------------------------
# Gradio interface: three tabs (upload/index, chat, summarise) wired to the
# handler functions defined above.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Agente de Estudos IA") as demo:
    gr.Markdown("# Agente de Estudos IA \nCarregue seu **livro** e **slides** e estude com o bot.")

    # Tab 1: send files to RAW_DIR and (re)build the index.
    with gr.Tab("Upload & Indexar"):
        files_in = gr.File(
            label="Envie PDFs / PPTX / TXT",
            file_count="multiple",
        )
        uploaded_list = gr.Dropdown(
            label="Arquivos no índice",
            choices=list_uploaded_files(),
            interactive=False,
        )
        upload_btn = gr.Button("Fazer upload")
        index_btn = gr.Button("Indexar (parse → embeddings → FAISS)")
        status = gr.Markdown()

        upload_btn.click(
            upload_files,
            inputs=files_in,
            outputs=[uploaded_list, status],
        )
        index_btn.click(build_index, outputs=status)

    # Tab 2: question answering over the indexed material.
    with gr.Tab("Conversar"):
        chatbot = gr.Chatbot(height=420)
        msg = gr.Textbox(placeholder="Pergunte algo")
        send = gr.Button("Enviar", variant="primary")
        clear = gr.Button("Limpar chat")

        send.click(
            chat_answer,
            inputs=[chatbot, msg],
            outputs=[chatbot, msg],
        )
        # Resetting the chatbot value to an empty list clears the chat.
        clear.click(lambda: [], outputs=chatbot)

    # Tab 3: summaries restricted by file, page range, chapter and focus.
    with gr.Tab("Resumir"):
        file_dd = gr.Dropdown(
            label="Arquivo (opcional, senão busca em todos)",
            choices=list_uploaded_files(),
        )
        pages_tb = gr.Textbox(
            label="Páginas (ex.: 12-30)",
            placeholder="ex.: 45-67",
        )
        chapter_tb = gr.Textbox(label="Capítulo (opcional, tenta usar TOC do PDF)")
        query_tb = gr.Textbox(
            label="Foco do resumo",
            placeholder="definições, exemplos e complexidades",
        )
        style_dd = gr.Dropdown(
            choices=["bullets", "esquema", "discursivo"],
            value="bullets",
            label="Estilo",
        )
        length_dd = gr.Dropdown(
            choices=["curto", "médio", "longo"],
            value="médio",
            label="Tamanho",
        )
        run_btn = gr.Button("Gerar resumo", variant="primary")
        out_md = gr.Markdown()

        run_btn.click(
            summarize_run,
            inputs=[file_dd, pages_tb, chapter_tb, query_tb, style_dd, length_dd],
            outputs=out_md,
        )

# Enable request queuing and start the app.
demo.queue().launch()