import shutil
from pathlib import Path

import gradio as gr

from backend.settings import settings
from backend.ingest import ingest_all
from backend.rag import retriever
from backend.llm_local import generate
from backend.summarizer import extractive_sents, map_reduce

# Make sure the upload and index directories exist before the app starts.
settings.RAW_DIR.mkdir(parents=True, exist_ok=True)
settings.INDEX_DIR.mkdir(parents=True, exist_ok=True)

# Tracks whether the FAISS index has been built/loaded in this session.
STATE_INDEXED = False


def list_uploaded_files():
    """Return the files under RAW_DIR (as relative paths) for the dropdowns."""
    files = []
    for f in settings.RAW_DIR.glob("**/*"):
        if f.is_file():
            files.append(str(f.relative_to(settings.RAW_DIR)))
    return sorted(files)


def upload_files(files):
    # With type="filepath" Gradio passes temp file paths (strings), which have
    # no .save() method, so copy each one into RAW_DIR instead.
    for f in files or []:
        src = Path(f)
        dest = settings.RAW_DIR / src.name
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dest)
    return gr.update(choices=list_uploaded_files(), value=None), "Upload feito!"


def build_index():
    global STATE_INDEXED
    # Only re-parse the raw files if the index metadata is missing.
    if not (settings.INDEX_DIR / "meta.jsonl").exists():
        ingest_all()
    retriever.build()
    retriever.load()
    STATE_INDEXED = True
    return "Vá para a aba Conversar :)"


def chat_answer(history, message):
    global STATE_INDEXED
    if not STATE_INDEXED:
        try:
            retriever.load()
            STATE_INDEXED = True
        except Exception:
            return history + [("system", "ERRO!")], ""
    # Retrieve the top chunks, keep the reranked slice and generate locally.
    hits = retriever.search(message, top_k=settings.TOP_K)
    ctx = hits[:settings.TOP_K_RERANK]
    ans = generate(message, ctx)
    history = history + [(message, ans)]
    return history, ""


def summarize_run(filename, pages, chapter, query, style, length):
    global STATE_INDEXED
    if not STATE_INDEXED:
        try:
            retriever.load()
            STATE_INDEXED = True
        except Exception:
            return "ERRO!"

    # Optional manual page range, given as "start-end".
    page_start = page_end = None
    if pages:
        try:
            a, b = pages.split("-")
            page_start, page_end = int(a), int(b)
        except ValueError:
            return "Formato de páginas inválido. Use '10-30'."

    # A chapter name (plus a filename) overrides the page range via the PDF TOC.
    if chapter and filename:
        rng = retriever.find_chapter_range(filename, chapter)
        if not rng:
            return "NÃO ENCONTRADO"
        page_start, page_end = rng["start"], rng["end"]

    hits = retriever.search(
        query or "resumo",
        top_k=16,
        filename=filename or None,
        page_start=page_start,
        page_end=page_end,
    )
    if not hits:
        return "NÃO ENCONTRADO"

    # Extractive pass first; fall back to the raw hits if nothing was selected.
    sents = extractive_sents(hits, query, max_sents=80)
    if sents:
        synth = [{"text": s, "meta": {"page_num": p}} for s, p in sents]
        final = map_reduce(synth, focus=query or "resumo", style=style, length=length)
    else:
        final = map_reduce(hits, focus=query or "resumo", style=style, length=length)
    return final


with gr.Blocks(title="Agente de Estudos IA") as demo:
    gr.Markdown("# Agente de Estudos IA\nCarregue seu **livro** e **slides** e estude com o bot.")

    with gr.Tab("Upload & Indexar"):
        files_in = gr.File(label="Envie PDFs / PPTX / TXT", file_count="multiple", type="filepath")
        uploaded_list = gr.Dropdown(label="Arquivos no índice", choices=list_uploaded_files(), interactive=False)
        upload_btn = gr.Button("Fazer upload")
        index_btn = gr.Button("Indexar (parse → embeddings → FAISS)")
        status = gr.Markdown()
        upload_btn.click(upload_files, inputs=files_in, outputs=[uploaded_list, status])
        index_btn.click(build_index, outputs=status)

    with gr.Tab("Conversar"):
        chatbot = gr.Chatbot(height=420)
        msg = gr.Textbox(placeholder="Pergunte algo (ex.: O que é heapify?)")
        send = gr.Button("Enviar", variant="primary")
        clear = gr.Button("Limpar chat")
        send.click(chat_answer, inputs=[chatbot, msg], outputs=[chatbot, msg])
        clear.click(lambda: [], outputs=chatbot)

    with gr.Tab("Resumir"):
        file_dd = gr.Dropdown(label="Arquivo (opcional, senão busca em todos)", choices=list_uploaded_files())
        pages_tb = gr.Textbox(label="Páginas (ex.: 12-30)", placeholder="ex.: 45-67")
        chapter_tb = gr.Textbox(label="Capítulo (opcional, tenta usar TOC do PDF)")
        query_tb = gr.Textbox(label="Foco do resumo", placeholder="definições, exemplos e complexidades")
        style_dd = gr.Dropdown(choices=["bullets", "esquema", "discursivo"], value="bullets", label="Estilo")
        length_dd = gr.Dropdown(choices=["curto", "médio", "longo"], value="médio", label="Tamanho")
        run_btn = gr.Button("Gerar resumo", variant="primary")
        out_md = gr.Markdown()
        run_btn.click(
            summarize_run,
            inputs=[file_dd, pages_tb, chapter_tb, query_tb, style_dd, length_dd],
            outputs=out_md,
        )

demo.queue().launch()