"""Gradio app: upload study files, build the retrieval index, chat and summarize."""

import shutil
from pathlib import Path
from typing import Any

import gradio as gr

from backend.ingest import ingest_all
from backend.llm_local import generate
from backend.rag import retriever
from backend.settings import settings
from backend.summarizer import extractive_sents, map_reduce

# Make sure the working directories exist before any handler touches them.
for p in [settings.RAW_DIR, settings.INDEX_DIR, settings.MODEL_DIR]:
    p.mkdir(parents=True, exist_ok=True)

# True once the retriever index has been loaded in this process.
STATE_INDEXED = False

# Names that must never be used as a destination file name.
_INVALID_BASENAMES = {"", ".", ".."}


def _index_files_exist() -> bool:
    """Return True if an index file is present on disk.

    Checks ``retriever.index_path`` when the retriever exposes that attribute,
    and falls back to ``INDEX_DIR / "faiss.index"``.
    """
    index_path = getattr(retriever, "index_path", None)
    if index_path is not None and index_path.exists():
        return True
    return (settings.INDEX_DIR / "faiss.index").exists()


def _raw_has_files() -> bool:
    """Return True if RAW_DIR contains at least one regular file (recursively).

    Uses the same "is a file" criterion as ``list_uploaded_files`` so both
    helpers agree on what counts as an uploaded file.
    """
    return any(entry.is_file() for entry in settings.RAW_DIR.rglob("*"))


def try_autoload_index() -> str | None:
    """Load the index if present, or build it when only raw files exist.

    Updates the module-level ``STATE_INDEXED`` flag.

    Returns:
        ``None`` on success, otherwise a user-facing warning/error message.
    """
    global STATE_INDEXED
    try:
        if _index_files_exist():
            retriever.load()
            STATE_INDEXED = True
            return None
        if _raw_has_files():
            ingest_all()
            retriever.build()
            retriever.load()
            STATE_INDEXED = True
            return None
        STATE_INDEXED = False
        return "Nenhum arquivo em backend/data/raw/. Envie arquivos (ou comite nessa pasta) e clique em Indexar."
    except Exception as e:  # UI boundary: report the failure instead of crashing
        STATE_INDEXED = False
        return f"Falha ao preparar índice automaticamente: {type(e).__name__} — {e}"


def list_uploaded_files() -> list[str]:
    """Return the sorted paths of all files under RAW_DIR, relative to RAW_DIR."""
    root = settings.RAW_DIR
    return sorted(
        str(entry.relative_to(root))
        for entry in root.glob("**/*")
        if entry.is_file()
    )


def _upload_basename(file_obj: Any, fallback: str) -> str:
    """Pick a safe destination file name for an uploaded object.

    Prefers ``orig_name`` over ``name`` and keeps only the final path
    component, so an absolute temp path or a ``../`` prefix can never redirect
    the write outside RAW_DIR.
    """
    raw = getattr(file_obj, "orig_name", None) or getattr(file_obj, "name", None)
    base = Path(str(raw)).name if raw else ""
    return fallback if base in _INVALID_BASENAMES else base


def _save_one(file_obj: Any) -> str:
    """Persist one item coming from the ``gr.File`` component into RAW_DIR.

    Accepted shapes, tried in this order:
      - ``str`` / ``Path`` (a path on disk)
      - object with ``.path``
      - object with ``.save()``
      - object with ``.read()`` (and optionally ``.name`` / ``.orig_name``)

    Returns:
        The file name used inside RAW_DIR.

    Raises:
        ValueError: if ``file_obj`` matches none of the supported shapes.
    """
    dest_dir = settings.RAW_DIR
    dest_dir.mkdir(parents=True, exist_ok=True)

    if isinstance(file_obj, (str, Path)):
        src = Path(file_obj)
        dest = dest_dir / src.name
        shutil.copy(src, dest)
        return dest.name

    path = getattr(file_obj, "path", None)
    if path:
        src = Path(path)
        dest = dest_dir / _upload_basename(file_obj, src.name)
        shutil.copy(src, dest)
        return dest.name

    saver = getattr(file_obj, "save", None)
    if callable(saver):
        dest = dest_dir / _upload_basename(file_obj, "arquivo_subido")
        saver(str(dest))
        return dest.name

    reader = getattr(file_obj, "read", None)
    if callable(reader):
        data = reader()
        if isinstance(data, str):
            # Text-mode handles return str; the destination is opened in binary.
            data = data.encode("utf-8")
        dest = dest_dir / _upload_basename(file_obj, "arquivo_subido")
        with open(dest, "wb") as out:
            out.write(data)
        return dest.name

    raise ValueError("Formato de arquivo não reconhecido pelo upload.")


def upload_files(files: Any) -> tuple[Any, str]:
    """Save every uploaded item and refresh the file dropdown.

    Returns:
        A ``(dropdown_update, status_message)`` pair for the Gradio outputs.
    """
    try:
        saved = [_save_one(item) for item in (files or [])]
        msg = "Upload concluído: " + (", ".join(saved) if saved else "nenhum arquivo")
        return gr.update(choices=list_uploaded_files(), value=None), msg + ". Agora clique em **Indexar**."
    except Exception as e:  # UI boundary: surface the error in the status area
        return gr.update(choices=list_uploaded_files(), value=None), f"Falha no upload: {type(e).__name__}: {e}"


def build_index() -> str:
    """Ingest (parse) the raw files, then build and load the index.

    Updates ``STATE_INDEXED`` and returns a status message for the UI; errors
    are reported in the message rather than raised.
    """
    global STATE_INDEXED
    try:
        if not _raw_has_files():
            STATE_INDEXED = False
            return "Nenhum arquivo encontrado em backend/data/raw/. Adicione arquivos ou faça upload."

        # NOTE(review): ingestion is skipped whenever meta.jsonl already exists,
        # so files uploaded after the first ingest may not get parsed — confirm
        # against backend.ingest / retriever.build.
        if not (settings.INDEX_DIR / "meta.jsonl").exists():
            ingest_all()

        retriever.build()
        retriever.load()
        STATE_INDEXED = True
        return "Índice criado/carregado. Vá para **Conversar** ou **Resumir**."
    except Exception as e:  # UI boundary
        STATE_INDEXED = False
        return f"Falha ao indexar/carregar: **{type(e).__name__}** — {e}"


def chat_answer(history: list | None, message: str | None) -> tuple[list, str]:
    """Answer ``message`` using retrieved context and append it to the chat.

    Returns:
        ``(updated_history, "")`` — the empty string clears the input textbox.
    """
    history = history or []

    # Nothing to ask: do not hit the retriever/LLM with an empty query.
    if not message or not message.strip():
        return history, ""

    if not STATE_INDEXED:
        warn = try_autoload_index()
        if warn:
            return history + [("system", warn + "\nVá na aba **Upload & Indexar** e clique em **Indexar**.")], ""

    try:
        hits = retriever.search(message, top_k=settings.TOP_K)
        if hits:
            ans = generate(message, hits[:settings.TOP_K_RERANK])
        else:
            ans = "Não encontrei trechos relevantes. Verifique se você **indexou** os arquivos."
        return history + [(message, ans)], ""
    except Exception as e:  # UI boundary
        return history + [("system", f"Falha na busca/resposta: **{type(e).__name__}** — {e}")], ""


def summarize_run(filename, pages, chapter, query, style, length) -> str:
    """Produce a summary, optionally restricted to a file, page range or chapter.

    Args:
        filename: file to restrict the search to (falsy = all files).
        pages: page range as ``"start-end"`` (e.g. ``"10-30"``), or empty.
        chapter: chapter to look up; only used together with ``filename`` and,
            when found, overrides ``pages``.
        query: focus of the summary; defaults to ``"resumo"`` when empty.
        style: passed through to ``map_reduce``.
        length: passed through to ``map_reduce``.

    Returns:
        The summary text, or a user-facing error / "not found" message.
    """
    if not STATE_INDEXED:
        warn = try_autoload_index()
        if warn:
            return warn + "\nVá na aba **Upload & Indexar** e clique em **Indexar**."

    page_start = page_end = None
    pages = (pages or "").strip()
    if pages:
        try:
            first, last = pages.split("-")
            page_start, page_end = int(first), int(last)
        except (AttributeError, ValueError):
            return "Formato de páginas inválido. Use '10-30'."

    focus = query or "resumo"

    try:
        if chapter and filename:
            rng = retriever.find_chapter_range(filename, chapter)
            if not rng:
                return "NÃO ENCONTRADO"
            page_start, page_end = rng["start"], rng["end"]

        hits = retriever.search(
            focus,
            top_k=16,
            filename=filename or None,
            page_start=page_start,
            page_end=page_end,
        )
    except Exception as e:  # UI boundary
        return f"Falha na busca: **{type(e).__name__}** — {e}"

    if not hits:
        return "NÃO ENCONTRADO"

    try:
        sents = extractive_sents(hits, query, max_sents=30)
        if sents:
            # Wrap the extracted (sentence, page) pairs in the chunk shape
            # passed to map_reduce.
            chunks = [{"text": s, "meta": {"page_num": pg}} for s, pg in sents]
        else:
            chunks = hits[:8]
        return map_reduce(chunks, focus=focus, style=style, length=length)
    except Exception as e:  # UI boundary
        return f"Falha ao resumir: **{type(e).__name__}** — {e}"


with gr.Blocks(title="Agente de Estudos IA") as demo:
    gr.Markdown("# Agente de Estudos IA \nCarregue seu **livro** e **slides** e estude com o bot.")

    with gr.Tab("Upload & Indexar"):
        files_in = gr.File(label="Envie PDFs / PPTX / TXT", file_count="multiple")
        uploaded_list = gr.Dropdown(label="Arquivos no índice", choices=list_uploaded_files(), interactive=False)
        upload_btn = gr.Button("Fazer upload")
        index_btn = gr.Button("Indexar (parse → embeddings → FAISS)")
        status = gr.Markdown()

        upload_btn.click(upload_files, inputs=files_in, outputs=[uploaded_list, status])
        index_btn.click(build_index, outputs=status)

    with gr.Tab("Conversar"):
        chatbot = gr.Chatbot(height=420)
        msg = gr.Textbox(placeholder="Pergunte algo")
        send = gr.Button("Enviar", variant="primary")
        clear = gr.Button("Limpar chat")

        send.click(chat_answer, inputs=[chatbot, msg], outputs=[chatbot, msg])
        clear.click(lambda: [], outputs=chatbot)

    with gr.Tab("Resumir"):
        # NOTE(review): these choices are computed once when the UI is built;
        # no event refreshes this dropdown after an upload.
        file_dd = gr.Dropdown(label="Arquivo (opcional, senão busca em todos)", choices=list_uploaded_files())
        pages_tb = gr.Textbox(label="Páginas (ex.: 12-30)", placeholder="ex.: 45-67")
        chapter_tb = gr.Textbox(label="Capítulo (opcional, tenta usar TOC do PDF)")
        query_tb = gr.Textbox(label="Foco do resumo", placeholder="definições, exemplos e complexidades")
        style_dd = gr.Dropdown(choices=["bullets", "esquema", "discursivo"], value="bullets", label="Estilo")
        length_dd = gr.Dropdown(choices=["curto", "médio", "longo"], value="médio", label="Tamanho")
        run_btn = gr.Button("Gerar resumo", variant="primary")
        out_md = gr.Markdown()

        run_btn.click(
            summarize_run,
            inputs=[file_dd, pages_tb, chapter_tb, query_tb, style_dd, length_dd],
            outputs=out_md,
        )

demo.queue().launch()