"""Gradio front end: upload study material, index it, chat with it and summarize it."""

import shutil
from pathlib import Path
from typing import Any

import gradio as gr

from backend.settings import settings
from backend.ingest import ingest_all
from backend.rag import retriever
from backend.llm_local import generate
from backend.summarizer import extractive_sents, map_reduce

# Make sure the working directories exist before any handler touches them.
settings.RAW_DIR.mkdir(parents=True, exist_ok=True)
settings.INDEX_DIR.mkdir(parents=True, exist_ok=True)

# True once the retriever index has been loaded into memory by this process.
STATE_INDEXED = False

# File name used when an uploaded object carries no usable name.
_FALLBACK_UPLOAD_NAME = "arquivo_subido"


def list_uploaded_files():
    """Return the sorted paths (relative to ``settings.RAW_DIR``) of all uploaded files."""
    raw_dir = settings.RAW_DIR
    return sorted(
        str(entry.relative_to(raw_dir))
        for entry in raw_dir.glob("**/*")
        if entry.is_file()
    )


def _ensure_index_loaded() -> None:
    """Load the retriever index unless this process already did so.

    Raises:
        Exception: whatever ``retriever.load()`` raises; ``STATE_INDEXED`` is
            left ``False`` in that case so the next call retries.
    """
    global STATE_INDEXED
    if not STATE_INDEXED:
        retriever.load()
        STATE_INDEXED = True


def build_index():
    """Ingest (parse) the uploads, then build and load the index.

    Returns:
        A Markdown status string; on failure it contains the exception type
        and message so the UI shows a detailed error.
    """
    global STATE_INDEXED
    try:
        # NOTE(review): ingestion is skipped whenever meta.jsonl already exists,
        # so files uploaded after the first run are presumably not re-ingested
        # -- confirm against ingest_all().
        if not (settings.INDEX_DIR / "meta.jsonl").exists():
            ingest_all()  # extracts the texts and creates meta.jsonl
        retriever.build()  # creates/updates the FAISS index
        retriever.load()  # loads the index into memory
        STATE_INDEXED = True
        return "Índice criado/carregado. Vá para **Conversar** ou **Resumir**."
    except Exception as e:
        STATE_INDEXED = False
        return f"Falha ao indexar/carregar: **{type(e).__name__}** — {e}"


def chat_answer(history, message):
    """Answer ``message`` from the indexed material and append it to ``history``.

    The index is loaded on demand. Any failure is reported as a chat entry
    with its cause instead of surfacing as a generic UI error.

    Args:
        history: Current chatbot history (list of pairs), or a falsy value.
        message: The user's question.

    Returns:
        ``(new_history, "")`` -- the empty string clears the input textbox.
    """
    history = history or []

    try:
        _ensure_index_loaded()
    except Exception as e:
        return history + [("system",
                           f"Índice não está pronto: **{type(e).__name__}** — {e}\n"
                           "Vá na aba **Upload & Indexar** e clique em **Indexar**.")], ""

    try:
        hits = retriever.search(message, top_k=settings.TOP_K)
        if not hits:
            ans = "Não encontrei trechos relevantes. Verifique se você **indexou** os arquivos."
        else:
            # Only the first TOP_K_RERANK hits are passed on as context.
            ctx = hits[:settings.TOP_K_RERANK]
            ans = generate(message, ctx)
        return history + [(message, ans)], ""
    except Exception as e:
        return history + [("system", f"Falha na busca/resposta: **{type(e).__name__}** — {e}")], ""


def summarize_run(filename, pages, chapter, query, style, length):
    """Produce a summary of the indexed material, optionally narrowed down.

    Args:
        filename: Restrict the search to this file; falsy means all files.
        pages: Page range as ``"start-end"`` (e.g. ``"10-30"``); falsy for none.
        chapter: Chapter to summarize; only used together with ``filename``
            and, when found, overrides ``pages``.
        query: Focus of the summary; defaults to ``"resumo"`` when falsy.
        style: Passed through to ``map_reduce``.
        length: Passed through to ``map_reduce``.

    Returns:
        The summary, or a short message when the index cannot be loaded, the
        page range is malformed, or nothing matches.
    """
    try:
        _ensure_index_loaded()
    except Exception as e:
        # Report the cause, as chat_answer and build_index do.
        return f"Índice não está pronto: **{type(e).__name__}** — {e}"

    page_start = page_end = None
    if pages:
        try:
            first, last = pages.split("-")
            page_start, page_end = int(first), int(last)
        except (ValueError, AttributeError):
            # Wrong number of parts, non-numeric parts, or a non-string value.
            return "Formato de páginas inválido. Use '10-30'."

    if chapter and filename:
        rng = retriever.find_chapter_range(filename, chapter)
        if not rng:
            return "NÃO ENCONTRADO"
        page_start, page_end = rng["start"], rng["end"]

    focus = query or "resumo"
    hits = retriever.search(
        focus,
        top_k=16,
        filename=filename or None,
        page_start=page_start,
        page_end=page_end,
    )
    if not hits:
        return "NÃO ENCONTRADO"

    sents = extractive_sents(hits, query, max_sents=80)
    if sents:
        # Re-wrap the extracted (sentence, page) pairs in the hit-like shape
        # handed to map_reduce.
        synth = [{"text": s, "meta": {"page_num": p}} for s, p in sents]
        return map_reduce(synth, focus=focus, style=style, length=length)
    return map_reduce(hits, focus=focus, style=style, length=length)


def _safe_name(candidate: Any) -> str:
    """Reduce ``candidate`` to a bare file name that cannot leave the upload dir.

    Upload objects may expose an absolute temp path as their name; joining an
    absolute path onto a directory discards the directory, and ``..`` walks
    out of it, so only the final path component is kept.
    """
    name = Path(str(candidate)).name if candidate else ""
    if name in ("", ".", ".."):
        return _FALLBACK_UPLOAD_NAME
    return name


def _copy_upload(src: Path, dest: Path) -> None:
    """Copy ``src`` to ``dest``; a file that is already at ``dest`` is left alone."""
    try:
        shutil.copy(src, dest)
    except shutil.SameFileError:
        pass  # the upload already lives in the destination directory


def _save_one(file_obj: Any) -> str:
    """Save one item coming from the ``gr.File`` component, whatever its shape.

    Supported shapes, tried in this order:
      - ``str`` / ``Path`` (a path on disk)
      - object with ``.path``
      - object with ``.save()``
      - object with ``.name`` / ``.orig_name`` and ``.read()``

    Returns:
        The name under which the file was stored in ``settings.RAW_DIR``.

    Raises:
        ValueError: if ``file_obj`` matches none of the supported shapes.
    """
    dest_dir = settings.RAW_DIR
    dest_dir.mkdir(parents=True, exist_ok=True)

    if isinstance(file_obj, (str, Path)):
        src = Path(file_obj)
        dest = dest_dir / _safe_name(src.name)
        _copy_upload(src, dest)
        return dest.name

    declared = getattr(file_obj, "orig_name", None) or getattr(file_obj, "name", None)

    path = getattr(file_obj, "path", None)
    if path:
        src = Path(path)
        dest = dest_dir / _safe_name(declared or src.name)
        _copy_upload(src, dest)
        return dest.name

    saver = getattr(file_obj, "save", None)
    if callable(saver):
        dest = dest_dir / _safe_name(declared)
        saver(str(dest))
        return dest.name

    reader = getattr(file_obj, "read", None)
    if callable(reader):
        data = reader()
        if isinstance(data, str):
            # A text-mode handle yields str; the file is written as bytes.
            data = data.encode("utf-8")
        dest = dest_dir / _safe_name(declared)
        with open(dest, "wb") as f:
            f.write(data)
        return dest.name

    raise ValueError("Formato de arquivo não reconhecido pelo upload.")


def upload_files(files):
    """Store every uploaded item and refresh the file dropdown.

    Args:
        files: Items from the ``gr.File`` component, or ``None``.

    Returns:
        ``(dropdown_update, status_message)``; a failure is reported in the
        status message rather than raised.
    """
    try:
        saved = [_save_one(item) for item in (files or [])]
        msg = "Upload concluído: " + (", ".join(saved) if saved else "nenhum arquivo")
        return gr.update(choices=list_uploaded_files(), value=None), msg + ". Agora clique em **Indexar**."
    except Exception as e:
        return gr.update(choices=list_uploaded_files(), value=None), f"Falha no upload: {type(e).__name__}: {e}"


# The UI is assembled only after every handler above exists: the ``.click``
# wiring resolves the handler names while this block runs at import time.
with gr.Blocks(title="Agente de Estudos IA") as demo:
    gr.Markdown("# Agente de Estudos IA \nCarregue seu **livro** e **slides** e estude com o bot.")

    with gr.Tab("Upload & Indexar"):
        files_in = gr.File(label="Envie PDFs / PPTX / TXT", file_count="multiple")
        uploaded_list = gr.Dropdown(label="Arquivos no índice", choices=list_uploaded_files(), interactive=False)
        upload_btn = gr.Button("Fazer upload")
        index_btn = gr.Button("Indexar (parse → embeddings → FAISS)")
        status = gr.Markdown()

        upload_btn.click(upload_files, inputs=files_in, outputs=[uploaded_list, status])
        index_btn.click(build_index, outputs=status)

    with gr.Tab("Conversar"):
        chatbot = gr.Chatbot(height=420)
        msg = gr.Textbox(placeholder="Pergunte algo")
        send = gr.Button("Enviar", variant="primary")
        clear = gr.Button("Limpar chat")

        send.click(chat_answer, inputs=[chatbot, msg], outputs=[chatbot, msg])
        clear.click(lambda: [], outputs=chatbot)

    with gr.Tab("Resumir"):
        file_dd = gr.Dropdown(label="Arquivo (opcional, senão busca em todos)", choices=list_uploaded_files())
        pages_tb = gr.Textbox(label="Páginas (ex.: 12-30)", placeholder="ex.: 45-67")
        chapter_tb = gr.Textbox(label="Capítulo (opcional, tenta usar TOC do PDF)")
        query_tb = gr.Textbox(label="Foco do resumo", placeholder="definições, exemplos e complexidades")
        style_dd = gr.Dropdown(choices=["bullets", "esquema", "discursivo"], value="bullets", label="Estilo")
        length_dd = gr.Dropdown(choices=["curto", "médio", "longo"], value="médio", label="Tamanho")
        run_btn = gr.Button("Gerar resumo", variant="primary")
        out_md = gr.Markdown()

        run_btn.click(
            summarize_run,
            inputs=[file_dd, pages_tb, chapter_tb, query_tb, style_dd, length_dd],
            outputs=out_md,
        )

demo.queue().launch()