Spaces:
Build error
Build error
| import gradio as gr | |
| import shutil | |
| from typing import Any | |
| from pathlib import Path | |
| from backend.settings import settings | |
| from backend.ingest import ingest_all | |
| from backend.rag import retriever | |
| from backend.llm_local import generate | |
| from backend.summarizer import extractive_sents, map_reduce | |
# Create the raw-data, index and model directories up front so later code
# can read from and write to them without checking for their existence.
for required_dir in (settings.RAW_DIR, settings.INDEX_DIR, settings.MODEL_DIR):
    required_dir.mkdir(parents=True, exist_ok=True)
def _index_files_exist() -> bool:
    """Return True when a FAISS index file is already present on disk.

    Two locations are checked, in order:

    1. ``retriever.index_path``, when the retriever exposes that attribute
       and it is not ``None``;
    2. ``settings.INDEX_DIR / "faiss.index"``.

    Only the index file itself is checked; no other artefact is inspected.
    """
    # ``retriever`` is the module-level import; a missing or ``None``
    # ``index_path`` simply falls through to the default location.
    index_path = getattr(retriever, "index_path", None)
    if index_path is not None and index_path.exists():
        return True
    return (settings.INDEX_DIR / "faiss.index").exists()
def _raw_has_files() -> bool:
    """Return True when ``settings.RAW_DIR`` holds at least one regular file.

    The search is recursive. Entries are filtered with ``is_file()`` so that
    directories are never counted and files without an extension are not
    missed, matching the rule ``list_uploaded_files`` uses.
    """
    return any(entry.is_file() for entry in settings.RAW_DIR.rglob("*"))
def try_autoload_index() -> str | None:
    """Load the retriever index, building it first when only raw files exist.

    Updates the module-level ``STATE_INDEXED`` flag as a side effect.

    Returns:
        ``None`` when the index was loaded, otherwise a user-facing message
        explaining why it could not be prepared.
    """
    global STATE_INDEXED
    try:
        if not _index_files_exist():
            if not _raw_has_files():
                # Nothing to load and nothing to build from.
                STATE_INDEXED = False
                return "Nenhum arquivo em backend/data/raw/. Envie arquivos (ou comite nessa pasta) e clique em Indexar."
            # Raw files but no index yet: parse and build before loading.
            ingest_all()
            retriever.build()
        retriever.load()
        STATE_INDEXED = True
        return None
    except Exception as exc:
        STATE_INDEXED = False
        return f"Falha ao preparar índice automaticamente: {type(exc).__name__} — {exc}"
# Process-wide flag: set to True by the functions in this module after
# retriever.load() succeeds, and reset to False when preparing the index fails.
STATE_INDEXED: bool = False
def list_uploaded_files():
    """Return the sorted paths, relative to ``settings.RAW_DIR``, of every file below it."""
    raw_root = settings.RAW_DIR
    return sorted(
        str(entry.relative_to(raw_root))
        for entry in raw_root.glob("**/*")
        if entry.is_file()
    )
def _upload_name(file_obj: Any, fallback: str) -> str:
    """Return a bare file name for an uploaded object, never a path.

    The name comes from ``orig_name``, then ``name``, then *fallback*. Any
    directory component is stripped: these attributes may hold a full
    temp-file path or a client-supplied value such as ``../../x``, and
    joining an absolute path onto a directory would silently discard the
    directory.
    """
    raw = getattr(file_obj, "orig_name", None) or getattr(file_obj, "name", None) or fallback
    bare = Path(str(raw)).name
    # ``Path("..").name`` is ".." and ``Path(".").name`` is "": neither is a
    # usable file name, so fall back instead of escaping the target folder.
    if bare in ("", ".", ".."):
        bare = Path(str(fallback)).name
    return bare


def _save_one(file_obj: Any) -> str:
    """Store one item coming from the ``gr.File`` component in ``settings.RAW_DIR``.

    Supported shapes of *file_obj*, tried in this order:

    - ``str`` / ``Path``: a path on disk, copied over;
    - an object with a truthy ``.path``: that path is copied over;
    - an object with a callable ``.save``: called with the destination path;
    - an object with a callable ``.read``: its content is written out.

    The destination name is always reduced to its final path component, so
    the file can only ever land directly inside ``settings.RAW_DIR``.

    Returns:
        The file name used inside ``settings.RAW_DIR``.

    Raises:
        ValueError: if *file_obj* matches none of the supported shapes.
    """
    dest_dir = settings.RAW_DIR
    dest_dir.mkdir(parents=True, exist_ok=True)

    if isinstance(file_obj, (str, Path)):
        src = Path(file_obj)
        dest = dest_dir / src.name
        shutil.copy(src, dest)
        return dest.name

    path = getattr(file_obj, "path", None)
    if path:
        src = Path(path)
        dest = dest_dir / _upload_name(file_obj, src.name)
        shutil.copy(src, dest)
        return dest.name

    saver = getattr(file_obj, "save", None)
    if callable(saver):
        dest = dest_dir / _upload_name(file_obj, "arquivo_subido")
        saver(str(dest))
        return dest.name

    reader = getattr(file_obj, "read", None)
    if callable(reader):
        data = reader()
        dest = dest_dir / _upload_name(file_obj, "arquivo_subido")
        with open(dest, "wb") as out:
            out.write(data)
        return dest.name

    raise ValueError("Formato de arquivo não reconhecido pelo upload.")
def upload_files(files):
    """Save every uploaded item and report the outcome to the UI.

    Returns:
        A pair ``(dropdown_update, status_message)``: the refreshed list of
        files for the dropdown and a Markdown status line. Any exception is
        turned into a failure message instead of being raised.
    """
    try:
        saved_names = [_save_one(item) for item in (files or [])]
        summary = ", ".join(saved_names) if saved_names else "nenhum arquivo"
        status_text = "Upload concluído: " + summary + ". Agora clique em **Indexar**."
        return gr.update(choices=list_uploaded_files(), value=None), status_text
    except Exception as exc:
        failure = f"Falha no upload: {type(exc).__name__}: {exc}"
        return gr.update(choices=list_uploaded_files(), value=None), failure
def _ingest_is_stale() -> bool:
    """Return True when parsed metadata is missing or older than some raw file."""
    meta_path = settings.INDEX_DIR / "meta.jsonl"
    if not meta_path.exists():
        return True
    meta_mtime = meta_path.stat().st_mtime
    return any(
        entry.is_file() and entry.stat().st_mtime > meta_mtime
        for entry in settings.RAW_DIR.rglob("*")
    )


def build_index():
    """Parse the raw files if needed, then build and load the index.

    Ingestion is skipped only when ``meta.jsonl`` exists and no file under
    ``settings.RAW_DIR`` is newer than it, so files uploaded after a previous
    indexing run are picked up instead of being silently ignored.

    NOTE(review): assumes ``ingest_all()`` (re)writes
    ``settings.INDEX_DIR / "meta.jsonl"``, as the original existence check
    implied — confirm against ``backend.ingest``.

    Returns:
        A Markdown status message for the UI; errors are reported in the
        message rather than raised. ``STATE_INDEXED`` is updated accordingly.
    """
    global STATE_INDEXED
    try:
        if not _raw_has_files():
            STATE_INDEXED = False
            return "Nenhum arquivo encontrado em backend/data/raw/. Adicione arquivos ou faça upload."
        if _ingest_is_stale():
            ingest_all()
        retriever.build()
        retriever.load()
        STATE_INDEXED = True
        return "Índice criado/carregado. Vá para **Conversar** ou **Resumir**."
    except Exception as exc:
        STATE_INDEXED = False
        return f"Falha ao indexar/carregar: **{type(exc).__name__}** — {exc}"
def chat_answer(history, message):
    """Answer one chat message using the indexed documents.

    Args:
        history: current chatbot history as a list of
            ``(user_message, bot_message)`` pairs, or ``None``/empty.
        message: the text typed by the user.

    Returns:
        A pair ``(new_history, "")``; the empty string clears the input box.
        Warnings and errors are shown as the bot's reply to the user's
        message, so the question stays visible in the conversation.
    """
    history = history or []

    # Nothing to ask: leave the conversation untouched and skip the
    # retriever (and any index autoload) entirely.
    if not message or not message.strip():
        return history, ""

    if not STATE_INDEXED:
        warn = try_autoload_index()
        if warn:
            reply = warn + "\nVá na aba **Upload & Indexar** e clique em **Indexar**."
            return history + [(message, reply)], ""

    try:
        hits = retriever.search(message, top_k=settings.TOP_K)
        if hits:
            # Only the best-ranked hits are handed to the generator as context.
            ans = generate(message, hits[:settings.TOP_K_RERANK])
        else:
            ans = "Não encontrei trechos relevantes. Verifique se você **indexou** os arquivos."
        return history + [(message, ans)], ""
    except Exception as exc:
        failure = f"Falha na busca/resposta: **{type(exc).__name__}** — {exc}"
        return history + [(message, failure)], ""
def _parse_page_range(pages):
    """Parse ``"A-B"`` into ``(int(A), int(B))``; raise ValueError on any other shape."""
    first, last = pages.split("-")
    return int(first), int(last)


def summarize_run(filename, pages, chapter, query, style, length):
    """Produce a summary of the indexed material, optionally restricted.

    Args:
        filename: file to restrict the search to, or falsy for all files.
        pages: page range as ``"start-end"`` (e.g. ``"10-30"``), or blank.
        chapter: chapter name; when given together with *filename* its page
            range replaces the one from *pages*.
        query: focus of the summary; ``"resumo"`` is used when blank.
        style: summary style, forwarded to ``map_reduce``.
        length: summary length, forwarded to ``map_reduce``.

    Returns:
        The summary text, or a user-facing message when the input is
        invalid, nothing was found, or a step failed.
    """
    # Validate the cheap user input first, so a malformed page range never
    # triggers an index autoload (which may ingest and build from scratch).
    page_start = page_end = None
    pages = (pages or "").strip()
    if pages:
        try:
            page_start, page_end = _parse_page_range(pages)
        except ValueError:
            return "Formato de páginas inválido. Use '10-30'."

    if not STATE_INDEXED:
        # try_autoload_index reports its own failures as a message.
        warn = try_autoload_index()
        if warn:
            return warn + "\nVá na aba **Upload & Indexar** e clique em **Indexar**."

    try:
        if chapter and filename:
            rng = retriever.find_chapter_range(filename, chapter)
            if not rng:
                return "NÃO ENCONTRADO"
            page_start, page_end = rng["start"], rng["end"]
        hits = retriever.search(
            query or "resumo",
            top_k=16,
            filename=filename or None,
            page_start=page_start,
            page_end=page_end,
        )
    except Exception as exc:
        return f"Falha na busca: **{type(exc).__name__}** — {exc}"

    if not hits:
        return "NÃO ENCONTRADO"

    try:
        sents = extractive_sents(hits, query, max_sents=30)
        if sents:
            # Re-wrap the selected sentences in the chunk shape map_reduce takes.
            chunks = [{"text": s, "meta": {"page_num": p}} for s, p in sents]
        else:
            # No sentence selected: summarise the top hits directly.
            chunks = hits[:8]
        return map_reduce(chunks, focus=query or "resumo", style=style, length=length)
    except Exception as exc:
        return f"Falha ao resumir: **{type(exc).__name__}** — {exc}"
# UI definition: three tabs (upload/index, chat, summarise) wired to the
# handler functions above. Components are created in display order.
with gr.Blocks(title="Agente de Estudos IA") as demo:
    gr.Markdown("# Agente de Estudos IA \nCarregue seu **livro** e **slides** e estude com o bot.")

    with gr.Tab("Upload & Indexar"):
        files_in = gr.File(label="Envie PDFs / PPTX / TXT", file_count="multiple")
        uploaded_list = gr.Dropdown(label="Arquivos no índice", choices=list_uploaded_files(), interactive=False)
        upload_btn = gr.Button("Fazer upload")
        index_btn = gr.Button("Indexar (parse → embeddings → FAISS)")
        status = gr.Markdown()
        # Keep the event handle: a follow-up step is chained onto it further
        # down, once the summary tab's file dropdown exists.
        upload_event = upload_btn.click(upload_files, inputs=files_in, outputs=[uploaded_list, status])
        index_btn.click(build_index, outputs=status)

    with gr.Tab("Conversar"):
        chatbot = gr.Chatbot(height=420)
        msg = gr.Textbox(placeholder="Pergunte algo")
        send = gr.Button("Enviar", variant="primary")
        clear = gr.Button("Limpar chat")
        send.click(chat_answer, inputs=[chatbot, msg], outputs=[chatbot, msg])
        clear.click(lambda: [], outputs=chatbot)

    with gr.Tab("Resumir"):
        file_dd = gr.Dropdown(label="Arquivo (opcional, senão busca em todos)", choices=list_uploaded_files())
        pages_tb = gr.Textbox(label="Páginas (ex.: 12-30)", placeholder="ex.: 45-67")
        chapter_tb = gr.Textbox(label="Capítulo (opcional, tenta usar TOC do PDF)")
        query_tb = gr.Textbox(label="Foco do resumo", placeholder="definições, exemplos e complexidades")
        style_dd = gr.Dropdown(choices=["bullets", "esquema", "discursivo"], value="bullets", label="Estilo")
        length_dd = gr.Dropdown(choices=["curto", "médio", "longo"], value="médio", label="Tamanho")
        run_btn = gr.Button("Gerar resumo", variant="primary")
        out_md = gr.Markdown()
        run_btn.click(
            summarize_run,
            inputs=[file_dd, pages_tb, chapter_tb, query_tb, style_dd, length_dd],
            outputs=out_md,
        )

    # The summary tab's file list is computed once when the UI is built, so
    # refresh it after each upload; otherwise newly uploaded files could not
    # be selected there without restarting the app.
    upload_event.then(lambda: gr.update(choices=list_uploaded_files()), outputs=file_dd)

demo.queue().launch()