File size: 9,101 Bytes
6be80ab
4a14ca3
 
7068798
 
 
 
 
 
 
31b967b
0e7f7f3
ae1872c
 
4a14ca3
0e7f7f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7068798
 
ae1872c
7068798
 
 
 
 
 
 
ae1872c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7068798
 
c17777d
7068798
c17777d
0e7f7f3
 
 
 
 
c17777d
0e7f7f3
 
 
 
c17777d
 
 
 
 
 
0e7f7f3
7068798
c17777d
 
 
0e7f7f3
 
 
 
 
c17777d
 
 
 
 
 
 
 
 
 
 
 
 
7068798
cbe7819
aba5616
 
 
 
 
 
 
 
 
cbe7819
 
 
 
aba5616
 
 
 
cbe7819
 
 
 
 
7068798
 
 
aba5616
 
7068798
31b967b
cbe7819
7068798
 
 
31b967b
7068798
cbe7819
 
aba5616
 
 
 
 
 
 
cbe7819
 
7068798
31b967b
d7c5da9
cbe7819
aba5616
cbe7819
 
 
 
aba5616
cbe7819
 
 
 
7068798
ae1872c
31b967b
 
7068798
 
177a39e
 
 
 
 
 
 
7068798
 
 
4a14ca3
7068798
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c9e178
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
import gradio as gr
import shutil
from typing import Any
from pathlib import Path
from backend.settings import settings
from backend.ingest import ingest_all
from backend.rag import retriever
from backend.llm_local import generate
from backend.summarizer import extractive_sents, map_reduce



# Create the directories named in settings (RAW_DIR, INDEX_DIR, MODEL_DIR)
# if they are missing, so later reads and writes do not fail on a fresh checkout.
for p in (settings.RAW_DIR, settings.INDEX_DIR, settings.MODEL_DIR):
    p.mkdir(exist_ok=True, parents=True)

def _index_files_exist() -> bool:
    """Return True when an index file is present on disk.

    The retriever's own ``index_path`` is consulted when the retriever
    exposes that attribute; if it is absent or does not exist, the check
    falls back to ``settings.INDEX_DIR / "faiss.index"``.
    """
    from backend.rag import retriever

    if hasattr(retriever, "index_path") and retriever.index_path.exists():
        return True
    return (settings.INDEX_DIR / "faiss.index").exists()

def _raw_has_files() -> bool:
    """Return True if ``settings.RAW_DIR`` holds at least one regular file.

    The directory tree is searched recursively. Only real files count:
    directories are ignored even when their name contains a dot, and files
    without an extension (such as the ``arquivo_subido`` fallback name used
    by ``_save_one``) are included. This keeps the check in agreement with
    ``list_uploaded_files``.
    """
    return any(entry.is_file() for entry in settings.RAW_DIR.rglob("*"))

def try_autoload_index() -> str | None:
    """Prepare the retriever without requiring an explicit "Indexar" click.

    If index files already exist they are loaded. Otherwise, when the raw
    directory contains files, they are ingested, an index is built and then
    loaded. The module-level ``STATE_INDEXED`` flag is updated to reflect
    the outcome.

    Returns:
        ``None`` when the index was loaded successfully, otherwise a
        user-facing message explaining why the index is not available.
    """
    global STATE_INDEXED
    try:
        if not _index_files_exist():
            if not _raw_has_files():
                STATE_INDEXED = False
                return "Nenhum arquivo em backend/data/raw/. Envie arquivos (ou comite nessa pasta) e clique em Indexar."
            # No index yet but there is raw material: build one first.
            ingest_all()
            retriever.build()
        retriever.load()
        STATE_INDEXED = True
        return None
    except Exception as e:
        # UI boundary: report any failure as a message instead of raising.
        STATE_INDEXED = False
        return f"Falha ao preparar índice automaticamente: {type(e).__name__}: {e}"




# Module-level flag set to True after retriever.load() has succeeded in this
# process. Written by try_autoload_index(), build_index() and summarize_run();
# read by chat_answer() and summarize_run() to decide whether to auto-load.
STATE_INDEXED = False


def list_uploaded_files():
    """Return the sorted paths of every file under ``settings.RAW_DIR``.

    Paths are returned as strings relative to ``settings.RAW_DIR``;
    directories are skipped.
    """
    return sorted(
        str(entry.relative_to(settings.RAW_DIR))
        for entry in settings.RAW_DIR.glob("**/*")
        if entry.is_file()
    )

def _upload_basename(file_obj: Any, fallback: str) -> str:
    """Choose a bare file name (no directory components) for an upload.

    Preference order is ``file_obj.orig_name``, ``file_obj.name``, then
    *fallback*. Whatever is chosen is reduced to its final path component so
    that an absolute path or ``../`` segments in a client-supplied name can
    never move the destination outside the raw directory.
    """
    raw = getattr(file_obj, "orig_name", None) or getattr(file_obj, "name", None) or fallback
    # Treat backslashes as separators too, so Windows-style names are
    # reduced to a basename on any platform.
    candidate = Path(str(raw).replace("\\", "/")).name
    if candidate in ("", ".", ".."):
        candidate = Path(str(fallback)).name or "arquivo_subido"
    return candidate


def _save_one(file_obj: Any) -> str:
    """Save one item coming from the ``gr.File`` component into the raw directory.

    Several shapes of upload object are accepted, tried in this order:

    - ``str`` / ``Path``: a path on disk, copied as-is;
    - an object with a truthy ``.path``: that file is copied;
    - an object with a callable ``.save``: called with the destination path;
    - an object with a callable ``.read``: its content is written out.

    The stored file name is always a basename. Upload objects may report a
    full (temporary) path in ``.name``; joining that directly onto the raw
    directory would discard the raw directory and could even target the
    source file itself.

    Args:
        file_obj: the uploaded item.

    Returns:
        The name of the file as stored in ``settings.RAW_DIR``.

    Raises:
        ValueError: if ``file_obj`` matches none of the supported shapes.
    """
    dest_dir = settings.RAW_DIR
    dest_dir.mkdir(parents=True, exist_ok=True)

    if isinstance(file_obj, (str, Path)):
        src = Path(file_obj)
        dest = dest_dir / src.name
        shutil.copy(src, dest)
        return dest.name

    path = getattr(file_obj, "path", None)
    if path:
        src = Path(path)
        dest = dest_dir / _upload_basename(file_obj, src.name)
        shutil.copy(src, dest)
        return dest.name

    saver = getattr(file_obj, "save", None)
    if callable(saver):
        dest = dest_dir / _upload_basename(file_obj, "arquivo_subido")
        saver(str(dest))
        return dest.name

    reader = getattr(file_obj, "read", None)
    if callable(reader):
        # Read before opening the destination so a failing read does not
        # leave an empty file behind.
        data = reader()
        dest = dest_dir / _upload_basename(file_obj, "arquivo_subido")
        with open(dest, "wb") as f:
            f.write(data)
        return dest.name

    raise ValueError("Formato de arquivo não reconhecido pelo upload.")

def upload_files(files):
    """Store every uploaded item and report the result to the UI.

    Returns a pair: a ``gr.update`` that refreshes the dropdown choices with
    the files currently in the raw directory (and clears its value), and a
    status message describing success or the failure that occurred.
    """
    try:
        saved = [_save_one(item) for item in (files or [])]
        summary = ", ".join(saved) if saved else "nenhum arquivo"
        refreshed = gr.update(choices=list_uploaded_files(), value=None)
        return refreshed, "Upload concluído: " + summary + ". Agora clique em **Indexar**."
    except Exception as exc:
        refreshed = gr.update(choices=list_uploaded_files(), value=None)
        return refreshed, f"Falha no upload: {type(exc).__name__}: {exc}"


def build_index():
    """Ingest the raw files, then build and load the retriever index.

    Updates the module-level ``STATE_INDEXED`` flag and returns a Markdown
    status message for the UI; any failure is reported in that message
    rather than raised.
    """
    global STATE_INDEXED
    try:
        if not _raw_has_files():
            STATE_INDEXED = False
            return "Nenhum arquivo encontrado em backend/data/raw/. Adicione arquivos ou faça upload."

        meta_file = settings.INDEX_DIR / "meta.jsonl"
        # NOTE(review): ingestion is skipped whenever meta.jsonl already
        # exists, so files uploaded after a first ingest may never be parsed.
        # Confirm whether that is intended.
        if not meta_file.exists():
            ingest_all()

        retriever.build()
        retriever.load()
    except Exception as exc:
        STATE_INDEXED = False
        return f"Falha ao indexar/carregar: **{type(exc).__name__}** — {exc}"
    else:
        STATE_INDEXED = True
        return "Índice criado/carregado. Vá para **Conversar** ou **Resumir**."


def chat_answer(history, message):
    """Answer one chat message using the retriever and the local LLM.

    Returns the updated chat history and an empty string, which the UI uses
    to clear the input textbox. When the index is not ready, or the search
    or generation fails, a "system" entry describing the problem is appended
    to the history instead of an answer.
    """
    history = history or []

    # Lazily prepare the index the first time it is needed.
    if not STATE_INDEXED:
        warn = try_autoload_index()
        if warn:
            notice = warn + "\nVá na aba **Upload & Indexar** e clique em **Indexar**."
            return history + [("system", notice)], ""

    try:
        hits = retriever.search(message, top_k=settings.TOP_K)
        if hits:
            context = hits[:settings.TOP_K_RERANK]
            reply = generate(message, context)
        else:
            reply = "Não encontrei trechos relevantes. Verifique se você **indexou** os arquivos."
        return history + [(message, reply)], ""
    except Exception as exc:
        failure = f"Falha na busca/resposta: **{type(exc).__name__}** — {exc}"
        return history + [("system", failure)], ""

def summarize_run(filename, pages, chapter, query, style, length):
    """Produce a summary of indexed material for the "Resumir" tab.

    Args:
        filename: optional file to restrict the search to; a falsy value
            searches every file.
        pages: optional page range written as ``"start-end"`` (e.g. ``"10-30"``).
        chapter: optional chapter identifier. It is only used when
            ``filename`` is also given, and the range found for it replaces
            any range parsed from ``pages``.
        query: focus of the summary; ``"resumo"`` is used for the search and
            for ``map_reduce`` when it is empty.
        style: forwarded to ``map_reduce``.
        length: forwarded to ``map_reduce``.

    Returns:
        The summary text, or a user-facing message when the index is not
        ready, the input is invalid, nothing was found, or a step failed.
    """
    global STATE_INDEXED

    if not STATE_INDEXED:
        warn = None
        try:
            warn = try_autoload_index()
        except NameError:
            # Defensive fallback for the case where try_autoload_index is not
            # defined: load the index directly.
            try:
                retriever.load()
                STATE_INDEXED = True
            except Exception as e:
                return (
                    f"Índice não está pronto: **{type(e).__name__}** — {e}\n"
                    "Vá na aba **Upload & Indexar** e clique em **Indexar**."
                )

        if warn:
            return warn + "\nVá na aba **Upload & Indexar** e clique em **Indexar**."

    page_start = page_end = None
    if pages:
        try:
            first, last = pages.split("-")
            page_start, page_end = int(first), int(last)
        except (AttributeError, TypeError, ValueError):
            # Wrong number of parts, non-numeric parts, or a non-string value.
            # Deliberately narrower than a bare except so that
            # KeyboardInterrupt/SystemExit are not swallowed.
            return "Formato de páginas inválido. Use '10-30'."

    if chapter and filename:
        # Guarded like the other retriever calls so a lookup failure becomes
        # a message in the UI instead of an unhandled exception.
        try:
            rng = retriever.find_chapter_range(filename, chapter)
            if not rng:
                return "NÃO ENCONTRADO"
            page_start, page_end = rng["start"], rng["end"]
        except Exception as e:
            return f"Falha ao localizar capítulo: **{type(e).__name__}** — {e}"

    focus = query or "resumo"

    try:
        hits = retriever.search(
            focus,
            top_k=16,
            filename=filename or None,
            page_start=page_start,
            page_end=page_end,
        )
    except Exception as e:
        return f"Falha na busca: **{type(e).__name__}** — {e}"
    if not hits:
        return "NÃO ENCONTRADO"

    try:
        sents = extractive_sents(hits, query, max_sents=30)
        if sents:
            # Re-wrap the extracted (sentence, page) pairs in the same
            # {"text", "meta"} shape passed to map_reduce.
            synth = [{"text": s, "meta": {"page_num": p}} for s, p in sents]
            return map_reduce(synth, focus=focus, style=style, length=length)
        # No sentences extracted: summarise the first hits directly.
        return map_reduce(hits[:8], focus=focus, style=style, length=length)
    except Exception as e:
        return f"Falha ao resumir: **{type(e).__name__}** — {e}"



# Gradio UI: three tabs (upload/index, chat, summarise), each wired to one of
# the handler functions defined in this module.
with gr.Blocks(title="Agente de Estudos IA") as demo:
    gr.Markdown("# Agente de Estudos IA \nCarregue seu **livro** e **slides** e estude com o bot.")

    with gr.Tab("Upload & Indexar"):
        files_in = gr.File(label="Envie PDFs / PPTX / TXT", file_count="multiple")
        # Read-only listing; its choices are refreshed by upload_files.
        uploaded_list = gr.Dropdown(label="Arquivos no índice", choices=list_uploaded_files(), interactive=False)
        upload_btn = gr.Button("Fazer upload")
        index_btn = gr.Button("Indexar (parse → embeddings → FAISS)")
        # Shows the message returned by upload_files / build_index.
        status = gr.Markdown()
        upload_btn.click(upload_files, inputs=files_in, outputs=[uploaded_list, status])
        index_btn.click(build_index, outputs=status)

    with gr.Tab("Conversar"):
        chatbot = gr.Chatbot(height=420)
        msg = gr.Textbox(placeholder="Pergunte algo")
        send = gr.Button("Enviar", variant="primary")
        clear = gr.Button("Limpar chat")
        # chat_answer returns (history, ""), so the textbox is emptied after sending.
        send.click(chat_answer, inputs=[chatbot, msg], outputs=[chatbot, msg])
        # Reset the chat to an empty history.
        clear.click(lambda: [], outputs=chatbot)

    with gr.Tab("Resumir"):
        # NOTE(review): these choices are computed when the UI is built and no
        # handler here updates them, so files uploaded later presumably do not
        # appear until the app restarts. Confirm.
        file_dd = gr.Dropdown(label="Arquivo (opcional, senão busca em todos)", choices=list_uploaded_files())
        pages_tb = gr.Textbox(label="Páginas (ex.: 12-30)", placeholder="ex.: 45-67")
        chapter_tb = gr.Textbox(label="Capítulo (opcional, tenta usar TOC do PDF)")
        query_tb = gr.Textbox(label="Foco do resumo", placeholder="definições, exemplos e complexidades")
        style_dd = gr.Dropdown(choices=["bullets", "esquema", "discursivo"], value="bullets", label="Estilo")
        length_dd = gr.Dropdown(choices=["curto", "médio", "longo"], value="médio", label="Tamanho")
        run_btn = gr.Button("Gerar resumo", variant="primary")
        out_md = gr.Markdown()
        # Input order must match summarize_run's positional parameters.
        run_btn.click(summarize_run,
                      inputs=[file_dd, pages_tb, chapter_tb, query_tb, style_dd, length_dd],
                      outputs=out_md)

# Runs whenever this module is executed or imported (there is no __main__ guard).
demo.queue().launch()