EmreG committed on
Commit
559480b
·
1 Parent(s): 8004403

minor interface updates

Browse files
Files changed (1) hide show
  1. app.py +2 -364
app.py CHANGED
@@ -135,295 +135,7 @@ def extract_and_parse(sentence: str):
135
  edges_str += "No edges found."
136
  return edges_str
137
 
138
- demo = gr.Interface(
139
- fn=extract_and_parse,
140
- inputs=gr.Textbox(label="Abstract or Sentence", lines=4, placeholder="Paste your scientific abstract here..."),
141
- outputs=gr.Textbox(label="Extracted Scientific Triples (subject --relation--> object)", lines=8),
142
- title="Scientific Relationship Extractor",
143
- description="Extracts Scientific Triples (subject, relation, object) triples from scientific text using Mistral-7B-Instruct."
144
- )
145
-
146
-
147
- # def extract_and_visualize(sentence: str):
148
- # # 1) Run extraction & build KG
149
- # raw_output = run_mistral_extract(sentence)
150
- # parsed = parse_mistral_output(raw_output)
151
- # G = build_kg(parsed["triples"])
152
-
153
- # # 2) Build pyvis network
154
- # net = Network(height="600px", width="100%", bgcolor="#ffffff", directed=True)
155
- # net.toggle_physics(True)
156
-
157
- # # Add nodes and edges (ensure node ids are strings)
158
- # for node in G.nodes:
159
- # net.add_node(str(node), label=str(node))
160
- # for u, v, d in G.edges(data=True):
161
- # net.add_edge(str(u), str(v), label=d.get("label", ""))
162
-
163
- # # 3) Save to a temporary HTML file and read it back
164
- # with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".html", encoding="utf-8") as tmp:
165
- # tmp_path = tmp.name
166
- # net.save_graph(tmp_path)
167
-
168
- # with open(tmp_path, "r", encoding="utf-8") as f:
169
- # html_content = f.read()
170
-
171
- # # 4) Escape and wrap in iframe using srcdoc so scripts run properly
172
- # iframe = (
173
- # "<iframe "
174
- # "srcdoc='" + html.escape(html_content) + "' "
175
- # "style='width:100%;height:650px;border:none;'></iframe>"
176
- # )
177
-
178
- # # Optional: include significance text below the iframe
179
- # significance = parsed.get("significance", "")
180
- # if significance:
181
- # # Safe simple HTML for description (escape to avoid injection)
182
- # iframe += "<div style='margin-top:8px; font-family:system-ui;'>" \
183
- # "<strong>Significance:</strong><p>" + html.escape(significance) + "</p></div>"
184
-
185
- # return iframe
186
-
187
- # def extract_and_visualize(sentence: str):
188
- # # 1. Run extraction & parse triples
189
- # raw_output = run_mistral_extract(sentence)
190
- # parsed = parse_mistral_output(raw_output)
191
- # G = build_kg(parsed["triples"])
192
-
193
- # # 2. Build PyVis network
194
- # net = Network(height="650px", width="100%", bgcolor="#ffffff", directed=True)
195
- # net.toggle_physics(True)
196
- # net.set_options("""
197
- # var options = {
198
- # "nodes": {
199
- # "shape": "dot",
200
- # "size": 20,
201
- # "font": {"size": 18, "face": "arial"},
202
- # "borderWidth": 2
203
- # },
204
- # "edges": {
205
- # "arrows": {"to": {"enabled": true, "scaleFactor": 0.8}},
206
- # "color": {"inherit": false},
207
- # "smooth": false,
208
- # "font": {"size": 14, "align": "horizontal"}
209
- # },
210
- # "physics": {
211
- # "forceAtlas2Based": {"gravitationalConstant": -50, "centralGravity": 0.01},
212
- # "minVelocity": 0.75,
213
- # "solver": "forceAtlas2Based",
214
- # "stabilization": {"fit": false}
215
- # }
216
- # }
217
- # """)
218
-
219
- # # Add nodes and edges
220
- # for node in G.nodes:
221
- # net.add_node(
222
- # str(node),
223
- # label=str(node),
224
- # title=f"Entity: {node}",
225
- # color="#0077b6" if G.out_degree(node) > 0 else "#90e0ef"
226
- # )
227
-
228
- # for u, v, d in G.edges(data=True):
229
- # rel = d.get("label", "")
230
- # net.add_edge(str(u), str(v), label=rel, title=f"Relation: {rel}", color="#023e8a")
231
-
232
- # # 3. Save HTML & embed as iframe
233
- # with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".html", encoding="utf-8") as tmp:
234
- # net.save_graph(tmp.name)
235
- # tmp_path = tmp.name
236
-
237
- # with open(tmp_path, "r", encoding="utf-8") as f:
238
- # html_content = f.read()
239
-
240
- # iframe_html = f"""
241
- # <div style='border-radius: 12px; box-shadow: 0 4px 10px rgba(0,0,0,0.1); overflow:hidden;'>
242
- # <iframe
243
- # srcdoc='{html.escape(html_content)}'
244
- # style='width:100%; height:650px; border:none; border-radius:12px;'>
245
- # </iframe>
246
- # </div>
247
- # """
248
- # return iframe_html
249
-
250
-
251
- ## Pure chatGPT here
252
- # def extract_and_visualize(sentence: str):
253
- # # 1. Run extraction & parse triples
254
- # raw_output = run_mistral_extract(sentence)
255
- # parsed = parse_mistral_output(raw_output)
256
- # G = build_kg(parsed["triples"])
257
-
258
- # # 2. Find the largest connected component
259
- # largest_cc = max(nx.connected_components(G.to_undirected()), key=len)
260
-
261
- # # 3. Build PyVis network
262
- # net = Network(height="650px", width="100%", bgcolor="#ffffff", directed=True)
263
- # net.toggle_physics(True)
264
- # net.set_options("""
265
- # var options = {
266
- # "nodes": {
267
- # "shape": "dot",
268
- # "size": 20,
269
- # "font": {"size": 18, "face": "arial", "color": "#000000"},
270
- # "borderWidth": 2
271
- # },
272
- # "edges": {
273
- # "arrows": {"to": {"enabled": true, "scaleFactor": 0.8}},
274
- # "color": {"inherit": false},
275
- # "smooth": false,
276
- # "font": {"size": 14, "align": "horizontal"}
277
- # },
278
- # "physics": {
279
- # "forceAtlas2Based": {"gravitationalConstant": -50, "centralGravity": 0.01},
280
- # "minVelocity": 0.75,
281
- # "solver": "forceAtlas2Based",
282
- # "stabilization": {"fit": false}
283
- # }
284
- # }
285
- # """)
286
-
287
- # # 4. Add nodes
288
- # for node in G.nodes:
289
- # is_main = node in largest_cc
290
- # base_color = "#0077b6" if G.out_degree(node) > 0 else "#90e0ef"
291
- # if is_main:
292
- # color = base_color
293
- # font_color = "#000000"
294
- # else:
295
- # color = "rgba(0,119,182,0.1)"
296
- # font_color = "rgba(0,0,0,0.1)"
297
- # net.add_node(
298
- # str(node),
299
- # label=str(node),
300
- # title=f"Entity: {node}",
301
- # color=color,
302
- # font={"size": 18, "color": font_color},
303
- # )
304
-
305
- # # 5. Add edges (relationships)
306
- # for u, v, d in G.edges(data=True):
307
- # rel = d.get("label", "")
308
- # is_main = u in largest_cc and v in largest_cc
309
- # if is_main:
310
- # edge_color = "rgba(2,62,138,0.6)" # 60% opacity for main
311
- # font_color = "rgba(0,0,0,0.6)"
312
- # else:
313
- # edge_color = "rgba(2,62,138,0.1)" # 10% for non-main
314
- # font_color = "rgba(0,0,0,0.1)"
315
- # net.add_edge(
316
- # str(u), str(v),
317
- # label=rel,
318
- # title=f"Relation: {rel}",
319
- # color=edge_color,
320
- # font={"color": font_color}
321
- # )
322
-
323
- # # 6. Save HTML
324
- # with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".html", encoding="utf-8") as tmp:
325
- # net.save_graph(tmp.name)
326
- # tmp_path = tmp.name
327
-
328
- # with open(tmp_path, "r", encoding="utf-8") as f:
329
- # html_content = f.read()
330
-
331
- # # 7. Inject JS for dynamic hover behavior
332
- # js_script = """
333
- # <script type="text/javascript">
334
- # const nodeColors = {};
335
- # const edgeColors = {};
336
- # nodes.forEach((n) => nodeColors[n.id] = n.color);
337
- # edges.forEach((e) => edgeColors[e.id] = e.color);
338
-
339
- # let mainComponent = new Set(
340
- # nodes.getIds().filter(id => nodeColors[id].includes("0.6") || nodeColors[id] === "#0077b6")
341
- # );
342
- # let activeNode = null;
343
-
344
- # // ---- Helper functions ----
345
- # function setOpacity(targetNodeId, fadeNonConnected = true) {
346
- # const connectedNodes = network.getConnectedNodes(targetNodeId);
347
- # const connectedEdges = network.getConnectedEdges(targetNodeId);
348
-
349
- # nodes.forEach((n) => {
350
- # const connected = n.id === targetNodeId || connectedNodes.includes(n.id);
351
- # const color = nodeColors[n.id];
352
- # const rgb = color.startsWith("rgba")
353
- # ? color.match(/rgba?\\(([^)]+)\\)/)[1].split(",").slice(0,3).join(",")
354
- # : `${parseInt(color.slice(1,3),16)},${parseInt(color.slice(3,5),16)},${parseInt(color.slice(5,7),16)}`;
355
- # const newColor = connected ? `rgba(${rgb},1)` : `rgba(${rgb},0.1)`;
356
- # const fontColor = connected ? "rgba(0,0,0,1)" : "rgba(0,0,0,0.1)";
357
- # nodes.update({id: n.id, color: newColor, font: {color: fontColor}});
358
- # });
359
-
360
- # edges.forEach((e) => {
361
- # const connected = connectedEdges.includes(e.id);
362
- # const color = edgeColors[e.id];
363
- # const rgb = color.startsWith("rgba")
364
- # ? color.match(/rgba?\\(([^)]+)\\)/)[1].split(",").slice(0,3).join(",")
365
- # : `${parseInt(color.slice(1,3),16)},${parseInt(color.slice(3,5),16)},${parseInt(color.slice(5,7),16)}`;
366
- # const newColor = connected ? `rgba(${rgb},0.6)` : `rgba(${rgb},0.1)`;
367
- # const fontColor = connected ? "rgba(0,0,0,1)" : "rgba(0,0,0,0.1)";
368
- # edges.update({id: e.id, color: newColor, font: {color: fontColor}});
369
- # });
370
- # }
371
-
372
- # function highlightMainComponent() {
373
- # nodes.forEach((n) => {
374
- # const inMain = mainComponent.has(n.id);
375
- # const color = nodeColors[n.id];
376
- # const rgb = color.startsWith("rgba")
377
- # ? color.match(/rgba?\\(([^)]+)\\)/)[1].split(",").slice(0,3).join(",")
378
- # : `${parseInt(color.slice(1,3),16)},${parseInt(color.slice(3,5),16)},${parseInt(color.slice(5,7),16)}`;
379
- # const newColor = inMain ? `rgba(${rgb},1)` : `rgba(${rgb},0.1)`;
380
- # const fontColor = inMain ? "rgba(0,0,0,1)" : "rgba(0,0,0,0.1)";
381
- # nodes.update({id: n.id, color: newColor, font: {color: fontColor}});
382
- # });
383
-
384
- # edges.forEach((e) => {
385
- # const n1InMain = mainComponent.has(e.from);
386
- # const n2InMain = mainComponent.has(e.to);
387
- # const inMain = n1InMain && n2InMain;
388
- # const color = edgeColors[e.id];
389
- # const rgb = color.startsWith("rgba")
390
- # ? color.match(/rgba?\\(([^)]+)\\)/)[1].split(",").slice(0,3).join(",")
391
- # : `${parseInt(color.slice(1,3),16)},${parseInt(color.slice(3,5),16)},${parseInt(color.slice(5,7),16)}`;
392
- # const newColor = inMain ? `rgba(${rgb},0.6)` : `rgba(${rgb},0.1)`;
393
- # const fontColor = inMain ? "rgba(0,0,0,0.6)" : "rgba(0,0,0,0.1)";
394
- # edges.update({id: e.id, color: newColor, font: {color: fontColor}});
395
- # });
396
- # }
397
-
398
- # // ---- Event bindings ----
399
- # network.on("click", (params) => {
400
- # if (params.nodes.length > 0) {
401
- # activeNode = params.nodes[0];
402
- # setOpacity(activeNode);
403
- # } else {
404
- # activeNode = null;
405
- # highlightMainComponent(); // reset to main view
406
- # }
407
- # });
408
-
409
- # network.once("stabilizationIterationsDone", () => {
410
- # highlightMainComponent(); // initial state
411
- # });
412
- # </script>
413
- # """
414
-
415
- # html_content = html_content.replace("</body>", js_script + "\n</body>")
416
-
417
- # iframe_html = f"""
418
- # <div style='border-radius: 12px; box-shadow: 0 4px 10px rgba(0,0,0,0.1); overflow:hidden;'>
419
- # <iframe
420
- # srcdoc='{html.escape(html_content)}'
421
- # style='width:100%; height:650px; border:none; border-radius:12px;'>
422
- # </iframe>
423
- # </div>
424
- # """
425
-
426
- # return iframe_html
427
  def extract_and_visualize(sentence: str):
428
  # 1. Extract triples and build graph
429
  raw_output = run_mistral_extract(sentence)
@@ -526,13 +238,9 @@ def extract_and_visualize(sentence: str):
526
  """
527
 
528
  html_content = html_content.replace("</body>", js_script + "\n</body>")
529
-
530
  return f"<iframe srcdoc='{html.escape(html_content)}' style='width:100%; height:650px; border:none; border-radius:12px;'></iframe>"
531
 
532
 
533
-
534
-
535
-
536
  abstract = (
537
  "Understanding the atomic structure of magnetite–carboxylic acid interfaces is crucial "
538
  "for tailoring nanocomposites involving this interface. We present a Monte Carlo (MC)-based "
@@ -569,7 +277,7 @@ def preload(preloaded):
569
  # Returns HTML, empty textbox, preloaded state
570
  return preloaded_output, "", True
571
 
572
- # ---- Interface ----
573
  with gr.Blocks() as demo:
574
  gr.Markdown("# Scientific Knowledge Graph Generator")
575
  with gr.Row():
@@ -610,73 +318,3 @@ with gr.Blocks() as demo:
610
 
611
  if __name__ == "__main__":
612
  demo.launch(share=False)
613
-
614
- # # ---- Preload ----
615
- # CACHE_DIR = "cache"
616
- # os.makedirs(CACHE_DIR, exist_ok=True)
617
- # CACHE_PATH = os.path.join(CACHE_DIR, "kg_cache.html")
618
-
619
- # def get_preload_output():
620
- # if os.path.exists(CACHE_PATH):
621
- # with open(CACHE_PATH, "r", encoding="utf-8") as f:
622
- # html_output = f.read()
623
- # sentence = abstract
624
-
625
- # else:
626
- # sentence = abstract
627
- # html_output = extract_and_visualize(sentence)
628
- # with open(CACHE_PATH, "w", encoding="utf-8") as f:
629
- # f.write(html_output)
630
- # return html_output, sentence
631
-
632
- # def preload(preloaded):
633
- # html_output, sentence = get_preload_output()
634
- # return html_output, sentence, True
635
-
636
-
637
- # # interface
638
- # with gr.Blocks() as demo:
639
- # gr.Markdown("# Scientific Knowledge Graph Generator")
640
- # with gr.Row():
641
- # with gr.Column(scale=0.9):
642
- # gr.Markdown(
643
- # "A lightweight app that generates scientific knowledge graphs.\n\n"
644
- # "### How it works\n"
645
- # "1. Extracts subject–relation–object (SRO) triples from scientific texts using a [large language model](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3).\n"
646
- # "2. Visualizes SRO triples as interactive knowledge graphs.\n"
647
- # )
648
- # input_text = gr.Textbox(
649
- # label="Text",
650
- # lines=4,
651
- # placeholder="Paste the scientific text here (e.g., abstract)."
652
- # )
653
- # examples = gr.Examples(
654
- # examples=[abstract],
655
- # inputs=[input_text],
656
- # visible=False,
657
- # )
658
- # btn = gr.Button("Submit", variant="primary")
659
- # gr.Markdown("Note: Hover your mouse over the node/edge for more details.")
660
- # with gr.Column(scale=1.2):
661
- # output_html = gr.HTML(label="Knowledge Graph Visualization")
662
-
663
- # preloaded = gr.State(value=False)
664
- # demo.load(fn=preload, inputs=preloaded, outputs=[output_html, input_text, preloaded])
665
- # btn.click(fn=extract_and_visualize, inputs=input_text, outputs=output_html)
666
-
667
- # if __name__ == "__main__":
668
- demo.launch(share=False)
669
-
670
-
671
-
672
- # demo = gr.Interface(
673
- # fn=extract_and_visualize,
674
- # inputs=gr.Textbox(label="Abstract or Sentence", lines=4, placeholder="Paste your scientific abstract here..."),
675
- # outputs=gr.HTML(label="Knowledge Graph Visualization"),
676
- # title="Scientific Relationship Extractor (Mistral-7B)",
677
- # description="Extracts subject–relation–object triples and visualizes them as an interactive knowledge graph."
678
- # )
679
-
680
-
681
- # if __name__ == "__main__":
682
- # demo.launch()
 
135
  edges_str += "No edges found."
136
  return edges_str
137
 
138
+ # return iframe_html, all the html and JS part is chatGPT magic
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  def extract_and_visualize(sentence: str):
140
  # 1. Extract triples and build graph
141
  raw_output = run_mistral_extract(sentence)
 
238
  """
239
 
240
  html_content = html_content.replace("</body>", js_script + "\n</body>")
 
241
  return f"<iframe srcdoc='{html.escape(html_content)}' style='width:100%; height:650px; border:none; border-radius:12px;'></iframe>"
242
 
243
 
 
 
 
244
  abstract = (
245
  "Understanding the atomic structure of magnetite–carboxylic acid interfaces is crucial "
246
  "for tailoring nanocomposites involving this interface. We present a Monte Carlo (MC)-based "
 
277
  # Returns HTML, empty textbox, preloaded state
278
  return preloaded_output, "", True
279
 
280
+ # Interface
281
  with gr.Blocks() as demo:
282
  gr.Markdown("# Scientific Knowledge Graph Generator")
283
  with gr.Row():
 
318
 
319
  if __name__ == "__main__":
320
  demo.launch(share=False)