Andy Janco
committed on
Commit
·
6941336
1
Parent(s):
5e74ada
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,7 +2,8 @@ import streamlit as st
|
|
| 2 |
import textract
|
| 3 |
import tempfile
|
| 4 |
import spacy
|
| 5 |
-
from spacy.tokens import DocBin
|
|
|
|
| 6 |
import srsly
|
| 7 |
|
| 8 |
st.title('Index and Search a Collection of Documents')
|
|
@@ -13,6 +14,7 @@ def download_model(select_model:str):
|
|
| 13 |
spacy.cli.download(select_model)
|
| 14 |
return True
|
| 15 |
|
|
|
|
| 16 |
doc_bin = DocBin()
|
| 17 |
models = srsly.read_json('models.json')
|
| 18 |
models[''] = [] #require the user to choose a language
|
|
@@ -33,14 +35,16 @@ if language:
|
|
| 33 |
|
| 34 |
for uploaded_file in uploaded_files:
|
| 35 |
file_type = uploaded_file.type
|
| 36 |
-
|
| 37 |
-
temp = tempfile.NamedTemporaryFile()
|
| 38 |
temp.write(uploaded_file.getvalue())
|
| 39 |
try:
|
| 40 |
text = textract.process(temp.name)
|
| 41 |
text = text.decode('utf-8')
|
| 42 |
doc = nlp(text)
|
| 43 |
-
|
|
|
|
|
|
|
| 44 |
except Exception as e:
|
| 45 |
st.error(e)
|
| 46 |
|
|
|
|
| 2 |
import textract
|
| 3 |
import tempfile
|
| 4 |
import spacy
|
| 5 |
+
from spacy.tokens import DocBin, Doc
|
| 6 |
+
from collections import Counter
|
| 7 |
import srsly
|
| 8 |
|
| 9 |
st.title('Index and Search a Collection of Documents')
|
|
|
|
| 14 |
spacy.cli.download(select_model)
|
| 15 |
return True
|
| 16 |
|
| 17 |
+
|
| 18 |
doc_bin = DocBin()
|
| 19 |
models = srsly.read_json('models.json')
|
| 20 |
models[''] = [] #require the user to choose a language
|
|
|
|
| 35 |
|
| 36 |
for uploaded_file in uploaded_files:
|
| 37 |
file_type = uploaded_file.type
|
| 38 |
+
file_suffix = '.' + uploaded_file.name.split('.')[-1]
|
| 39 |
+
temp = tempfile.NamedTemporaryFile(suffix=file_suffix,)
|
| 40 |
temp.write(uploaded_file.getvalue())
|
| 41 |
try:
|
| 42 |
text = textract.process(temp.name)
|
| 43 |
text = text.decode('utf-8')
|
| 44 |
doc = nlp(text)
|
| 45 |
+
ent_freq = Counter([ent.label_ for ent in doc.ents])
|
| 46 |
+
for key, value in ent_freq.items():
|
| 47 |
+
st.write(key, value)
|
| 48 |
except Exception as e:
|
| 49 |
st.error(e)
|
| 50 |
|