Andy Janco committed on
Commit
6941336
·
1 Parent(s): 5e74ada

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -2,7 +2,8 @@ import streamlit as st
2
  import textract
3
  import tempfile
4
  import spacy
5
- from spacy.tokens import DocBin
 
6
  import srsly
7
 
8
  st.title('Index and Search a Collection of Documents')
@@ -13,6 +14,7 @@ def download_model(select_model:str):
13
  spacy.cli.download(select_model)
14
  return True
15
 
 
16
  doc_bin = DocBin()
17
  models = srsly.read_json('models.json')
18
  models[''] = [] #require the user to choose a language
@@ -33,14 +35,16 @@ if language:
33
 
34
  for uploaded_file in uploaded_files:
35
  file_type = uploaded_file.type
36
-
37
- temp = tempfile.NamedTemporaryFile()
38
  temp.write(uploaded_file.getvalue())
39
  try:
40
  text = textract.process(temp.name)
41
  text = text.decode('utf-8')
42
  doc = nlp(text)
43
- st.write(text)
 
 
44
  except Exception as e:
45
  st.error(e)
46
 
 
2
  import textract
3
  import tempfile
4
  import spacy
5
+ from spacy.tokens import DocBin, Doc
6
+ from collections import Counter
7
  import srsly
8
 
9
  st.title('Index and Search a Collection of Documents')
 
14
  spacy.cli.download(select_model)
15
  return True
16
 
17
+
18
  doc_bin = DocBin()
19
  models = srsly.read_json('models.json')
20
  models[''] = [] #require the user to choose a language
 
35
 
36
  for uploaded_file in uploaded_files:
37
  file_type = uploaded_file.type
38
+ file_suffix = '.' + uploaded_file.name.split('.')[-1]
39
+ temp = tempfile.NamedTemporaryFile(suffix=file_suffix,)
40
  temp.write(uploaded_file.getvalue())
41
  try:
42
  text = textract.process(temp.name)
43
  text = text.decode('utf-8')
44
  doc = nlp(text)
45
+ ent_freq = Counter([ent.label_ for ent in doc.ents])
46
+ for key, value in ent_freq.items():
47
+ st.write(key, value)
48
  except Exception as e:
49
  st.error(e)
50