Gobish committed on
Commit
b35b859
·
1 Parent(s): 5baeb2e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -89
app.py CHANGED
@@ -1,96 +1,72 @@
1
- # Streamlit dependencies
2
- import streamlit as st
3
- import os
4
 
5
- # NLP dependencies
6
  import spacy
7
  from spacy.lang.en.stop_words import STOP_WORDS
8
  from string import punctuation
9
-
10
- # Sumy dependencies
11
- from sumy.parsers.plaintext import PlaintextParser
12
- from sumy.nlp.tokenizers import Tokenizer
13
- from sumy.summarizers.lex_rank import LexRankSummarizer
14
-
15
- # Sumy
16
- def sumy_summarizer(docx):
17
- parser = PlaintextParser.from_string(docx,Tokenizer("english"))
18
- lex_summarizer = LexRankSummarizer()
19
- summary = lex_summarizer(parser.document,3)
20
- summary_list = [str(sentence) for sentence in summary]
21
- result = ' '.join(summary_list)
22
- return result
23
-
24
- # Reading Time
25
- def readingTime(mytext):
26
- total_words = len([ token.text for token in nlp(mytext)])
27
- estimatedTime = total_words/200.0
28
- return estimatedTime
29
-
30
- # Fetch Text From Url
31
- @st.cache
32
- def get_text(raw_url):
33
- page = requests.get(raw_url)
34
- soup = BeautifulSoup(page.content, 'html.parser')
35
- fetched_text = ' '.join(map(lambda p:p.text,soup.find_all('p')))
36
- return fetched_text
37
-
38
- # Main Function
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  def main():
40
- """Summaryzer App with Streamlit """
41
- st.title("Summaryzer and Entity Checker")
42
- st.subheader("NLP Web App with Streamlit")
43
- st.markdown("""
44
- #### Description
45
- + This is a Natural Language Processing(NLP) web app
46
- + It summarizes the article you enter or the article in the url you provide
47
- + It can alse extract the entities for you
48
- """)
49
-
50
- # Tokenization
51
- nlp = spacy.load('en_core_web_sm')
52
-
53
- # Fetch Text From Url
54
- raw_url = st.text_input("Enter url here","Type here")
55
- if st.button("Extract"):
56
- result = get_text(raw_url)
57
- st.write(result)
58
-
59
- # Summarization
60
- if st.checkbox("Summarize"):
61
- summary_options = st.selectbox("Choose Summarizer",['sumy','gensim'])
62
- if st.button("Summarize"):
63
- if summary_options == 'sumy':
64
- st.text("Using Sumy Summarizer ..")
65
- summary_result = sumy_summarizer(result)
66
- elif summary_options == 'gensim':
67
- st.text("Using Gensim Summarizer ..")
68
- summary_result = summarize(result)
69
- else:
70
- st.warning("Using Default Summarizer")
71
- st.text("Using Gensim Summarizer ..")
72
- summary_result = summarize(result)
73
-
74
- st.success(summary_result)
75
-
76
- # Entity Extraction
77
- if st.checkbox("Extract Entities"):
78
- st.text("Using Spacy Entity Extractor ..")
79
- docx = nlp(result)
80
- entities = [(entity.text,entity.label_) for entity in docx.ents]
81
- st.write(entities)
82
-
83
- # Text Summarization and Entity Extraction
84
- if st.button("Generate Summary"):
85
- st.text("Wait for it ..")
86
- summary_result = sumy_summarizer(result)
87
- st.success(summary_result)
88
-
89
- docx = nlp(summary_result)
90
- entities = [(entity.text,entity.label_) for entity in docx.ents]
91
- st.write(entities)
92
-
93
-
94
 
95
  if __name__ == '__main__':
96
- main()
 
1
+ """
2
+ Create a text summarization app using Streamlit with a GUI
3
+ """
4
 
5
+ import streamlit as st
6
  import spacy
7
  from spacy.lang.en.stop_words import STOP_WORDS
8
  from string import punctuation
9
+ from heapq import nlargest
10
+
11
# Load the spaCy English pipeline once at import time so every call to the
# summarizer reuses the same model object.
nlp = spacy.load('en_core_web_sm')

# Extra contraction fragments to ignore in addition to spaCy's STOP_WORDS.
other_exclusions = ["'s", "n't", "'m", "'re", "'ve", "'d", "'ll"]

# Everything skipped when counting word frequencies: spaCy stop words,
# single punctuation characters and the contraction fragments above.
# Kept as a set so the per-token `in stopwords` checks are constant time
# (the union also makes any duplicates between the sources harmless).
stopwords = set(STOP_WORDS) | set(punctuation) | set(other_exclusions)
23
+
24
# Build an extractive summary from word-frequency sentence scores.
def get_summary(text, num_sentences=7):
    """Return an extractive summary of *text*.

    Every token that is not in the module-level ``stopwords`` collection is
    counted case-insensitively. Counts are divided by the highest count, and
    each sentence is scored by the sum of the weights of the counted words
    it contains. The best-scoring sentences are joined with single spaces,
    highest score first (not in document order).

    Args:
        text: The raw text to summarize.
        num_sentences: Maximum number of sentences to keep. Defaults to 7,
            the value that used to be hard-coded.

    Returns:
        The summary string, or an empty string when *text* contains no
        countable words (empty input or stop words/punctuation only).
    """
    doc = nlp(text)

    # Count and look up words under the same lower-cased key; mixing
    # `word.text` and `word.text.lower()` loses counts for capitalized
    # words and can raise KeyError.
    word_frequencies = {}
    for token in doc:
        key = token.text.lower()
        if key not in stopwords:
            word_frequencies[key] = word_frequencies.get(key, 0) + 1

    # Nothing to score: avoid calling max() on an empty sequence.
    if not word_frequencies:
        return ''

    # Normalize counts to the range (0, 1].
    max_frequency = max(word_frequencies.values())
    for key in word_frequencies:
        word_frequencies[key] = word_frequencies[key] / max_frequency

    # Score only sentences that contain at least one counted word.
    sentence_scores = {}
    for sent in doc.sents:
        weights = [
            word_frequencies[token.text.lower()]
            for token in sent
            if token.text.lower() in word_frequencies
        ]
        if weights:
            sentence_scores[sent] = sum(weights)

    best_sentences = nlargest(
        num_sentences, sentence_scores, key=sentence_scores.get
    )
    return ' '.join(sent.text for sent in best_sentences)
54
+
55
# Streamlit entry point.
def main():
    """Render the summarizer page and display the result for the input text.

    Shows a text area and a summarizer selector. When the 'Summarize'
    button is pressed, the text is processed by the selected branch and the
    result is shown with ``st.success``. Blank input and an unrecognized
    selection are reported to the user instead of reaching the summarizer.
    """
    st.title('Text Summarizer')
    st.subheader('Summarize your text')
    message = st.text_area('Enter your text here', 'Type here')
    summary_options = st.selectbox('Choose the summarizer', ['Gensim', 'Spacy'])

    if not st.button('Summarize'):
        return

    # Guard against blank input so the summarizer is never asked to score
    # text with no words in it.
    if not message.strip():
        st.warning('Please enter some text to summarize')
        return

    if summary_options == 'Gensim':
        # NOTE(review): despite the label, this runs the spaCy-based
        # frequency summarizer defined in this file, not gensim.
        summary_result = get_summary(message)
    elif summary_options == 'Spacy':
        # NOTE(review): this re-joins every sentence of the input, so the
        # output is the full text rather than a shortened summary.
        summary_result = ' '.join(sent.text for sent in nlp(message).sents)
    else:
        # Return here: there is no result to display, and falling through
        # would reference an unbound `summary_result`.
        st.write('Select a summarizer')
        return

    st.success(summary_result)


if __name__ == '__main__':
    main()