Spaces:

Gobish
/

Summarizer

Runtime error

App Files Files Community

Gobish committed on Jan 19, 2022

Commit

b35b859

1 Parent(s): 5baeb2e

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -89

app.py CHANGED Viewed

@@ -1,96 +1,72 @@
-# Streamlit dependencies
-import streamlit as st
-import os
-# NLP dependencies
 import spacy
 from spacy.lang.en.stop_words import STOP_WORDS
 from string import punctuation
-# Sumy dependencies
-from sumy.parsers.plaintext import PlaintextParser
-from sumy.nlp.tokenizers import Tokenizer
-from sumy.summarizers.lex_rank import LexRankSummarizer
-# Sumy
-def sumy_summarizer(docx):
-	parser = PlaintextParser.from_string(docx,Tokenizer("english"))
-	lex_summarizer = LexRankSummarizer()
-	summary = lex_summarizer(parser.document,3)
-	summary_list = [str(sentence) for sentence in summary]
-	result = ' '.join(summary_list)
-	return result
-# Reading Time
-def readingTime(mytext):
-	total_words = len([ token.text for token in nlp(mytext)])
-	estimatedTime = total_words/200.0
-	return estimatedTime
-# Fetch Text From Url
-@st.cache
-def get_text(raw_url):
-	page = requests.get(raw_url)
-	soup = BeautifulSoup(page.content, 'html.parser')
-	fetched_text = ' '.join(map(lambda p:p.text,soup.find_all('p')))
-	return fetched_text
-# Main Function
 def main():
-	"""Summaryzer App with Streamlit """
-	st.title("Summaryzer and Entity Checker")
-	st.subheader("NLP Web App with Streamlit")
-	st.markdown("""
-	#### Description
-	+ This is a Natural Language Processing(NLP) web app
-	+ It summarizes the article you enter or the article in the url you provide
-	+ It can alse extract the entities for you
-	""")
-	# Tokenization
-	nlp = spacy.load('en_core_web_sm')
-	# Fetch Text From Url
-	raw_url = st.text_input("Enter url here","Type here")
-	if st.button("Extract"):
-		result = get_text(raw_url)
-		st.write(result)
-	# Summarization
-	if st.checkbox("Summarize"):
-		summary_options = st.selectbox("Choose Summarizer",['sumy','gensim'])
-		if st.button("Summarize"):
-			if summary_options == 'sumy':
-				st.text("Using Sumy Summarizer ..")
-				summary_result = sumy_summarizer(result)
-			elif summary_options == 'gensim':
-				st.text("Using Gensim Summarizer ..")
-				summary_result = summarize(result)
-			else:
-				st.warning("Using Default Summarizer")
-				st.text("Using Gensim Summarizer ..")
-				summary_result = summarize(result)
-			st.success(summary_result)
-	# Entity Extraction
-	if st.checkbox("Extract Entities"):
-		st.text("Using Spacy Entity Extractor ..")
-		docx = nlp(result)
-		entities = [(entity.text,entity.label_) for entity in docx.ents]
-		st.write(entities)
-	# Text Summarization and Entity Extraction
-	if st.button("Generate Summary"):
-		st.text("Wait for it ..")
-		summary_result = sumy_summarizer(result)
-		st.success(summary_result)
-		docx = nlp(summary_result)
-		entities = [(entity.text,entity.label_) for entity in docx.ents]
-		st.write(entities)
 if __name__ == '__main__':
-	main()

+"""
+Create a text summarization app using Streamlit with a GUI
+"""
+import streamlit as st
 import spacy
 from spacy.lang.en.stop_words import STOP_WORDS
 from string import punctuation
+from heapq import nlargest
+# load the model
+nlp = spacy.load('en_core_web_sm')
+# add the stop words
+stopwords = list(STOP_WORDS)
+# add punctuation to stop words
+stopwords = stopwords + list(punctuation)
+# add words that aren't in the NLTK stopwords list
+other_exclusions = ["'s", "n't", "'m", "'re", "'ve", "'d", "'ll"]
+stopwords = stopwords + other_exclusions
+# function to get the keywords
+def get_summary(text):
+    doc = nlp(text)
+    tokens = [token.text for token in doc]
+    word_frequencies = {}
+    for word in doc:
+        if word.text.lower() not in stopwords:
+            if word.text.lower() not in word_frequencies.keys():
+                word_frequencies[word.text] = 1
+            else:
+                word_frequencies[word.text] += 1
+    # get the weighted frequencies
+    max_frequency = max(word_frequencies.values())
+    for word in word_frequencies.keys():
+        word_frequencies[word] = word_frequencies[word]/max_frequency
+    # get the sentences
+    sentence_tokens = [sent for sent in doc.sents]
+    sentence_scores = {}
+    for sent in sentence_tokens:
+        for word in sent:
+            if word.text.lower() in word_frequencies.keys():
+                if sent not in sentence_scores.keys():
+                    sentence_scores[sent] = word_frequencies[word.text.lower()]
+                else:
+                    sentence_scores[sent] += word_frequencies[word.text.lower()]
+    # get the summary
+    summary_sentences = nlargest(7, sentence_scores, key=sentence_scores.get)
+    final_sentences = [w.text for w in summary_sentences]
+    summary = ' '.join(final_sentences)
+    return summary
+# main function
 def main():
+    st.title('Text Summarizer')
+    st.subheader('Summarize your text')
+    message = st.text_area('Enter your text here', 'Type here')
+    summary_options = st.selectbox('Choose the summarizer', ['Gensim', 'Spacy'])
+    if st.button('Summarize'):
+        if summary_options == 'Gensim':
+            summary_result = get_summary(message)
+        elif summary_options == 'Spacy':
+            summary_result = nlp(message)
+            summary_result = ' '.join([sent.text for sent in summary_result.sents])
+        else:
+            st.write('Select a summarizer')
+        st.success(summary_result)
 if __name__ == '__main__':
+    main()