-
Notifications
You must be signed in to change notification settings - Fork 500
Expand file tree
/
Copy pathsummary_make.py
More file actions
36 lines (29 loc) · 1.35 KB
/
summary_make.py
File metadata and controls
36 lines (29 loc) · 1.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from preprocessing import EnglishCorpus
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.utils import get_stop_words
from sumy.summarizers.lex_rank import LexRankSummarizer
def summarize_sentences(sentences: str, language: str = "english") -> list:
    """
    Prepares the summary of sentences.

    Calls preprocessing for generating a list of processed sentences,
    then uses LexRank summarization to extract the most salient ones.

    :param sentences: Sentences from the text file
    :param language: Language used, default=English
    :return: Summary of the source file as a list of sentence objects
    """
    # Preparation of sentences
    corpus_maker = EnglishCorpus()
    preprocessed_sentences = corpus_maker.preprocessing(sentences)
    # make_sentence_list() stores its result on corpus_maker (make_corpus()
    # below takes no arguments), so the call is needed for its side effect;
    # the return value itself is unused here.
    corpus_maker.make_sentence_list(preprocessed_sentences)
    corpus = corpus_maker.make_corpus()
    parser = PlaintextParser.from_string(" ".join(corpus), Tokenizer(language))
    # Using Rank system for tokenizing the Headwords
    summarizer = LexRankSummarizer()
    # Generating stopwords, i.e. words which are not affecting the context of the text.
    summarizer.stop_words = get_stop_words(language)
    # Limiting the summary to one-fifth of the article (See README).
    # max(1, ...) guards against a zero-sentence summary when the corpus
    # has fewer than 5 sentences (2 // 10 would truncate to 0).
    summary = summarizer(
        document=parser.document, sentences_count=max(1, len(corpus) * 2 // 10)
    )
    # sumy returns a tuple; convert so the declared `-> list` holds.
    return list(summary)