Text-Summarizer-Using-Nlp-Advanced-Copy1 Updated
Text-Summarizer-Using-Nlp-Advanced-Copy1 Updated
stop_words = set(stopwords.words('english'))
punctuation = punctuation + '\n' + '—' + '“' + ',' + '”' + '‘' + '-' + '’'
warnings.filterwarnings('ignore')
In [ ]:
array([ True, True, True, True, True, True, True, True, True,
Out[3]:
True])
array([ True, True, True, True, True, True, True, True, True,
Out[4]:
True])
In [5]: # Making one Dataframe by appending all of them for the further process
d = [df_1, df_2, df_3]
df = pd.concat(d, keys = ['x', 'y', 'z'])
df.rename(columns = {'content' : 'article'}, inplace = True);
In [6]: df.head()
Out[6]: Unnamed:
id title publication author date year month url article
0
x 0 WASHINGTON —
House Republicans Fret About New York 2016-
0 17283 Carl Hulse 2016.0 12.0 NaN Congressional
Winning Their Hea... Times 12-31
Republicans have...
x 0 WASHINGTON —
House Republicans Fret About New York 2016-
17283 Carl Hulse 2016.0 12.0 NaN Congressional Republicans
Winning Their Hea... Times 12-31
have...
Rift Between Officers and New York Benjamin Mueller 2017- After the bullet shells get
1 17284 2017.0 6.0 NaN
Residents as Killing... Times and Al Baker 06-19 counted, the blood...
Among Deaths in 2016, a Heavy New York 2017- Death may be the great
3 17286 William McDonald 2017.0 4.0 NaN
Toll in Pop Musi... Times 04-10 equalizer, but it isn’t...
Kim Jong-un Says North Korea Is New York 2017- SEOUL, South Korea —
4 17287 Choe Sang-Hun 2017.0 1.0 NaN
Preparing to T... Times 01-02 North Korea’s leader, ...
[Text(0.5, 0, 'Publication'),
Out[9]:
Text(0, 0.5, 'Count'),
Text(0.5, 1.0, 'Distribution of Publication according')]
In [11]: # Years
df['year'].value_counts()
2016.0 85405
Out[11]:
2017.0 50404
2015.0 3705
2013.0 228
2014.0 125
2012.0 34
2011.0 8
2010.0 6
2008.0 3
2009.0 3
2004.0 2
2003.0 2
2005.0 2
2007.0 1
2000.0 1
Name: year, dtype: int64
In [12]: # Countplot shows the distribution of the articles according to the year
plt.rcParams['figure.figsize'] = [15, 8]
sns.set(font_scale = 1.2, style = 'whitegrid')
sns_year = sns.countplot(df['year'], color = 'darkcyan')
sns_year.set(xlabel = "Year", ylabel = "Count", title = "Distribution of the articles according to the year
[Text(0.5, 0, 'Year'),
Out[12]:
Text(0, 0.5, 'Count'),
Text(0.5, 1.0, 'Distribution of the articles according to the year')]
In [13]: # Authors
df['author'].value_counts()
sns.barplot(df_author,df_author.index)
sns_year.set(xlabel = "count", ylabel = "author", title = "the most freq author")
# Converting to lowercase
article = article.str.lower()
# Removing the Trailing and leading whitespace and double spaces again as removing punctuation might
# Lead to a white space
article = article.apply(lambda x: re.sub(' +', ' ',x))
return article
# Function to normalize the word frequency which is used in the function word_frequency
def normalize(li_word):
global normalized_freq
normalized_freq = []
for dictionary in li_word:
max_frequency = max(dictionary.values())
for word in dictionary.keys():
dictionary[word] = dictionary[word]/max_frequency
normalized_freq.append(dictionary)
return normalized_freq
# Function which generates the summary of the articles (This uses the 20% of the sentences with the highest
def summary(sentence_score_OwO):
summary_list = []
for summ in sentence_score_OwO:
select_length = int(len(summ)*0.25)
summary_ = nlargest(select_length, summ, key = summ.get)
summary_list.append(".".join(summary_))
return summary_list
# Functions to change the article string (if passed) to change it to generate a pandas series
def make_series(art):
global dataframe
data_dict = {'article' : [art]}
dataframe = pd.DataFrame(data_dict)['article']
return dataframe
# Function which is to be called to generate the summary which in further calls other functions alltogether
def article_summarize(artefact):
if type(artefact) != pd.Series:
artefact = make_series(artefact)
df = preprocessing(artefact)
word_normalization = word_frequency(df)
sentence_score_OwO = sent_token(article_sent)
summarized_article = summary(sentence_score_OwO)
return summarized_article
In [17]: # Generating the Word Cloud of the article using the preprocessing and make_series function mentioned below
from wordcloud import WordCloud
def word_cloud(art):
art_ = make_series(art)
OwO = preprocessing(art_)
wordcloud_ = WordCloud(height = 500, width = 1000, background_color = 'white').generate(art)
plt.figure(figsize=(15, 10))
plt.imshow(wordcloud_, interpolation='bilinear')
plt.axis('off');
# Generating the summaries for the first 100 articles
summaries = article_summarize(df['article'][0:100])
In [21]: model_out = ['anticipating that the trump administration might not be inclined to mount a vigorous fight ag
'the incoming trump administration could choose to no longer defend the executive branch again
'in a potentially decision in 2015 judge rosemary m collyer ruled that house republicans had t
'in their request the lawyers predicted that a deal between house republicans and the new admi
'just as important to house republicans judge collyer found that congress had the standing to
'but on spending power and standing the trump administration may come under pressure from advo
reference=['anticipating that the trump administration might not be ready to mount a vigorous fight against
'the incoming trump administration could choose to no longer defend the executive branch again
'in a potentially decision in 2015 judge rosemary m collyer ruled that house republicans had t
'in their request the advocates predicted that a deal between house republicans and the new ad
'just as important to house republicans judge collyer found that congress had the standing to
'but on spending power and standing the trump administration may come under pressure from advo
text-summarization-using-nlp
Text cleaning
In [23]: text=""" Congressional Republicans have a new fear when it comes to their health care lawsuit against th
In [24]: len(text)
5592
Out[24]:
Word tokenization
In [29]: tokens = [token.text for token in doc]
print(tokens)
[' ', 'Congressional', 'Republicans', 'have', 'a', 'new', 'fear', 'when', 'it', 'comes', 'to', 'their', '
', 'health', 'care', 'lawsuit', 'against', 'the', 'Obama', 'administration', ':', 'They', 'might', 'wi
n', '.', 'The', 'incoming', 'Trump', 'administration', 'could', 'choose', 'to', 'no', 'longer', 'defend',
'the', 'executive', 'branch', 'against', 'the', 'suit', ',', 'which', 'challenges', 'the', 'administratio
n', '’s', 'authority', 'to', 'spend', 'billions', 'of', 'dollars', 'on', 'health', 'insurance', 'subsidie
s', 'for', ' ', 'and', ' ', 'Americans', ',', 'handing', 'House', 'Republicans', 'a', 'big', 'victory',
'on', ' ', 'issues', '.', 'But', 'a', 'sudden', 'loss', 'of', 'the', 'disputed', 'subsidies', 'could',
'conceivably', 'cause', 'the', 'health', 'care', 'program', 'to', 'implode', ',', 'leaving', 'millions',
'of', 'people', 'without', 'access', 'to', 'health', 'insurance', 'before', 'Republicans', 'have', 'prepar
ed', 'a', 'replacement', '.', 'That', 'could', 'lead', 'to', 'chaos', 'in', 'the', 'insurance', 'market',
'and', 'spur', 'a', 'political', 'backlash', 'just', 'as', 'Republicans', 'gain', 'full', 'control', 'of',
'the', 'government', '.', 'To', 'stave', 'off', 'that', 'outcome', ',', 'Republicans', 'could', 'find', 't
hemselves', 'in', 'the', 'awkward', 'position', 'of', 'appropriating', 'huge', 'sums', 'to', 'temporaril
y', 'prop', 'up', 'the', 'Obama', 'health', 'care', 'law', ',', 'angering', 'conservative', 'voters', 'wh
o', 'have', 'been', 'demanding', 'an', 'end', 'to', 'the', 'law', 'for', 'years', '.', 'In', 'another', 't
wist', ',', 'Donald', 'J.', 'Trump', '’s', 'administration', ',', 'worried', 'about', 'preserving', 'execu
tive', 'branch', 'prerogatives', ',', 'could', 'choose', 'to', 'fight', 'its', 'Republican', 'allies', 'i
n', 'the', 'House', 'on', 'some', 'central', 'questions', 'in', 'the', 'dispute', '.', 'Eager', 'to', 'avo
id', 'an', 'ugly', 'political', 'pileup', ',', 'Republicans', 'on', 'Capitol', 'Hill', 'and', 'the', 'Trum
p', 'transition', 'team', 'are', 'gaming', 'out', 'how', 'to', 'handle', 'the', 'lawsuit', ',', 'which',
',', 'after', 'the', 'election', ',', 'has', 'been', 'put', 'in', 'limbo', 'until', 'at', 'least', 'late',
'February', 'by', 'the', 'United', 'States', 'Court', 'of', 'Appeals', 'for', 'the', 'District', 'of', 'Co
lumbia', 'Circuit', '.', 'They', 'are', 'not', 'yet', 'ready', 'to', 'divulge', 'their', 'strategy', '.',
'“', 'Given', 'that', 'this', 'pending', 'litigation', 'involves', 'the', 'Obama', 'administration', 'an
d', 'Congress', ',', 'it', 'would', 'be', 'inappropriate', 'to', 'comment', ',', '”', 'said', 'Phillip',
'J.', 'Blando', ',', 'a', 'spokesman', 'for', 'the', 'Trump', 'transition', 'effort', '.', '“', 'Upon', 't
aking', 'office', ',', 'the', 'Trump', 'administration', 'will', 'evaluate', 'this', 'case', 'and', 'all',
'related', 'aspects', 'of', 'the', 'Affordable', 'Care', 'Act', '.', '”', 'In', 'a', 'potentially', ' ',
'decision', 'in', '2015', ',', 'Judge', 'Rosemary', 'M.', 'Collyer', 'ruled', 'that', 'House', 'Republican
s', 'had', 'the', 'standing', 'to', 'sue', 'the', 'executive', 'branch', 'over', 'a', 'spending', 'disput
e', 'and', 'that', 'the', 'Obama', 'administration', 'had', 'been', 'distributing', 'the', 'health', 'insu
rance', 'subsidies', ',', 'in', 'violation', 'of', 'the', 'Constitution', ',', 'without', 'approval', 'fro
m', 'Congress', '.', 'The', 'Justice', 'Department', ',', 'confident', 'that', 'Judge', 'Collyer', '’s',
'decision', 'would', 'be', 'reversed', ',', 'quickly', 'appealed', ',', 'and', 'the', 'subsidies', 'have',
'remained', 'in', 'place', 'during', 'the', 'appeal', '.', 'In', 'successfully', 'seeking', 'a', 'temporar
y', 'halt', 'in', 'the', 'proceedings', 'after', 'Mr.', 'Trump', 'won', ',', 'House', 'Republicans', 'las
t', 'month', 'told', 'the', 'court', 'that', 'they', '“', 'and', 'the', ' ', '’s', 'transition', 'team',
'currently', 'are', 'discussing', 'potential', 'options', 'for', 'resolution', 'of', 'this', 'matter',
',', 'to', 'take', 'effect', 'after', 'the', ' ', '’s', 'inauguration', 'on', 'Jan.', '20', ',', '2017',
'.', '”', 'The', 'suspension', 'of', 'the', 'case', ',', 'House', 'lawyers', 'said', ',', 'will', '“', 'pr
ovide', 'the', ' ', 'and', 'his', 'future', 'administration', 'time', 'to', 'consider', 'whether', 'to',
'continue', 'prosecuting', 'or', 'to', 'otherwise', 'resolve', 'this', 'appeal', '.', '”', 'Republican',
'leadership', 'officials', 'in', 'the', 'House', 'acknowledge', 'the', 'possibility', 'of', '“', 'cascadin
g', 'effects', '”', 'if', 'the', ' ', 'payments', ',', 'which', 'have', 'totaled', 'an', 'estimated',
'$', '13', 'billion', ',', 'are', 'suddenly', 'stopped', '.', 'Insurers', 'that', 'receive', 'the', 'subsi
dies', 'in', 'exchange', 'for', 'paying', ' ', 'costs', 'such', 'as', 'deductibles', 'and', ' ', 'for',
'eligible', 'consumers', 'could', 'race', 'to', 'drop', 'coverage', 'since', 'they', 'would', 'be', 'losin
g', 'money', '.', 'Over', 'all', ',', 'the', 'loss', 'of', 'the', 'subsidies', 'could', 'destabilize', 'th
e', 'entire', 'program', 'and', 'cause', 'a', 'lack', 'of', 'confidence', 'that', 'leads', 'other', 'insur
ers', 'to', 'seek', 'a', 'quick', 'exit', 'as', 'well', '.', 'Anticipating', 'that', 'the', 'Trump', 'admi
nistration', 'might', 'not', 'be', 'inclined', 'to', 'mount', 'a', 'vigorous', 'fight', 'against', 'the',
'House', 'Republicans', 'given', 'the', ' ', '’s', 'dim', 'view', 'of', 'the', 'health', 'care', 'law',
',', 'a', 'team', 'of', 'lawyers', 'this', 'month', 'sought', 'to', 'intervene', 'in', 'the', 'case', 'o
n', 'behalf', 'of', 'two', 'participants', 'in', 'the', 'health', 'care', 'program', '.', 'In', 'their',
'request', ',', 'the', 'lawyers', 'predicted', 'that', 'a', 'deal', 'between', 'House', 'Republicans', 'an
d', 'the', 'new', 'administration', 'to', 'dismiss', 'or', 'settle', 'the', 'case', '“', 'will', 'produc
e', 'devastating', 'consequences', 'for', 'the', 'individuals', 'who', 'receive', 'these', 'reductions',
',', 'as', 'well', 'as', 'for', 'the', 'nation', '’s', 'health', 'insurance', 'and', 'health', 'care', 'sy
stems', 'generally', '.', '”', 'No', 'matter', 'what', 'happens', ',', 'House', 'Republicans', 'say', ',',
'they', 'want', 'to', 'prevail', 'on', 'two', 'overarching', 'concepts', ':', 'the', 'congressional', 'pow
er', 'of', 'the', 'purse', ',', 'and', 'the', 'right', 'of', 'Congress', 'to', 'sue', 'the', 'executive',
'branch', 'if', 'it', 'violates', 'the', 'Constitution', 'regarding', 'that', 'spending', 'power', '.', 'H
ouse', 'Republicans', 'contend', 'that', 'Congress', 'never', 'appropriated', 'the', 'money', 'for', 'th
e', 'subsidies', ',', 'as', 'required', 'by', 'the', 'Constitution', '.', 'In', 'the', 'suit', ',', 'whic
h', 'was', 'initially', 'championed', 'by', 'John', 'A.', 'Boehner', ',', 'the', 'House', 'speaker', 'at',
'the', 'time', ',', 'and', 'later', 'in', 'House', 'committee', 'reports', ',', 'Republicans', 'asserted',
'that', 'the', 'administration', ',', 'desperate', 'for', 'the', 'funding', ',', 'had', 'required', 'the',
'Treasury', 'Department', 'to', 'provide', 'it', 'despite', 'widespread', 'internal', 'skepticism', 'tha
t', 'the', 'spending', 'was', 'proper', '.', 'The', 'White', 'House', 'said', 'that', 'the', 'spending',
'was', 'a', 'permanent', 'part', 'of', 'the', 'law', 'passed', 'in', '2010', ',', 'and', 'that', 'no', 'an
nual', 'appropriation', 'was', 'required', ' ', '—', ' ', 'even', 'though', 'the', 'administration', 'ini
tially', 'sought', 'one', '.', 'Just', 'as', 'important', 'to', 'House', 'Republicans', ',', 'Judge', 'Col
lyer', 'found', 'that', 'Congress', 'had', 'the', 'standing', 'to', 'sue', 'the', 'White', 'House', 'on',
'this', 'issue', ' ', '—', ' ', 'a', 'ruling', 'that', 'many', 'legal', 'experts', 'said', 'was', 'flawe
d', ' ', '—', ' ', 'and', 'they', 'want', 'that', 'precedent', 'to', 'be', 'set', 'to', 'restore', 'congr
essional', 'leverage', 'over', 'the', 'executive', 'branch', '.', 'But', 'on', 'spending', 'power', 'and',
'standing', ',', 'the', 'Trump', 'administration', 'may', 'come', 'under', 'pressure', 'from', 'advocate
s', 'of', 'presidential', 'authority', 'to', 'fight', 'the', 'House', 'no', 'matter', 'their', 'shared',
'views', 'on', 'health', 'care', ',', 'since', 'those', 'precedents', 'could', 'have', 'broad', 'repercuss
ions', '.', 'It', 'is', 'a', 'complicated', 'set', 'of', 'dynamics', 'illustrating', 'how', 'a', 'quick',
'legal', 'victory', 'for', 'the', 'House', 'in', 'the', 'Trump', 'era', 'might', 'come', 'with', 'costs',
'that', 'Republicans', 'never', 'anticipated', 'when', 'they', 'took', 'on', 'the', 'Obama', 'White', 'Hou
se', '.']
'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\n'
Out[30]:
In [31]: word_frequencies = {}
for word in doc:
if word.text.lower() not in stopwords:
if word.text.lower() not in punctuation:
if word.text not in word_frequencies.keys():
word_frequencies[word.text] = 1
else:
word_frequencies[word.text] += 1
In [32]: print(word_frequencies)
{' ': 7, 'Congressional': 1, 'Republicans': 15, 'new': 2, 'fear': 1, 'comes': 1, ' ': 3, 'health': 11,
'care': 7, 'lawsuit': 2, 'Obama': 5, 'administration': 13, 'win': 1, 'incoming': 1, 'Trump': 9, 'choose':
2, 'longer': 1, 'defend': 1, 'executive': 5, 'branch': 5, 'suit': 2, 'challenges': 1, 'authority': 2, 'spe
nd': 1, 'billions': 1, 'dollars': 1, 'insurance': 5, 'subsidies': 7, ' ': 9, 'Americans': 1, 'handing':
1, 'House': 18, 'big': 1, 'victory': 2, 'issues': 1, 'sudden': 1, 'loss': 2, 'disputed': 1, 'conceivably':
1, 'cause': 2, 'program': 3, 'implode': 1, 'leaving': 1, 'millions': 1, 'people': 1, 'access': 1, 'prepare
d': 1, 'replacement': 1, 'lead': 1, 'chaos': 1, 'market': 1, 'spur': 1, 'political': 2, 'backlash': 1, 'ga
in': 1, 'control': 1, 'government': 1, 'stave': 1, 'outcome': 1, 'find': 1, 'awkward': 1, 'position': 1,
'appropriating': 1, 'huge': 1, 'sums': 1, 'temporarily': 1, 'prop': 1, 'law': 4, 'angering': 1, 'conservat
ive': 1, 'voters': 1, 'demanding': 1, 'end': 1, 'years': 1, 'twist': 1, 'Donald': 1, 'J.': 2, 'worried':
1, 'preserving': 1, 'prerogatives': 1, 'fight': 3, 'Republican': 2, 'allies': 1, 'central': 1, 'question
s': 1, 'dispute': 2, 'Eager': 1, 'avoid': 1, 'ugly': 1, 'pileup': 1, 'Capitol': 1, 'Hill': 1, 'transitio
n': 3, 'team': 3, 'gaming': 1, 'handle': 1, 'election': 1, 'limbo': 1, 'late': 1, 'February': 1, 'United':
1, 'States': 1, 'Court': 1, 'Appeals': 1, 'District': 1, 'Columbia': 1, 'Circuit': 1, 'ready': 1, 'divulg
e': 1, 'strategy': 1, '“': 6, 'Given': 1, 'pending': 1, 'litigation': 1, 'involves': 1, 'Congress': 5, 'in
appropriate': 1, 'comment': 1, '”': 6, 'said': 4, 'Phillip': 1, 'Blando': 1, 'spokesman': 1, 'effort': 1,
'taking': 1, 'office': 1, 'evaluate': 1, 'case': 4, 'related': 1, 'aspects': 1, 'Affordable': 1, 'Care':
1, 'Act': 1, 'potentially': 1, 'decision': 2, '2015': 1, 'Judge': 3, 'Rosemary': 1, 'M.': 1, 'Collyer': 3,
'ruled': 1, 'standing': 3, 'sue': 3, 'spending': 5, 'distributing': 1, 'violation': 1, 'Constitution': 3,
'approval': 1, 'Justice': 1, 'Department': 2, 'confident': 1, 'reversed': 1, 'quickly': 1, 'appealed': 1,
'remained': 1, 'place': 1, 'appeal': 2, 'successfully': 1, 'seeking': 1, 'temporary': 1, 'halt': 1, 'proce
edings': 1, 'Mr.': 1, 'won': 1, 'month': 2, 'told': 1, 'court': 1, 'currently': 1, 'discussing': 1, 'poten
tial': 1, 'options': 1, 'resolution': 1, 'matter': 3, 'effect': 1, 'inauguration': 1, 'Jan.': 1, '20': 1,
'2017': 1, 'suspension': 1, 'lawyers': 3, 'provide': 2, 'future': 1, 'time': 2, 'consider': 1, 'continue':
1, 'prosecuting': 1, 'resolve': 1, 'leadership': 1, 'officials': 1, 'acknowledge': 1, 'possibility': 1, 'c
ascading': 1, 'effects': 1, 'payments': 1, 'totaled': 1, 'estimated': 1, '13': 1, 'billion': 1, 'suddenl
y': 1, 'stopped': 1, 'Insurers': 1, 'receive': 2, 'exchange': 1, 'paying': 1, 'costs': 2, 'deductibles':
1, 'eligible': 1, 'consumers': 1, 'race': 1, 'drop': 1, 'coverage': 1, 'losing': 1, 'money': 2, 'destabili
ze': 1, 'entire': 1, 'lack': 1, 'confidence': 1, 'leads': 1, 'insurers': 1, 'seek': 1, 'quick': 2, 'exit':
1, 'Anticipating': 1, 'inclined': 1, 'mount': 1, 'vigorous': 1, 'given': 1, 'dim': 1, 'view': 1, 'sought':
2, 'intervene': 1, 'behalf': 1, 'participants': 1, 'request': 1, 'predicted': 1, 'deal': 1, 'dismiss': 1,
'settle': 1, 'produce': 1, 'devastating': 1, 'consequences': 1, 'individuals': 1, 'reductions': 1, 'natio
n': 1, 'systems': 1, 'generally': 1, 'happens': 1, 'want': 2, 'prevail': 1, 'overarching': 1, 'concepts':
1, 'congressional': 2, 'power': 3, 'purse': 1, 'right': 1, 'violates': 1, 'contend': 1, 'appropriated': 1,
'required': 3, 'initially': 2, 'championed': 1, 'John': 1, 'A.': 1, 'Boehner': 1, 'speaker': 1, 'later':
1, 'committee': 1, 'reports': 1, 'asserted': 1, 'desperate': 1, 'funding': 1, 'Treasury': 1, 'despite': 1,
'widespread': 1, 'internal': 1, 'skepticism': 1, 'proper': 1, 'White': 3, 'permanent': 1, 'passed': 1, '20
10': 1, 'annual': 1, 'appropriation': 1, '—': 3, 'important': 1, 'found': 1, 'issue': 1, 'ruling': 1, 'leg
al': 2, 'experts': 1, 'flawed': 1, 'precedent': 1, 'set': 2, 'restore': 1, 'leverage': 1, 'come': 2, 'pres
sure': 1, 'advocates': 1, 'presidential': 1, 'shared': 1, 'views': 1, 'precedents': 1, 'broad': 1, 'reperc
ussions': 1, 'complicated': 1, 'dynamics': 1, 'illustrating': 1, 'era': 1, 'anticipated': 1, 'took': 1}
Sentence tokenization
In [33]: max_frequency = max(word_frequencies.values())
In [34]: max_frequency
18
Out[34]:
In [36]: print(word_frequencies)
{' ': 0.3888888888888889, 'Congressional': 0.05555555555555555, 'Republicans': 0.8333333333333334, 'new':
0.1111111111111111, 'fear': 0.05555555555555555, 'comes': 0.05555555555555555, ' ': 0.16666666666666666,
'health': 0.6111111111111112, 'care': 0.3888888888888889, 'lawsuit': 0.1111111111111111, 'Obama': 0.277777
7777777778, 'administration': 0.7222222222222222, 'win': 0.05555555555555555, 'incoming': 0.05555555555555
555, 'Trump': 0.5, 'choose': 0.1111111111111111, 'longer': 0.05555555555555555, 'defend': 0.05555555555555
555, 'executive': 0.2777777777777778, 'branch': 0.2777777777777778, 'suit': 0.1111111111111111, 'challenge
s': 0.05555555555555555, 'authority': 0.1111111111111111, 'spend': 0.05555555555555555, 'billions': 0.0555
5555555555555, 'dollars': 0.05555555555555555, 'insurance': 0.2777777777777778, 'subsidies': 0.38888888888
88889, ' ': 0.5, 'Americans': 0.05555555555555555, 'handing': 0.05555555555555555, 'House': 1.0, 'big':
0.05555555555555555, 'victory': 0.1111111111111111, 'issues': 0.05555555555555555, 'sudden': 0.05555555555
555555, 'loss': 0.1111111111111111, 'disputed': 0.05555555555555555, 'conceivably': 0.05555555555555555,
'cause': 0.1111111111111111, 'program': 0.16666666666666666, 'implode': 0.05555555555555555, 'leaving': 0.
05555555555555555, 'millions': 0.05555555555555555, 'people': 0.05555555555555555, 'access': 0.05555555555
555555, 'prepared': 0.05555555555555555, 'replacement': 0.05555555555555555, 'lead': 0.05555555555555555,
'chaos': 0.05555555555555555, 'market': 0.05555555555555555, 'spur': 0.05555555555555555, 'political': 0.1
111111111111111, 'backlash': 0.05555555555555555, 'gain': 0.05555555555555555, 'control': 0.05555555555555
555, 'government': 0.05555555555555555, 'stave': 0.05555555555555555, 'outcome': 0.05555555555555555, 'fin
d': 0.05555555555555555, 'awkward': 0.05555555555555555, 'position': 0.05555555555555555, 'appropriating':
0.05555555555555555, 'huge': 0.05555555555555555, 'sums': 0.05555555555555555, 'temporarily': 0.0555555555
5555555, 'prop': 0.05555555555555555, 'law': 0.2222222222222222, 'angering': 0.05555555555555555, 'conserv
ative': 0.05555555555555555, 'voters': 0.05555555555555555, 'demanding': 0.05555555555555555, 'end': 0.055
55555555555555, 'years': 0.05555555555555555, 'twist': 0.05555555555555555, 'Donald': 0.05555555555555555,
'J.': 0.1111111111111111, 'worried': 0.05555555555555555, 'preserving': 0.05555555555555555, 'prerogative
s': 0.05555555555555555, 'fight': 0.16666666666666666, 'Republican': 0.1111111111111111, 'allies': 0.05555
555555555555, 'central': 0.05555555555555555, 'questions': 0.05555555555555555, 'dispute': 0.1111111111111
111, 'Eager': 0.05555555555555555, 'avoid': 0.05555555555555555, 'ugly': 0.05555555555555555, 'pileup': 0.
05555555555555555, 'Capitol': 0.05555555555555555, 'Hill': 0.05555555555555555, 'transition': 0.1666666666
6666666, 'team': 0.16666666666666666, 'gaming': 0.05555555555555555, 'handle': 0.05555555555555555, 'elect
ion': 0.05555555555555555, 'limbo': 0.05555555555555555, 'late': 0.05555555555555555, 'February': 0.055555
55555555555, 'United': 0.05555555555555555, 'States': 0.05555555555555555, 'Court': 0.05555555555555555,
'Appeals': 0.05555555555555555, 'District': 0.05555555555555555, 'Columbia': 0.05555555555555555, 'Circui
t': 0.05555555555555555, 'ready': 0.05555555555555555, 'divulge': 0.05555555555555555, 'strategy': 0.05555
555555555555, '“': 0.3333333333333333, 'Given': 0.05555555555555555, 'pending': 0.05555555555555555, 'liti
gation': 0.05555555555555555, 'involves': 0.05555555555555555, 'Congress': 0.2777777777777778, 'inappropri
ate': 0.05555555555555555, 'comment': 0.05555555555555555, '”': 0.3333333333333333, 'said': 0.222222222222
2222, 'Phillip': 0.05555555555555555, 'Blando': 0.05555555555555555, 'spokesman': 0.05555555555555555, 'ef
fort': 0.05555555555555555, 'taking': 0.05555555555555555, 'office': 0.05555555555555555, 'evaluate': 0.05
555555555555555, 'case': 0.2222222222222222, 'related': 0.05555555555555555, 'aspects': 0.0555555555555555
5, 'Affordable': 0.05555555555555555, 'Care': 0.05555555555555555, 'Act': 0.05555555555555555, 'potentiall
y': 0.05555555555555555, 'decision': 0.1111111111111111, '2015': 0.05555555555555555, 'Judge': 0.166666666
66666666, 'Rosemary': 0.05555555555555555, 'M.': 0.05555555555555555, 'Collyer': 0.16666666666666666, 'rul
ed': 0.05555555555555555, 'standing': 0.16666666666666666, 'sue': 0.16666666666666666, 'spending': 0.27777
77777777778, 'distributing': 0.05555555555555555, 'violation': 0.05555555555555555, 'Constitution': 0.1666
6666666666666, 'approval': 0.05555555555555555, 'Justice': 0.05555555555555555, 'Department': 0.1111111111
111111, 'confident': 0.05555555555555555, 'reversed': 0.05555555555555555, 'quickly': 0.05555555555555555,
'appealed': 0.05555555555555555, 'remained': 0.05555555555555555, 'place': 0.05555555555555555, 'appeal':
0.1111111111111111, 'successfully': 0.05555555555555555, 'seeking': 0.05555555555555555, 'temporary': 0.05
555555555555555, 'halt': 0.05555555555555555, 'proceedings': 0.05555555555555555, 'Mr.': 0.055555555555555
55, 'won': 0.05555555555555555, 'month': 0.1111111111111111, 'told': 0.05555555555555555, 'court': 0.05555
555555555555, 'currently': 0.05555555555555555, 'discussing': 0.05555555555555555, 'potential': 0.05555555
555555555, 'options': 0.05555555555555555, 'resolution': 0.05555555555555555, 'matter': 0.1666666666666666
6, 'effect': 0.05555555555555555, 'inauguration': 0.05555555555555555, 'Jan.': 0.05555555555555555, '20':
0.05555555555555555, '2017': 0.05555555555555555, 'suspension': 0.05555555555555555, 'lawyers': 0.16666666
666666666, 'provide': 0.1111111111111111, 'future': 0.05555555555555555, 'time': 0.1111111111111111, 'cons
ider': 0.05555555555555555, 'continue': 0.05555555555555555, 'prosecuting': 0.05555555555555555, 'resolv
e': 0.05555555555555555, 'leadership': 0.05555555555555555, 'officials': 0.05555555555555555, 'acknowledg
e': 0.05555555555555555, 'possibility': 0.05555555555555555, 'cascading': 0.05555555555555555, 'effects':
0.05555555555555555, 'payments': 0.05555555555555555, 'totaled': 0.05555555555555555, 'estimated': 0.05555
555555555555, '13': 0.05555555555555555, 'billion': 0.05555555555555555, 'suddenly': 0.05555555555555555,
'stopped': 0.05555555555555555, 'Insurers': 0.05555555555555555, 'receive': 0.1111111111111111, 'exchang
e': 0.05555555555555555, 'paying': 0.05555555555555555, 'costs': 0.1111111111111111, 'deductibles': 0.0555
5555555555555, 'eligible': 0.05555555555555555, 'consumers': 0.05555555555555555, 'race': 0.05555555555555
555, 'drop': 0.05555555555555555, 'coverage': 0.05555555555555555, 'losing': 0.05555555555555555, 'money':
0.1111111111111111, 'destabilize': 0.05555555555555555, 'entire': 0.05555555555555555, 'lack': 0.055555555
55555555, 'confidence': 0.05555555555555555, 'leads': 0.05555555555555555, 'insurers': 0.0555555555555555
5, 'seek': 0.05555555555555555, 'quick': 0.1111111111111111, 'exit': 0.05555555555555555, 'Anticipating':
0.05555555555555555, 'inclined': 0.05555555555555555, 'mount': 0.05555555555555555, 'vigorous': 0.05555555
555555555, 'given': 0.05555555555555555, 'dim': 0.05555555555555555, 'view': 0.05555555555555555, 'sough
t': 0.1111111111111111, 'intervene': 0.05555555555555555, 'behalf': 0.05555555555555555, 'participants':
0.05555555555555555, 'request': 0.05555555555555555, 'predicted': 0.05555555555555555, 'deal': 0.055555555
55555555, 'dismiss': 0.05555555555555555, 'settle': 0.05555555555555555, 'produce': 0.05555555555555555,
'devastating': 0.05555555555555555, 'consequences': 0.05555555555555555, 'individuals': 0.0555555555555555
5, 'reductions': 0.05555555555555555, 'nation': 0.05555555555555555, 'systems': 0.05555555555555555, 'gene
rally': 0.05555555555555555, 'happens': 0.05555555555555555, 'want': 0.1111111111111111, 'prevail': 0.0555
5555555555555, 'overarching': 0.05555555555555555, 'concepts': 0.05555555555555555, 'congressional': 0.111
1111111111111, 'power': 0.16666666666666666, 'purse': 0.05555555555555555, 'right': 0.05555555555555555,
'violates': 0.05555555555555555, 'contend': 0.05555555555555555, 'appropriated': 0.05555555555555555, 'req
uired': 0.16666666666666666, 'initially': 0.1111111111111111, 'championed': 0.05555555555555555, 'John':
0.05555555555555555, 'A.': 0.05555555555555555, 'Boehner': 0.05555555555555555, 'speaker': 0.0555555555555
5555, 'later': 0.05555555555555555, 'committee': 0.05555555555555555, 'reports': 0.05555555555555555, 'ass
erted': 0.05555555555555555, 'desperate': 0.05555555555555555, 'funding': 0.05555555555555555, 'Treasury':
0.05555555555555555, 'despite': 0.05555555555555555, 'widespread': 0.05555555555555555, 'internal': 0.0555
5555555555555, 'skepticism': 0.05555555555555555, 'proper': 0.05555555555555555, 'White': 0.16666666666666
666, 'permanent': 0.05555555555555555, 'passed': 0.05555555555555555, '2010': 0.05555555555555555, 'annua
l': 0.05555555555555555, 'appropriation': 0.05555555555555555, '—': 0.16666666666666666, 'important': 0.05
555555555555555, 'found': 0.05555555555555555, 'issue': 0.05555555555555555, 'ruling': 0.0555555555555555
5, 'legal': 0.1111111111111111, 'experts': 0.05555555555555555, 'flawed': 0.05555555555555555, 'preceden
t': 0.05555555555555555, 'set': 0.1111111111111111, 'restore': 0.05555555555555555, 'leverage': 0.05555555
555555555, 'come': 0.1111111111111111, 'pressure': 0.05555555555555555, 'advocates': 0.05555555555555555,
'presidential': 0.05555555555555555, 'shared': 0.05555555555555555, 'views': 0.05555555555555555, 'precede
nts': 0.05555555555555555, 'broad': 0.05555555555555555, 'repercussions': 0.05555555555555555, 'complicate
d': 0.05555555555555555, 'dynamics': 0.05555555555555555, 'illustrating': 0.05555555555555555, 'era': 0.05
555555555555555, 'anticipated': 0.05555555555555555, 'took': 0.05555555555555555}
[ , Congressional Republicans have a new fear when it comes to their health care lawsuit against the Ob
ama administration: They might win., The incoming Trump administration could choose to no longer defend th
e executive branch against the suit, which challenges the administration’s authority to spend billions of
dollars on health insurance subsidies for and Americans, handing House Republicans a big victory on
issues., But a sudden loss of the disputed subsidies could conceivably cause the health care program to im
plode, leaving millions of people without access to health insurance before Republicans have prepared a re
placement., That could lead to chaos in the insurance market and spur a political backlash just as Republi
cans gain full control of the government., To stave off that outcome, Republicans could find themselves in
the awkward position of appropriating huge sums to temporarily prop up the Obama health care law, angering
conservative voters who have been demanding an end to the law for years., In another twist, Donald J. Trum
p’s administration, worried about preserving executive branch prerogatives, could choose to fight its Repu
blican allies in the House on some central questions in the dispute., Eager to avoid an ugly political pil
eup, Republicans on Capitol Hill and the Trump transition team are gaming out how to handle the lawsuit, w
hich, after the election, has been put in limbo until at least late February by the United States Court of
Appeals for the District of Columbia Circuit., They are not yet ready to divulge their strategy., “Given t
hat this pending litigation involves the Obama administration and Congress, it would be inappropriate to c
omment,” said Phillip J. Blando, a spokesman for the Trump transition effort., “Upon taking office, the Tr
ump administration will evaluate this case and all related aspects of the Affordable Care Act. ”, In a pot
entially decision in 2015, Judge Rosemary M. Collyer ruled that House Republicans had the standing to su
e the executive branch over a spending dispute and that the Obama administration had been distributing the
health insurance subsidies, in violation of the Constitution, without approval from Congress., The Justice
Department, confident that Judge Collyer’s decision would be reversed, quickly appealed, and the subsidies
have remained in place during the appeal., In successfully seeking a temporary halt in the proceedings aft
er Mr. Trump won, House Republicans last month told the court that they “and the ’s transition team curre
ntly are discussing potential options for resolution of this matter, to take effect after the ’s inaugura
tion on Jan. 20, 2017. ”, The suspension of the case, House lawyers said, will “provide the and his futu
re administration time to consider whether to continue prosecuting or to otherwise resolve this appeal. ”,
Republican leadership officials in the House acknowledge the possibility of “cascading effects” if the p
ayments, which have totaled an estimated $13 billion, are suddenly stopped., Insurers that receive the sub
sidies in exchange for paying costs such as deductibles and for eligible consumers could race to drop
coverage since they would be losing money., Over all, the loss of the subsidies could destabilize the enti
re program and cause a lack of confidence that leads other insurers to seek a quick exit as well., Anticip
ating that the Trump administration might not be inclined to mount a vigorous fight against the House Repu
blicans given the ’s dim view of the health care law, a team of lawyers this month sought to intervene in
the case on behalf of two participants in the health care program., In their request, the lawyers predicte
d that a deal between House Republicans and the new administration to dismiss or settle the case “will pro
duce devastating consequences for the individuals who receive these reductions, as well as for the natio
n’s health insurance and health care systems generally. ”, No matter what happens, House Republicans say,
they want to prevail on two overarching concepts: the congressional power of the purse, and the right of C
ongress to sue the executive branch if it violates the Constitution regarding that spending power., House
Republicans contend that Congress never appropriated the money for the subsidies, as required by the Const
itution., In the suit, which was initially championed by John A. Boehner, the House speaker at the time, a
nd later in House committee reports, Republicans asserted that the administration, desperate for the fundi
ng, had required the Treasury Department to provide it despite widespread internal skepticism that the spe
nding was proper., The White House said that the spending was a permanent part of the law passed in 2010,
and that no annual appropriation was required — even though the administration initially sought one., J
ust as important to House Republicans, Judge Collyer found that Congress had the standing to sue the White
House on this issue — a ruling that many legal experts said was flawed — and they want that preceden
t to be set to restore congressional leverage over the executive branch., But on spending power and standi
ng, the Trump administration may come under pressure from advocates of presidential authority to fight the
House no matter their shared views on health care, since those precedents could have broad repercussions.,
It is a complicated set of dynamics illustrating how a quick legal victory for the House in the Trump era
might come with costs that Republicans never anticipated when they took on the Obama White House.]
Word frequency table
In [38]: sentence_scores = {}
for sent in sentence_tokens:
for word in sent:
if word.text.lower() in word_frequencies.keys():
if sent not in sentence_scores.keys():
sentence_scores[sent] = word_frequencies[word.text.lower()]
else:
sentence_scores[sent] += word_frequencies[word.text.lower()]
In [39]: sentence_scores
{ : 0.3888888888888889,
Out[39]:
Congressional Republicans have a new fear when it comes to their health care lawsuit against the Obama
administration: They might win.: 2.388888888888889,
The incoming Trump administration could choose to no longer defend the executive branch against the suit,
which challenges the administration’s authority to spend billions of dollars on health insurance subsidies
for and Americans, handing House Republicans a big victory on issues.: 5.444444444444443,
But a sudden loss of the disputed subsidies could conceivably cause the health care program to implode, l
eaving millions of people without access to health insurance before Republicans have prepared a replacemen
t.: 3.222222222222221,
That could lead to chaos in the insurance market and spur a political backlash just as Republicans gain f
ull control of the government.: 0.8333333333333335,
To stave off that outcome, Republicans could find themselves in the awkward position of appropriating hug
e sums to temporarily prop up the Obama health care law, angering conservative voters who have been demand
ing an end to the law for years.: 2.3333333333333335,
In another twist, Donald J. Trump’s administration, worried about preserving executive branch prerogative
s, could choose to fight its Republican allies in the House on some central questions in the dispute.: 2.0
55555555555556,
Eager to avoid an ugly political pileup, Republicans on Capitol Hill and the Trump transition team are ga
ming out how to handle the lawsuit, which, after the election, has been put in limbo until at least late F
ebruary by the United States Court of Appeals for the District of Columbia Circuit.: 1.0555555555555556,
They are not yet ready to divulge their strategy.: 0.16666666666666666,
“Given that this pending litigation involves the Obama administration and Congress, it would be inappropr
iate to comment,” said Phillip J. Blando, a spokesman for the Trump transition effort.: 2.222222222222222,
“Upon taking office, the Trump administration will evaluate this case and all related aspects of the Affo
rdable Care Act. ”: 2.2777777777777777,
In a potentially decision in 2015, Judge Rosemary M. Collyer ruled that House Republicans had the stand
ing to sue the executive branch over a spending dispute and that the Obama administration had been distrib
uting the health insurance subsidies, in violation of the Constitution, without approval from Congress.:
4.222222222222221,
The Justice Department, confident that Judge Collyer’s decision would be reversed, quickly appealed, and
the subsidies have remained in place during the appeal.: 0.9444444444444446,
In successfully seeking a temporary halt in the proceedings after Mr. Trump won, House Republicans last m
onth told the court that they “and the ’s transition team currently are discussing potential options for
resolution of this matter, to take effect after the ’s inauguration on Jan. 20, 2017. ”: 2.99999999999999
96,
The suspension of the case, House lawyers said, will “provide the and his future administration time to
consider whether to continue prosecuting or to otherwise resolve this appeal. ”: 3.166666666666666,
Republican leadership officials in the House acknowledge the possibility of “cascading effects” if the
payments, which have totaled an estimated $13 billion, are suddenly stopped.: 1.888888888888889,
Insurers that receive the subsidies in exchange for paying costs such as deductibles and for eligibl
e consumers could race to drop coverage since they would be losing money.: 1.9444444444444446,
Over all, the loss of the subsidies could destabilize the entire program and cause a lack of confidence t
hat leads other insurers to seek a quick exit as well.: 1.3333333333333335,
Anticipating that the Trump administration might not be inclined to mount a vigorous fight against the Ho
use Republicans given the ’s dim view of the health care law, a team of lawyers this month sought to inte
rvene in the case on behalf of two participants in the health care program.: 4.944444444444445,
In their request, the lawyers predicted that a deal between House Republicans and the new administration
to dismiss or settle the case “will produce devastating consequences for the individuals who receive these
reductions, as well as for the nation’s health insurance and health care systems generally. ”: 4.611111111
11111,
No matter what happens, House Republicans say, they want to prevail on two overarching concepts: the cong
ressional power of the purse, and the right of Congress to sue the executive branch if it violates the Con
stitution regarding that spending power.: 2.111111111111111,
House Republicans contend that Congress never appropriated the money for the subsidies, as required by th
e Constitution.: 0.7777777777777778,
In the suit, which was initially championed by John A. Boehner, the House speaker at the time, and later
in House committee reports, Republicans asserted that the administration, desperate for the funding, had r
equired the Treasury Department to provide it despite widespread internal skepticism that the spending was
proper.: 2.333333333333333,
The White House said that the spending was a permanent part of the law passed in 2010, and that no annual
appropriation was required — even though the administration initially sought one.: 3.166666666666667,
Just as important to House Republicans, Judge Collyer found that Congress had the standing to sue the Whi
te House on this issue — a ruling that many legal experts said was flawed — and they want that prece
dent to be set to restore congressional leverage over the executive branch.: 4.166666666666666,
But on spending power and standing, the Trump administration may come under pressure from advocates of pr
esidential authority to fight the House no matter their shared views on health care, since those precedent
s could have broad repercussions.: 3.3333333333333326,
It is a complicated set of dynamics illustrating how a quick legal victory for the House in the Trump era
might come with costs that Republicans never anticipated when they took on the Obama White House.: 1.00000
00000000002}
Summarization
In [40]: from heapq import nlargest
8
Out[41]:
In [43]: summary
[The incoming Trump administration could choose to no longer defend the executive branch against the suit,
Out[43]:
which challenges the administration’s authority to spend billions of dollars on health insurance subsidies
for and Americans, handing House Republicans a big victory on issues.,
Anticipating that the Trump administration might not be inclined to mount a vigorous fight against the Ho
use Republicans given the ’s dim view of the health care law, a team of lawyers this month sought to inte
rvene in the case on behalf of two participants in the health care program.,
In their request, the lawyers predicted that a deal between House Republicans and the new administration
to dismiss or settle the case “will produce devastating consequences for the individuals who receive these
reductions, as well as for the nation’s health insurance and health care systems generally. ”,
In a potentially decision in 2015, Judge Rosemary M. Collyer ruled that House Republicans had the stand
ing to sue the executive branch over a spending dispute and that the Obama administration had been distrib
uting the health insurance subsidies, in violation of the Constitution, without approval from Congress.,
Just as important to House Republicans, Judge Collyer found that Congress had the standing to sue the Whi
te House on this issue — a ruling that many legal experts said was flawed — and they want that prece
dent to be set to restore congressional leverage over the executive branch.,
But on spending power and standing, the Trump administration may come under pressure from advocates of pr
esidential authority to fight the House no matter their shared views on health care, since those precedent
s could have broad repercussions.,
But a sudden loss of the disputed subsidies could conceivably cause the health care program to implode, l
eaving millions of people without access to health insurance before Republicans have prepared a replacemen
t.,
The White House said that the spending was a permanent part of the law passed in 2010, and that no annual
appropriation was required — even though the administration initially sought one.]
In [46]: print(text)
Congressional Republicans have a new fear when it comes to their health care lawsuit against the Obama
administration: They might win. The incoming Trump administration could choose to no longer defend the exe
cutive branch against the suit, which challenges the administration’s authority to spend billions of dolla
rs on health insurance subsidies for and Americans, handing House Republicans a big victory on issu
es. But a sudden loss of the disputed subsidies could conceivably cause the health care program to implod
e, leaving millions of people without access to health insurance before Republicans have prepared a replac
ement. That could lead to chaos in the insurance market and spur a political backlash just as Republicans
gain full control of the government. To stave off that outcome, Republicans could find themselves in the a
wkward position of appropriating huge sums to temporarily prop up the Obama health care law, angering cons
ervative voters who have been demanding an end to the law for years. In another twist, Donald J. Trump’s a
dministration, worried about preserving executive branch prerogatives, could choose to fight its Republica
n allies in the House on some central questions in the dispute. Eager to avoid an ugly political pileup, R
epublicans on Capitol Hill and the Trump transition team are gaming out how to handle the lawsuit, which,
after the election, has been put in limbo until at least late February by the United States Court of Appea
ls for the District of Columbia Circuit. They are not yet ready to divulge their strategy. “Given that thi
s pending litigation involves the Obama administration and Congress, it would be inappropriate to commen
t,” said Phillip J. Blando, a spokesman for the Trump transition effort. “Upon taking office, the Trump ad
ministration will evaluate this case and all related aspects of the Affordable Care Act. ” In a potentiall
y decision in 2015, Judge Rosemary M. Collyer ruled that House Republicans had the standing to sue the e
xecutive branch over a spending dispute and that the Obama administration had been distributing the health
insurance subsidies, in violation of the Constitution, without approval from Congress. The Justice Departm
ent, confident that Judge Collyer’s decision would be reversed, quickly appealed, and the subsidies have r
emained in place during the appeal. In successfully seeking a temporary halt in the proceedings after Mr.
Trump won, House Republicans last month told the court that they “and the ’s transition team currently ar
e discussing potential options for resolution of this matter, to take effect after the ’s inauguration on
Jan. 20, 2017. ” The suspension of the case, House lawyers said, will “provide the and his future admini
stration time to consider whether to continue prosecuting or to otherwise resolve this appeal. ” Republica
n leadership officials in the House acknowledge the possibility of “cascading effects” if the payments,
which have totaled an estimated $13 billion, are suddenly stopped. Insurers that receive the subsidies in
exchange for paying costs such as deductibles and for eligible consumers could race to drop coverage
since they would be losing money. Over all, the loss of the subsidies could destabilize the entire program
and cause a lack of confidence that leads other insurers to seek a quick exit as well. Anticipating that t
he Trump administration might not be inclined to mount a vigorous fight against the House Republicans give
n the ’s dim view of the health care law, a team of lawyers this month sought to intervene in the case on
behalf of two participants in the health care program. In their request, the lawyers predicted that a deal
between House Republicans and the new administration to dismiss or settle the case “will produce devastati
ng consequences for the individuals who receive these reductions, as well as for the nation’s health insur
ance and health care systems generally. ” No matter what happens, House Republicans say, they want to prev
ail on two overarching concepts: the congressional power of the purse, and the right of Congress to sue th
e executive branch if it violates the Constitution regarding that spending power. House Republicans conten
d that Congress never appropriated the money for the subsidies, as required by the Constitution. In the su
it, which was initially championed by John A. Boehner, the House speaker at the time, and later in House c
ommittee reports, Republicans asserted that the administration, desperate for the funding, had required th
e Treasury Department to provide it despite widespread internal skepticism that the spending was proper. T
he White House said that the spending was a permanent part of the law passed in 2010, and that no annual a
ppropriation was required — even though the administration initially sought one. Just as important to H
ouse Republicans, Judge Collyer found that Congress had the standing to sue the White House on this issue
— a ruling that many legal experts said was flawed — and they want that precedent to be set to restor
e congressional leverage over the executive branch. But on spending power and standing, the Trump administ
ration may come under pressure from advocates of presidential authority to fight the House no matter their
shared views on health care, since those precedents could have broad repercussions. It is a complicated se
t of dynamics illustrating how a quick legal victory for the House in the Trump era might come with costs
that Republicans never anticipated when they took on the Obama White House.
In [47]: print(summary)
The incoming Trump administration could choose to no longer defend the executive branch against the suit,
which challenges the administration’s authority to spend billions of dollars on health insurance subsidies
for and Americans, handing House Republicans a big victory on issues. Anticipating that the Trump a
dministration might not be inclined to mount a vigorous fight against the House Republicans given the ’s
dim view of the health care law, a team of lawyers this month sought to intervene in the case on behalf of
two participants in the health care program. In their request, the lawyers predicted that a deal between H
ouse Republicans and the new administration to dismiss or settle the case “will produce devastating conseq
uences for the individuals who receive these reductions, as well as for the nation’s health insurance and
health care systems generally. ” In a potentially decision in 2015, Judge Rosemary M. Collyer ruled that
House Republicans had the standing to sue the executive branch over a spending dispute and that the Obama
administration had been distributing the health insurance subsidies, in violation of the Constitution, wit
hout approval from Congress. Just as important to House Republicans, Judge Collyer found that Congress had
the standing to sue the White House on this issue — a ruling that many legal experts said was flawed —
and they want that precedent to be set to restore congressional leverage over the executive branch. But
on spending power and standing, the Trump administration may come under pressure from advocates of preside
ntial authority to fight the House no matter their shared views on health care, since those precedents cou
ld have broad repercussions. But a sudden loss of the disputed subsidies could conceivably cause the healt
h care program to implode, leaving millions of people without access to health insurance before Republican
s have prepared a replacement. The White House said that the spending was a permanent part of the law pass
ed in 2010, and that no annual appropriation was required — even though the administration initially so
ught one.
In [48]: len(summary)
2134
Out[48]:
text = '''WASHINGTON — Congressional Republicans have a new fear when it comes to their health care l
tokens = nlp(text)
WASHINGTON — Congressional Republicans have a new fear when it comes to their health care lawsuit ag
ainst the Obama administration: They might win.
The incoming Trump administration could choose to no longer defend the executive branch against the suit,
which challenges the administration’s authority to spend billions of dollars on health insurance subsidies
for and Americans, handing House Republicans a big victory on issues.
But a sudden loss of the disputed subsidies could conceivably cause the health care program to implode, le
aving millions of people without access to health insurance before Republicans have prepared a replacemen
t.
That could lead to chaos in the insurance market and spur a political backlash just as Republicans gain fu
ll control of the government.
To stave off that outcome, Republicans could find themselves in the awkward position of appropriating huge
sums to temporarily prop up the Obama health care law, angering conservative voters who have been demandin
g an end to the law for years.
In another twist, Donald J. Trump’s administration, worried about preserving executive branch prerogative
s, could choose to fight its Republican allies in the House on some central questions in the dispute.
Eager to avoid an ugly political pileup, Republicans on Capitol Hill and the Trump transition team are gam
ing out how to handle the lawsuit, which, after the election, has been put in limbo until at least late Fe
bruary by the United States Court of Appeals for the District of Columbia Circuit.
They are not yet ready to divulge their strategy.
“Given that this pending litigation involves the Obama administration and Congress, it would be inappropri
ate to comment,” said Phillip J. Blando, a spokesman for the Trump transition effort.
“Upon taking office, the Trump administration will evaluate this case and all related aspects of the Affor
dable Care Act. ”
In a potentially decision in 2015, Judge Rosemary M. Collyer ruled that House Republicans had the standi
ng to sue the executive branch over a spending dispute and that the Obama administration had been distribu
ting the health insurance subsidies, in violation of the Constitution, without approval from Congress.
The Justice Department, confident that Judge Collyer’s decision would be reversed, quickly appealed, and t
he subsidies have remained in place during the appeal.
In successfully seeking a temporary halt in the proceedings after Mr. Trump won, House Republicans last mo
nth told the court that they “and the ’s transition team currently are discussing potential options for r
esolution of this matter, to take effect after the ’s inauguration on Jan. 20, 2017. ”
The suspension of the case, House lawyers said, will “provide the and his future administration time to
consider whether to continue prosecuting or to otherwise resolve this appeal. ”
Republican leadership officials in the House acknowledge the possibility of “cascading effects” if the p
ayments, which have totaled an estimated $13 billion, are suddenly stopped.
Insurers that receive the subsidies in exchange for paying costs such as deductibles and for eligible
consumers could race to drop coverage since they would be losing money.
Over all, the loss of the subsidies could destabilize the entire program and cause a lack of confidence th
at leads other insurers to seek a quick exit as well.
Anticipating that the Trump administration might not be inclined to mount a vigorous fight against the Hou
se Republicans given the ’s dim view of the health care law, a team of lawyers this month sought to inter
vene in the case on behalf of two participants in the health care program.
In their request, the lawyers predicted that a deal between House Republicans and the new administration t
o dismiss or settle the case “will produce devastating consequences for the individuals who receive these
reductions, as well as for the nation’s health insurance and health care systems generally. ”
No matter what happens, House Republicans say, they want to prevail on two overarching concepts: the congr
essional power of the purse, and the right of Congress to sue the executive branch if it violates the Cons
titution regarding that spending power.
House Republicans contend that Congress never appropriated the money for the subsidies, as required by the
Constitution.
In the suit, which was initially championed by John A. Boehner, the House speaker at the time, and later i
n House committee reports, Republicans asserted that the administration, desperate for the funding, had re
quired the Treasury Department to provide it despite widespread internal skepticism that the spending was
proper.
The White House said that the spending was a permanent part of the law passed in 2010, and that no annual
appropriation was required — even though the administration initially sought one.
Just as important to House Republicans, Judge Collyer found that Congress had the standing to sue the Whit
e House on this issue — a ruling that many legal experts said was flawed — and they want that preced
ent to be set to restore congressional leverage over the executive branch.
But on spending power and standing, the Trump administration may come under pressure from advocates of pre
sidential authority to fight the House no matter their shared views on health care, since those precedents
could have broad repercussions.
It is a complicated set of dynamics illustrating how a quick legal victory for the House in the Trump era
might come with costs that Republicans never anticipated when they took on the Obama White House.
documents = []
documents_dir = Path('C:/Users/hp/Downloads/set3')
['It is a complicated set of dynamics illustrating how a quick legal victory for the House in the Trump er
a might come with costs that Republicans never anticipated when they took on the Obama White House.', 'In
successfully seeking a temporary halt in the proceedings after Mr. Trump won,House Republicans last month
told the court that they “and the ’s transition team currently are discussing potential options for resol
ution of this matter,to take effect after the ’s inauguration on Jan. 20, 2017. ”']
['It is a complicated set of dynamics illustrating how a quick legal victory for the House in the Trump er
a might come with costs that Republicans never anticipated when they took on the Obama White House.']
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1.]
import numpy as np
from scipy.sparse.csgraph import connected_components
from scipy.special import softmax
import logging
logger = logging.getLogger(__name__)
def degree_centrality_scores(
similarity_matrix,
threshold=None,
increase_power=True,
):
if not (
threshold is None
or isinstance(threshold, float)
and 0 <= threshold < 1
):
raise ValueError(
'\'threshold\' should be a floating-point number '
'from the interval [0, 1) or None',
)
if threshold is None:
markov_matrix = create_markov_matrix(similarity_matrix)
else:
markov_matrix = create_markov_matrix_discrete(
similarity_matrix,
threshold,
)
scores = stationary_distribution(
markov_matrix,
increase_power=increase_power,
normalized=False,
)
return scores
if len(eigenvector) == 1:
return eigenvector
transition = transition_matrix.transpose()
for _ in range(max_iter):
eigenvector_next = np.dot(transition, eigenvector)
if np.allclose(eigenvector_next, eigenvector):
return eigenvector_next
eigenvector = eigenvector_next
if increase_power:
transition = np.dot(transition, transition)
def connected_nodes(matrix):
_, labels = connected_components(matrix)
groups = []
return groups
def create_markov_matrix(weights_matrix):
n_1, n_2 = weights_matrix.shape
if n_1 != n_2:
raise ValueError('\'weights_matrix\' should be square')
return create_markov_matrix(discrete_weights_matrix)
def stationary_distribution(
transition_matrix,
increase_power=True,
normalized=True,
):
n_1, n_2 = transition_matrix.shape
if n_1 != n_2:
raise ValueError('\'transition_matrix\' should be square')
distribution = np.zeros(n_1)
grouped_indices = connected_nodes(transition_matrix)
if normalized:
distribution /= n_1
return distribution
ROUGE
here we giving the sentences from splitted sentence as " model_out "
with change of some words similar meaning of the splitted sentence as " reference "
In [58]: model_out = ["WASHINGTON — Congressional Republicans have a new fear when it comes to their health ca
"The incoming Trump administration could choose to no longer defend the executive branch ag
"But a sudden loss of the disputed subsidies could conceivably cause the health care progra
"That could lead to chaos in the insurance market and spur a political backlash just as Rep
"To stave off that outcome, Republicans could find themselves in the awkward position of ap
"In another twist, Donald J. Trump’s administration, worried about preserving executive bra
"Eager to avoid an ugly political pileup, Republicans on Capitol Hill and the Trump transit
"They are not yet ready to divulge their strategy.",
"“Given that this pending litigation involves the Obama administration and Congress, it wou
"“Upon taking office, the Trump administration will evaluate this case and all related aspe
"In a potentially decision in 2015, Judge Rosemary M. Collyer ruled that House Republican
"The Justice Department, confident that Judge Collyer’s decision would be reversed, quickly
"In successfully seeking a temporary halt in the proceedings after Mr. Trump won, House Rep
"The suspension of the case, House lawyers said, will “provide the and his future adminis
"Republican leadership officials in the House acknowledge the possibility of “cascading eff
"Insurers that receive the subsidies in exchange for paying costs such as deductibles an
"Over all, the loss of the subsidies could destabilize the entire program and cause a lack
"Anticipating that the Trump administration might not be inclined to mount a vigorous fight
"In their request, the lawyers predicted that a deal between House Republicans and the new
"No matter what happens, House Republicans say, they want to prevail on two overarching con
"House Republicans contend that Congress never appropriated the money for the subsidies, as
"In the suit, which was initially championed by John A. Boehner, the House speaker at the t
"The White House said that the spending was a permanent part of the law passed in 2010, and
"Just as important to House Republicans, Judge Collyer found that Congress had the standing
"But on spending power and standing, the Trump administration may come under pressure from
"It is a complicated set of dynamics illustrating how a quick legal victory for the House i
reference = ["WASHINGTON — Congressional Republicans have a new anxiety when it comes to their health care
"The incoming Trump administration could choose to no longer defend the executive branch ag
"But a sudden loss of the disputed subsidies could conceivably cause the health care progra
"That could lead to chaos in the insurance market and spur a political comeback just as Rep
"To stave off that outcome, Republicans could find themselves in the awkward position of ap
"In another twist, Donald J. Trump’s administration, worried about preserving executive bra
"Eager to avoid an ugly political crash, Republicans on Capitol Hill and the Trump transiti
"They are not yet ready to divulge their strategy.",
"“Given that this pending action involves the Obama administration and Congress, it would b
"“Upon taking office, the Trump administration will evaluate this case and all related aspe
"In a potentially decision in 2015, Judge Rosemary M. Collyer ruled that House Republican
"The Justice Department, confident that Judge Collyer’s decision would be reversed, quickly
"In successfully seeking a temporary block in the proceedings after Mr. Trump won, House Re
"The suspension of the case, House lawyers said, will “provide the and his future adminis
"Republican leadership officials in the House acknowledge the possibility of “cascading eff
"Insurers that receive the subsidies in exchange for paying costs such as deductibles an
"Over all, the loss of the subsidies could destabilize the entire program and cause a lack
"Anticipating that the Trump administration might not be inclined to mount a nervous fight
"In their request, the lawyers predicted that a deal between House Republicans and the new
"No matter what happens, House Republicans say, they want to prevail on two overarching con
"House Republicans contend that Congress never appropriated the money for the subsidies, as
"In the suit, which was initially championed by John A. Boehner, the House speaker at the t
"The White House said that the spending was a permanent part of the law passed in 2010, and
"Just as important to House Republicans, Judge Collyer found that Congress had the standing
"But on spending power and standing, the Trump administration may come under pressure from
"It is a hard set of dynamics illustrating how a quick legal victory for the House in the T
files = os.listdir('C:/Users/hp/Downloads/bbc/'+folder)
path = "C:/Users/hp/Downloads/bbc/"+folder+'/'+file
df_dict['topic'].append(folder)
df_dict['news'].append(str(f))
df = pd.DataFrame(df_dict)
df
In [64]: df_final
3 business High fuel prices hit BA's profits\n\nBritish A... high fuel prices hit ba's profits british airways has blamed high fuel prices fo...
Spam e-mails tempt net shoppers\n\nComputer spam e-mails tempt net computer users across the world continue to
2221 tech
us... shoppers ig...
Be careful how you code\n\nA new European a new european directive could put software
2222 tech be careful how you code
dire... wr...
In [65]: df_final.to_csv('df_final.csv')
Document Clustering
In [66]: from sklearn.feature_extraction.text import CountVectorizer
import nltk
from sklearn.decomposition import TruncatedSVD
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.manifold import TSNE
from sklearn.preprocessing import LabelEncoder
from nltk.corpus import stopwords
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm, tqdm_notebook
s = silhouette_score(tsvd_mat, km.labels_)
s_list.append(s)
plt.plot(range(2,21), s_list)
plt.show()
<AxesSubplot:>
Out[69]:
Here, only the tech-related news article looks like having a wider spread whereas other
news articles nicely clustered. It also suggests that LSA (or Truncated SVD) has done a
nice work on the textual data to extract 200 important dimensions to segregate news
articles on different topics. It is to be understood that TSNE is non-deterministic in
nature and multiple runs will produce multiple representations, even though, the
structure will be more likely to remain similar if not the same.
In [70]: from sklearn.metrics import pairwise_distances
import numpy as np
query = "How is sony performing in computer game?"
query_mat = tsvd.transform(dtm.transform([query]))
dist = pairwise_distances(X=tsvd_mat, Y=query_mat, metric='cosine')
df_final['News'][np.argmin(dist.flatten())]
'martin o\'neill hopes to block stilian petrov\'s call-up by bulgaria during celtic\'s busy festive progra
Out[70]:
mme the celtic manager does not view the friendly against valenciana - a region of spain - on 28 december
as important "i can\'t say i was overly pleased at the fixture being played just after christmas" said o
\'neill "we will have to speak with the bulgarian fa it\'s their prerogative for them to arrange a fixture
and our prerogative how we handle it" bulgaria\'s game comes between celtic\'s boxing day meeting with hea
rts and their 2 january fixture against livingston o\'neill believes the extra game will not help a midfie
lder whose season has already been affected by injury "they have organised a game at a rather awkward tim
e" he added "when we were out playing barcelona i spoke with the bulgarian manager hristo stoichkov and he
mentioned this game to me "we pay his wages we are the ones that if an awkward fixture is going to be play
ed at some stage or another it should be to our benefit rather than anyone else "i have said this before b
ut bulgaria are the one nation who seem to organise a lot of friendly games"'
From output, LSA could extract the most relevant document. LSA can't be used for Big
data
# Print head
papers.head()
A Mean Field Theory of Layer IV 10-a-mean-field-theory-of- Abstract 683\n\nA MEAN FIELD THEORY OF
1 10 1987 NaN
of Visual Cort... layer-iv-of-visual-c... Missing LAYER IV OF VISU...
Bayesian Query Construction for 1000-bayesian-query- Abstract Bayesian Query Construction for
3 1000 1994 NaN
Neural Network... construction-for-neural-ne... Missing Neural\nNetwor...
1626 2003 Human and Ideal Observers for Detecting Image ... Abstract Missing Human and Ideal Observers for Detecting Image\...
6497 1994 Direct Multi-Step Time Series Prediction Using... Abstract Missing ?\n\n?\n\n \t \n \n \t \t ...
5158 1991 Merging Constrained Optimisation with Determin... Abstract Missing Merging Constrained Optimisation with\nDetermi...
89 1995 Active Gesture Recognition using Learned Visua... Abstract Missing Active Gesture Recognition using\nLearned Visu...
380 1997 Incorporating Test Inputs into Learning Abstract Missing Incorporating Test Inputs into Learning\n\nZeb...
# Remove punctuation
papers['paper_text_processed'] = \
papers['paper_text'].map(lambda x: re.sub('[,\.!?]', '', x))
stop_words = stopwords.words('english')
stop_words.extend(['from', 'subject', 're', 'edu', 'use'])
def sent_to_words(sentences):
for sentence in sentences:
# deacc=True removes punctuations
yield(gensim.utils.simple_preprocess(str(sentence), deacc=True))
def remove_stopwords(texts):
return [[word for word in simple_preprocess(str(doc))
if word not in stop_words] for doc in texts]
data = papers.paper_text_processed.values.tolist()
data_words = list(sent_to_words(data))
print(data_words[:1][0][:30])
['human', 'ideal', 'observers', 'detecting', 'image', 'curves', 'alan', 'yuille', 'department', 'statistic
s', 'psychology', 'university', 'california', 'los', 'angeles', 'los', 'angeles', 'ca', 'yuille', 'statucl
aedu', 'fang', 'fang', 'psychology', 'university', 'minnesota', 'minneapolis', 'mn', 'fang', 'tcumnedu',
'paul']
# Create Dictionary
id2word = corpora.Dictionary(data_words)
# Create Corpus
texts = data_words
# View
print(corpus[:1][0][:30])
[(0, 1), (1, 5), (2, 3), (3, 1), (4, 2), (5, 2), (6, 2), (7, 1), (8, 1), (9, 1), (10, 2), (11, 1), (12,
1), (13, 3), (14, 1), (15, 1), (16, 1), (17, 1), (18, 2), (19, 1), (20, 2), (21, 3), (22, 1), (23, 2), (2
4, 2), (25, 1), (26, 1), (27, 4), (28, 1), (29, 1)]
# number of topics
num_topics = 10
LDAvis_data_filepath = os.path.join('C:/Users/hp/Downloads/ldavis_prepared_10'+str(num_topics))
LDAvis_prepared
Out[78]: Selected Topic: 0 Previous Topic Next Topic Clear Topic (2)
Slide to adjust relevance metri
λ=1
Intertopic Distance Map (via multidimensional scaling) Top
0 200 400
PC2
data
model
learning
8 using
algorithm
7 figure
problem
5 2 one
set
time
also
models
two
4 distribution
10
PC1 number
first
used
9
6 network
log
3 given
point
information
state
loss
performance
function
1 xi
neurons
different
Marginal topic distribution results
5% 1. saliency(term w) = frequency(w) * [s
2. relevance(term w | topic t) = λ * p(w
10%
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: