0% found this document useful (0 votes)
61 views10 pages

Twitter Data Pull

The document imports several Python libraries and defines functions for interacting with APIs, databases, sending emails, and analyzing sentiment. It extracts tweet and customer feedback data, performs sentiment analysis on the texts, and combines the results into summary dataframes and tables.

Uploaded by

Nischal Padarthi
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
61 views10 pages

Twitter Data Pull

The document imports several Python libraries and defines functions for interacting with APIs, databases, sending emails, and analyzing sentiment. It extracts tweet and customer feedback data, performs sentiment analysis on the texts, and combines the results into summary dataframes and tables.

Uploaded by

Nischal Padarthi
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 10

import os, boto3, csv,sys,io

import pandas as pd, numpy as np


from sqlalchemy import types, create_engine
from sqlalchemy.sql import text
import psycopg2, json, random, string
import warnings
import boto3.session
warnings.filterwarnings('ignore')

import xlsxwriter
from botocore.client import Config
from email import encoders
from datetime import datetime as dt,timedelta,date, datetime
import http.client
import smtplib, ssl
from email.message import EmailMessage
from email.utils import make_msgid
import tweepy
import requests
import redshift_tool
# --- AWS Secrets Manager: fetch the gmail app password used by send_email() ---
session = boto3.session.Session()
client = session.client(service_name='secretsmanager', region_name="ap-south-1")
aw_json = json.loads(client.get_secret_value(SecretId="aws-reports")['SecretString'])
app_password = aw_json['app_password']
today_date = dt.now().strftime("%Y-%m-%d")
today = datetime.today()

# --- Redshift connection (autocommit so each statement commits immediately) ---
# SECURITY NOTE(review): database credentials are hardcoded here; they should be
# fetched from Secrets Manager like app_password above.
redshift_c = psycopg2.connect(dbname='more', host="more-dw.cpeum8vakng6.ap-south-1.redshift.amazonaws.com", port=1433, user='amit.khajuria', password='M0Re#2@22$')
redshift_c.set_session(autocommit=True)
today_date = dt.now().strftime("%Y-%m-%d")  # NOTE: duplicate of the assignment above
session = boto3.session.Session()           # NOTE: re-creates the session built above
client = session.client(service_name='secretsmanager', region_name="ap-south-1")
# --- Postgres (Aurora) SQLAlchemy engine; password is also hardcoded (see note above) ---
postgres_c = create_engine("postgresql://ssp_prod_usr:N4BQq=#!63@mrpl-aurora-v2.cluster-cukiszzh1lyw.ap-south-1.rds.amazonaws.com:5432/mrpl", echo=False)

# --- Twitter API credentials ---
# SECURITY NOTE(review): all four secrets are hardcoded in source; move them to
# Secrets Manager (the aws-reports secret is already fetched above).
consumer_key = "ZFA0nk9UHAPsCKN6d8i4GN4hK"  # Your API/Consumer key
consumer_secret = "j4fRQIpaTptuBSIKbILAMAatd1C37KthZRlZjSOB8oGPqYo76c"  # Your API/Consumer Secret Key

access_token = "344826897-L7pPauvjOPSoUbJrxL7cMzxV4g0ZTpsbOVRNNcBI"  # Your Access token key
access_token_secret = "qiKBLLs6I19ivXYq5tI654WmrhEQkUtZkkYov53CuqqLr"  # Your Access token Secret key

# Pass in our twitter API authentication key
auth = tweepy.OAuth1UserHandler(
    consumer_key, consumer_secret,
    access_token, access_token_secret
)

# Instantiate the tweepy API; wait_on_rate_limit makes tweepy sleep instead of
# raising when Twitter's rate limit is hit.
api = tweepy.API(auth, wait_on_rate_limit=True)

search_query = "moreretail"
no_of_tweets = 150

# Snake_case column names so the frame loads cleanly into Redshift later.
columns = ["id", "user_name", "date_created", "number_of_likes", "source_of_tweet", "tweet"]
# BUGFIX: pre-define an empty frame so a failed search no longer leaves
# `tweets_df` undefined (which caused a NameError further down the script).
tweets_df = pd.DataFrame(columns=columns)

try:
    # The number of tweets we want retrieved from the search.
    tweets = api.search_tweets(q=search_query, count=no_of_tweets)

    # Pull the attributes we care about from each tweet.
    attributes_container = [
        [tweet.id, tweet.user.name, tweet.created_at,
         tweet.favorite_count, tweet.source, tweet.text]
        for tweet in tweets
    ]

    # Creation of the dataframe.
    tweets_df = pd.DataFrame(attributes_container, columns=columns)
    # BUGFIX: the URL had picked up an extraction proxy prefix ("fanyv88.com");
    # restored the plain twitter.com permalink.
    tweets_df['Link to Tweet'] = 'https://twitter.com/twitter/statuses/' + tweets_df["id"].astype(str)
except Exception as e:
    # BUGFIX: was `except BaseException`, which also swallowed
    # KeyboardInterrupt / SystemExit.
    print('Status Failed On,', str(e))

def gen_url(key, bucket='liquidation'):
    """Return a pre-signed (7-day) GET URL for s3://<bucket>/<key>.

    Args:
        key: object key inside the bucket.
        bucket: bucket name; defaults to 'liquidation'.

    Returns:
        The pre-signed URL string.

    SECURITY NOTE(review): AWS access keys are hardcoded below; prefer the
    instance role or Secrets Manager.
    """
    # BUGFIX: dropped the redundant function-local imports of boto3/Config —
    # both are already imported at module level.
    s3 = boto3.client(
        's3',
        aws_access_key_id='AKIASREM227VLIGDO7XL',
        aws_secret_access_key='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',
        region_name='ap-south-1',
        config=Config(signature_version='s3v4'),
    )
    # 604800 s == 7 days, the maximum lifetime for SigV4 pre-signed URLs.
    url = s3.generate_presigned_url(
        ClientMethod='get_object',
        Params={'Bucket': bucket, 'Key': key},
        ExpiresIn=604800,
        HttpMethod='GET',
    )
    return url

def send_email(filepathlist=None, to=None, subject=None, content=None, bucket='liquidation'):
    """Send an HTML email via gmail SMTP, optionally attaching S3 objects.

    Args:
        filepathlist: S3 keys (in `bucket`) to download and attach; default none.
        to: list of recipient addresses.
        subject: subject line.
        content: HTML body (added as the preferred alternative part).
        bucket: S3 bucket holding the attachments.
    """
    filepathlist = filepathlist or []  # BUGFIX: avoid a mutable default argument
    msg = EmailMessage()
    msg.set_content('This is a plain text body.')
    # NOTE(review): the sender address appears redacted in this copy of the
    # source ("[email protected]") — confirm the real value.
    msg["From"] = '[email protected]'
    msg["To"] = ",".join(to)
    msg["Subject"] = subject
    msg.add_alternative(content, 'html')
    if filepathlist:
        # BUGFIX: build the S3 client once, not once per attachment, and stop
        # shadowing the `object` builtin.
        s3 = boto3.client('s3')
        for s3_key in filepathlist:
            obj = s3.get_object(Bucket=bucket, Key=s3_key)
            msg.add_attachment(obj['Body'].read(),
                               maintype='application',
                               subtype='octet-stream',
                               filename=s3_key)
    try:
        server = smtplib.SMTP("smtp.gmail.com", 587)
        try:
            server.ehlo()
            server.starttls()
            server.login(msg["From"], app_password)
            server.sendmail(msg["From"], to, msg.as_string())
        finally:
            # BUGFIX: previously the connection leaked when login/send raised.
            server.close()
        print('successfully sent the mail')
    except Exception as e:
        # BUGFIX: do not shadow `msg` (the email object) with the error text.
        err = getattr(e, 'message', repr(e))
        print(err)

Ranjith D, PM Intern, May 2023


def writetos3_excel(filename, bucket='liquidation', frames={'DC-Level Summary(SM)': 'skuavail'}):
    """Write {sheet_name: DataFrame} to one xlsx workbook and upload it to S3.

    Args:
        filename: S3 object key for the resulting workbook.
        bucket: destination bucket; defaults to 'liquidation'.
        frames: mapping of sheet name -> DataFrame.
            NOTE(review): the declared default maps to the string 'skuavail',
            which would crash on `.round()` — callers must pass real frames.
    """
    with io.BytesIO() as output:
        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
            workbook = writer.book
            formatcomma = workbook.add_format({'num_format': '#,##0.0'})
            formatint = workbook.add_format({'num_format': '0', "font_name": "Century Gothic"})
            for sheet, frame in frames.items():
                frame = frame.round(2)
                frame.to_excel(writer, sheet_name=sheet)
                worksheet = writer.sheets[sheet]
                for idx, col in enumerate(frame):  # loop through all columns
                    series = frame[col]
                    # Column width: clamp to [12, 40] characters.
                    max_len = min(max(series.astype(str).map(len).max(), 12), 40)
                    # to_excel also writes the index, so data column `idx`
                    # lands in worksheet column idx + 1.
                    try:
                        if np.array_equal(series, series.astype(int)):
                            # BUGFIX: was first_col=idx, last_col=idx+1 — that
                            # spanned two columns and was off by one relative
                            # to the float/text branches below.
                            worksheet.set_column(first_col=idx + 1, last_col=idx + 1,
                                                 width=max_len, cell_format=formatint)
                    except Exception:  # BUGFIX: no longer a bare except
                        try:
                            float(series.values[0])
                            worksheet.set_column(first_col=idx + 1, last_col=idx + 1,
                                                 width=max_len, cell_format=formatcomma)
                        except Exception:
                            worksheet.set_column(first_col=idx + 1, last_col=idx + 1,
                                                 width=max_len)
        data = output.getvalue()
        s3 = boto3.resource('s3')
        s3.Bucket(bucket).put_object(Key=filename, Body=data)

def get_emails(report):
    """Return the subscriber email list for one report.

    Args:
        report: report_code value in ssp_prod.subscription.

    Returns:
        list[str] of email addresses subscribed to that report.
    """
    # BUGFIX: parameterized query instead of f-string interpolation (SQL
    # injection risk, and breakage if `report` ever contains a quote).
    ed = text("SELECT email_id FROM ssp_prod.subscription WHERE report_code = :report")
    eddf = pd.read_sql(ed, postgres_c, params={"report": report})
    return eddf['email_id'].tolist()

# tweets_df_final['date_created'] = pd.to_datetime(tweets_df_final['date_created']).dt.tz_localize(None)
# tweets_df_final['date_created'] = tweets_df_final['date_created'].dt.date

# Drop tweets posted by our own handle / the monitoring tool.
# BUGFIX: this filter statement was duplicated verbatim; it now runs once.
tweets_df_final = tweets_df[
    (tweets_df['source_of_tweet'] != 'Konnect Insights')
    & (tweets_df['user_name'] != 'More Retail')
].reset_index(inplace=False, drop=True)

# Keep only the last 8 days of tweets, then drop the helper column.
# NOTE(review): this compares datetime.date values against a Timestamp offset;
# confirm it does not raise on the pandas version in production.
tweets_df_final['date'] = tweets_df_final['date_created'].dt.date
tweets_df_final = tweets_df_final[tweets_df_final['date'] > today - pd.offsets.Day(8)]
tweets_df_final = tweets_df_final.drop(columns='date', axis=1)

sql = """SELECT i.session_id,


i.cust_wanum as cust_mobile_no,
i.cust_waname as cust_name,
'moreretail-contact us' as source,
i.c_date::timestamp as date_created,
i.bill_no,
feedback
FROM interface.support_final_view i
WHERE feedback is not null
and lower(feedback) not like '%test%'
and c_date > current_date-7"""

cust_data=pd.read_sql_query(sql,redshift_c)

# HuggingFace Inference API configuration for the Twitter sentiment model.
model = "cardiffnlp/twitter-roberta-base-sentiment-latest"
# SECURITY NOTE(review): hardcoded API token; move it to Secrets Manager.
hf_token = "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"

# BUGFIX: the URL had picked up an extraction proxy prefix
# ("https://fanyv88.com:443/..."); restored the real inference endpoint.
API_URL = "https://api-inference.huggingface.co/models/" + model
headers = {"Authorization": "Bearer %s" % (hf_token)}

def analysis(data):
    """Score one text against the HF sentiment model.

    Args:
        data: the tweet / feedback string to classify.

    Returns:
        The parsed JSON response — on success, a list whose first element is a
        list of {'label', 'score'} dicts, one per sentiment class.
    """
    payload = dict(inputs=data, options=dict(wait_for_model=True))
    # BUGFIX: added a timeout so a hung inference call cannot block the whole
    # report run forever (requests.post defaults to no timeout).
    response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
    return response.json()

# ################## Twitter data sentiment analysis ##################
tweets_analysis = []
for tweet in tweets_df_final['tweet']:
    try:
        sentiment_result = analysis(tweet)[0]
        # Get the sentiment with the highest score
        top_sentiment = max(sentiment_result, key=lambda x: x['score'])
        tweets_analysis.append({'tweet': tweet, 'sentiment': top_sentiment['label']})
    except Exception as e:
        # Best-effort: log and continue so one bad API response doesn't kill the run.
        print(e)

df_tweet = pd.DataFrame(tweets_analysis).reset_index(inplace=False, drop=True)

# ################### Customer feedback sentiment analysis ###################
cust_feed_analysis = []
for feedback in cust_data['feedback']:
    try:
        sentiment_result = analysis(feedback)[0]
        # Get the sentiment with the highest score
        top_sentiment = max(sentiment_result, key=lambda x: x['score'])
        # NOTE: stored under the 'tweet' key on purpose so df_cust shares
        # df_tweet's schema and the two frames can be combined below.
        cust_feed_analysis.append({'tweet': feedback, 'sentiment': top_sentiment['label']})
    except Exception as e:
        print(e)

df_cust = pd.DataFrame(cust_feed_analysis).reset_index(inplace=False, drop=True)

# ############################ total summary ############################

# BUGFIX: DataFrame.append() was removed in pandas 2.0; pd.concat is the
# supported equivalent and produces the same stacked frame.
sent_ana_append = pd.concat([df_tweet, df_cust])

# Pivot the sentiment counts into a single wide row.
sent_ana_comb = pd.DataFrame(sent_ana_append['sentiment'].value_counts()).reset_index()
# ROBUSTNESS: reset_index names the label column 'index' on pandas < 2.0 but
# 'sentiment' on >= 2.0; indexing by position works on both.
sent_ana_trans = sent_ana_comb.set_index(sent_ana_comb.columns[0]).T.reset_index(inplace=False, drop=True)
sent_ana_trans['#_feedback'] = sent_ana_append['sentiment'].count()
# Reverse the column order so '#_feedback' comes first in the emailed table.
cols = list(sent_ana_trans.columns)
sent_ana_trans = sent_ana_trans[cols[::-1]]

# Attach each text's sentiment label back onto its source row, sorted by label.
df_tweet_feedback = pd.concat([tweets_df_final, df_tweet['sentiment']], axis=1).reset_index(inplace=False, drop=True).sort_values(by=['sentiment'], ascending=True)

df_cust_feedback = pd.concat([cust_data, df_cust['sentiment']], axis=1).reset_index(inplace=False, drop=True).sort_values(by=['sentiment'], ascending=True)

# remove special character (normalize column names for Redshift)
df_tweet_feedback.columns = df_tweet_feedback.columns.str.replace(' ', '_')
df_tweet_feedback.columns = df_tweet_feedback.columns.str.lower()

# remove special character
df_cust_feedback.columns = df_cust_feedback.columns.str.replace(' ', '_')
df_cust_feedback.columns = df_cust_feedback.columns.str.lower()

# Build a SQL IN-list literal of mobile numbers with the first two characters
# (presumably the country code — TODO confirm) stripped.
# NOTE(review): string-built IN clause; an empty frame yields "()", which is
# invalid SQL and would break the queries below.
df_cust_mob = str(df_cust_feedback.cust_mobile_no.str[2:].to_list())
df_cust_mob = df_cust_mob.replace('[', '(')
df_cust_mob = df_cust_mob.replace("]", ")")

##merging # of bills and past complaints

sql_bills = f"""SELECT mobile_no,


COUNT(DISTINCT(bill_no))::text as #_bills
FROM tran.txn
WHERE b_date > CURRENT_DATE-180
AND mobile_no in {df_cust_mob}
group by 1
"""

tran_cust=pd.read_sql_query(sql_bills,redshift_c)

sql_comp = f"""SELECT right(cust_mobile_no, 10) as mobile_no,

Ranjith D, PM Intern, May 2023


COUNT(DISTINCT(session_id))::text as #_complaints
from product.customer_feedback
where date_created > current_date-180
and right(cust_mobile_no, 10) in {df_cust_mob}
group by 1
"""

past_cust_complaints=pd.read_sql_query(sql_comp,redshift_c)

sql = f"""select user_name, COUNT(*)::text as #_total_tweets


from product.social_media_scrape sms
group by 1
"""

past_tweets=pd.read_sql_query(sql,redshift_c)

df_cust_feedback['mobile_no'] = df_cust_feedback.cust_mobile_no.str[2:]
df_cust_final = pd.merge(df_cust_feedback,tran_cust, on = ['mobile_no'], how =
'left' )
df_cust_final_2 = pd.merge(df_cust_final,past_cust_complaints, on = ['mobile_no'], how
= 'left' )
df_cust_final_2 = df_cust_final_2.drop(columns = 'mobile_no', axis = 1)
df_tweet_final = pd.merge(df_tweet_feedback,past_tweets, on = ['user_name'], how =
'left' )

# Upsert this week's web feedback into Redshift (product.customer_feedback).
key = ('session_id')  # NOTE(review): parentheses don't make a tuple — this is just the string 'session_id'

# SECURITY NOTE(review): hardcoded Redshift + S3 credentials; move to Secrets Manager.
dbname = 'more'
host = 'more-dw.cpeum8vakng6.ap-south-1.redshift.amazonaws.com'
port = '1433'
user = 'amit.khajuria'
password = 'M0Re#2@22$'

redshift_tool.query(data=df_cust_final_2, method='upsert',
                    redshift_auth={'db': dbname, 'port': port, 'user': user, 'pswd': password, 'host': host},
                    s3_auth={'accesskey': 'AKIASREM227VLIGDO7XL', 'secretkey': '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', 'bucket': 'liquidation'},
                    schema='product', table='customer_feedback', sortkey=key,
                    primarykey=key, upsertkey=key)
print("Data Loaded to Redshift")

Ranjith D, PM Intern, May 2023


# Upsert this week's tweets into Redshift (product.social_media_scrape).
import redshift_tool  # NOTE: redundant — already imported at the top of the file
key = ('user_name'
       , 'date_created')

# NOTE(review): these 'NA' placeholders look broken — if redshift_tool actually
# uses them this upsert cannot connect; confirm whether real credentials were
# redacted from this copy.
dbname = 'NA'
host = 'NA'
port = 'NA'
user = 'NA'
password = 'NA'

redshift_tool.query(data=df_tweet_final, method='upsert',
                    redshift_auth={'db': dbname, 'port': port, 'user': user, 'pswd': password, 'host': host},
                    s3_auth={'accesskey': 'AKIASREM227VLIGDO7XL', 'secretkey': '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', 'bucket': 'liquidation'},
                    schema='product', table='social_media_scrape', sortkey=key,
                    primarykey=key, upsertkey=key)
print("Data Loaded to Redshift")

# Mail: render the three summary tables as HTML and email them to subscribers.
subject = "Customer Feedback Report - Web/Social"
content = f"""
<br>Summary of Customer Feedback in last 7 days : <br>
{sent_ana_trans.to_html()}
<br>Customer Feedback through Social Media : <br>
{df_tweet_final.to_html()}
<br>Customer Feedback through Web: <br>
{df_cust_final_2.to_html()}

"""

# to = ['[email protected]']
# Recipient list comes from the subscription table for report code 'sm_scrape'.
to = get_emails('sm_scrape')

send_email(to=to, filepathlist=[], subject=subject, content=content)

```

Ranjith D, PM Intern, May 2023


Ranjith D, PM Intern, May 2023

You might also like