# Twitter Data Pull
# Standard library
import http.client
import json
import smtplib
import ssl
from datetime import datetime as dt, timedelta, date, datetime
from email import encoders
from email.message import EmailMessage
from email.utils import make_msgid

# Third-party
import boto3
import pandas as pd
import requests
import tweepy
import xlsxwriter
from botocore.client import Config

# Internal tooling
import redshift_tool
session = boto3.session.Session()
client = session.client(service_name='secretsmanager',region_name="ap-south-1")
aw_json= json.loads(client.get_secret_value(SecretId="aws-reports")['SecretString'])
app_password=aw_json['app_password']
today_date=dt.now().strftime("%Y-%m-%d")
today = datetime.today()
search_query = "moreretail"
no_of_tweets =150
try:
#The number of tweets we want to retrieved from the search
tweets = api.search_tweets(q=search_query, count=no_of_tweets)
#Creation of Dataframe
tweets_df = pd.DataFrame(attributes_container, columns=columns)
tweets_df['Link to Tweet'] =
'https://twitter.com/twitter/statuses/'+tweets_df["id"].astype(str)
except BaseException as e:
print('Status Failed On,',str(e))
def gen_url(key,bucket='liquidation'):
def
send_email(filepathlist=[],to=None,subject=None,content=None,bucket='liquidation'):
msg = EmailMessage()
msg.set_content('This is a plain text body.')
msg["From"] = '[email protected]'
msg["To"] = ",".join(to)
msg["Subject"] = subject
msg.add_alternative(content,'html')
if len(filepathlist)>0:
for i in filepathlist:
s3 = boto3.client('s3')
object = s3.get_object(Bucket=bucket,Key=i)
objectContent = object['Body']
msg.add_attachment(objectContent.read(),
maintype='application',
subtype='octet-stream',
filename=i)
try:
server = smtplib.SMTP("smtp.gmail.com", 587)
server.ehlo()
server.starttls()
server.login(msg["From"],app_password)
server.sendmail(msg["From"],to,msg.as_string())
server.close()
print ('successfully sent the mail')
except Exception as e:
msg=getattr(e, 'message', repr(e))
print (msg)
worksheet.set_column(first_col=idx,last_col=idx+1,width=max_len,cell_format=formatint)
except:
try:
float(series.values[0])
worksheet.set_column(first_col=idx+1,last_col=idx+1,width=max_len,cell_format=formatco
mma)
except:
worksheet.set_column(first_col=idx+1,last_col=idx+1,width=max_len)
data = output.getvalue()
s3 = boto3.resource('s3')
s3.Bucket(bucket).put_object(Key=filename,Body=data)
def get_emails(report):
ed=f"""
SELECT email_id FROM ssp_prod.subscription WHERE report_code='{report}'
"""
eddf=pd.read_sql(ed,postgres_c)
to=eddf['email_id'].tolist()
# tweets_df_final['date_created'] =
pd.to_datetime(tweets_df_final['date_created']).dt.tz_localize(None)
# tweets_df_final['date_created'] = tweets_df_final['date_created'].dt.date
tweets_df_final['date'] = tweets_df_final['date_created'].dt.date
tweets_df_final = tweets_df_final[tweets_df_final['date'] > today - pd.offsets.Day(8)]
tweets_df_final = tweets_df_final.drop(columns = 'date', axis = 1)
cust_data=pd.read_sql_query(sql,redshift_c)
model = "cardiffnlp/twitter-roberta-base-sentiment-latest"
hf_token = "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"
def analysis(data):
payload = dict(inputs=data, options=dict(wait_for_model=True))
cust_feed_analysis = []
for feedback in cust_data['feedback']:
try:
sentiment_result = analysis(feedback)[0]
top_sentiment = max(sentiment_result, key=lambda x: x['score']) # Get the
sentiment with the higher score
cust_feed_analysis.append({'tweet': feedback, 'sentiment':
top_sentiment['label']})
except Exception as e:
print(e)
#############################total summary###########################################
sent_ana_append = df_tweet.append(df_cust)
df_cust_mob = str(df_cust_feedback.cust_mobile_no.str[2:].to_list())
df_cust_mob=df_cust_mob.replace('[','(')
df_cust_mob=df_cust_mob.replace("]",")")
tran_cust=pd.read_sql_query(sql_bills,redshift_c)
past_cust_complaints=pd.read_sql_query(sql_comp,redshift_c)
past_tweets=pd.read_sql_query(sql,redshift_c)
df_cust_feedback['mobile_no'] = df_cust_feedback.cust_mobile_no.str[2:]
df_cust_final = pd.merge(df_cust_feedback,tran_cust, on = ['mobile_no'], how =
'left' )
df_cust_final_2 = pd.merge(df_cust_final,past_cust_complaints, on = ['mobile_no'], how
= 'left' )
df_cust_final_2 = df_cust_final_2.drop(columns = 'mobile_no', axis = 1)
df_tweet_final = pd.merge(df_tweet_feedback,past_tweets, on = ['user_name'], how =
'left' )
key=('session_id')
dbname = 'more'
host='more-dw.cpeum8vakng6.ap-south-1.redshift.amazonaws.com'
port='1433'
user='amit.khajuria'
password = 'M0Re#2@22$'
redshift_tool.query(data=df_cust_final_2,method='upsert',redshift_auth={'db':dbname,'p
ort':port,'user':user,'pswd':password,'host':host},
s3_auth={'accesskey':'AKIASREM227VLIGDO7XL','secretkey':'s+9C57OL2JrOgnPJjOl6D/
quurPzFRpQWfDh23Ps','bucket':'liquidation'},
schema='product',table='customer_feedback',sortkey=key,
primarykey=key,upsertkey=key)
print("Data Loaded to Redshift")
dbname = 'NA'
host='NA'
port='NA'
user='NA'
password = 'NA'
redshift_tool.query(data=df_tweet_final,method='upsert',redshift_auth={'db':dbname,'po
rt':port,'user':user,'pswd':password,'host':host},
s3_auth={'accesskey':'AKIASREM227VLIGDO7XL','secretkey':'s+9C57OL2JrOgnPJjOl6D/
quurPzFRpQWfDh23Ps','bucket':'liquidation'},
schema='product',table='social_media_scrape',sortkey=key,
primarykey=key,upsertkey=key)
print("Data Loaded to Redshift")
#Mail
subject="Customer Feedback Report - Web/Social"
content=f"""
<br>Summary of Customer Feedback in last 7 days : <br>
{sent_ana_trans.to_html()}
<br>Customer Feedback through Social Media : <br>
{df_tweet_final.to_html()}
<br>Customer Feedback through Web: <br>
{df_cust_final_2.to_html()}
"""
#to = ['[email protected]']
to = get_emails('sm_scrape')
send_email(to=to,filepathlist=[],subject=subject,content=content)