Source Code Python Jemmy
2.
from google_play_scraper import reviews

# first call returns a continuation_token used to fetch the next page of reviews
result, continuation_token = reviews('com.dts.freefireth')

result, _ = reviews(
    'com.dts.freefireth',
    continuation_token=continuation_token  # defaults to None (load from the beginning)
)
print(result)
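The two reviews() calls above fetch one page at a time. google-play-scraper also provides reviews_all, which pages through every review automatically; a sketch, where the lang and country values are assumptions:
from google_play_scraper import reviews_all

result = reviews_all(
    'com.dts.freefireth',
    lang='id',     # review language (assumed)
    country='id',  # Play Store country (assumed)
)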
3.
import pandas as pd
df = pd.DataFrame(result)
df.to_csv("D:/TestData11.CSV")
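Each element of result is a dict with fields such as userName, score, at, and content. If only some fields are needed downstream, a sketch of trimming the frame before export (the column selection is an assumption):
df = pd.DataFrame(result)
# keep only the fields used in the analysis (assumed selection)
df = df[['userName', 'score', 'at', 'content']]
df.to_csv("D:/TestData11.CSV", index=False, encoding='utf-8')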
4.
pip install nltk
5.
import nltk
nltk.download()
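nltk.download() with no argument opens the interactive downloader. For a script, the two resources the later steps rely on can be fetched directly:
import nltk
nltk.download('punkt')      # tokenizer models used by word_tokenize
nltk.download('stopwords')  # stopword lists, including Indonesian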
6.
pip install Sastrawi
7.
pip install numpy
8.
import pandas as pd
import numpy as np
TWEET_DATA = pd.read_csv("D:/data_ff.csv")
TWEET_DATA.head()
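Scraped reviews can contain empty content, which would break the string cleaning below; a sketch of dropping such rows first (whether to discard them is an assumption):
TWEET_DATA = TWEET_DATA.dropna(subset=['content'])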
9.
TWEET_DATA.to_csv("D:/data_ff.csv")
10.
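The code for step 10 is missing from this listing. At this point in a typical preprocessing pipeline the step is case folding; a sketch, offered as an assumption:
# assumed step: case folding, lower-casing all review text
TWEET_DATA['content'] = TWEET_DATA['content'].str.lower()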
11.
import string
import re  # regex library

def remove_tweet_special(text):
    # remove tab, newline, and backslash escape sequences
    text = text.replace('\\t', " ").replace('\\n', " ").replace('\\u', " ").replace('\\', "")
    # remove non-ASCII characters (emoticons, Chinese characters, etc.)
    text = text.encode('ascii', 'replace').decode('ascii')
    # remove mentions, links, and hashtags
    text = ' '.join(re.sub(r"([@#][A-Za-z0-9]+)|(\w+:\/\/\S+)", " ", text).split())
    # remove incomplete URLs
    return text.replace("http://", " ").replace("https://", " ")

TWEET_DATA['content'] = TWEET_DATA['content'].apply(remove_tweet_special)
# remove punctuation
def remove_punctuation(text):
    return text.translate(str.maketrans("", "", string.punctuation))

TWEET_DATA['content'] = TWEET_DATA['content'].apply(remove_punctuation)
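The five apply calls that follow rely on helper functions never defined in this listing; a minimal sketch of plausible definitions matching their names and the NLTK-based pipeline assumed by the later steps:
import re
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist

def remove_whitespace_LT(text):
    # strip leading and trailing whitespace
    return text.strip()

def remove_whitespace_multiple(text):
    # collapse runs of whitespace into a single space
    return re.sub(r'\s+', ' ', text)

def remove_singl_char(text):
    # drop single-character tokens left behind by earlier cleaning
    return re.sub(r"\b[a-zA-Z]\b", "", text)

def word_tokenize_wrapper(text):
    # NLTK tokenization; requires nltk.download('punkt')
    return word_tokenize(text)

def freqDist_wrapper(text):
    # frequency distribution over a token list
    return FreqDist(text)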
TWEET_DATA['content'] = TWEET_DATA['content'].apply(remove_whitespace_LT)
TWEET_DATA['content'] = TWEET_DATA['content'].apply(remove_whitespace_multiple)
TWEET_DATA['content'] = TWEET_DATA['content'].apply(remove_singl_char)
TWEET_DATA['content_tokens'] = TWEET_DATA['content'].apply(word_tokenize_wrapper)
TWEET_DATA['content_tokens_fdist'] = TWEET_DATA['content_tokens'].apply(freqDist_wrapper)
12.
# ---------------------------------------------------------------------------------------
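stopwords_removal is applied below but never defined here; a minimal sketch using NLTK's Indonesian stopword list (the choice of list is an assumption):
from nltk.corpus import stopwords

# Indonesian stopwords from NLTK (requires nltk.download('stopwords'))
list_stopwords = set(stopwords.words('indonesian'))

def stopwords_removal(words):
    # keep only tokens that are not stopwords
    return [word for word in words if word not in list_stopwords]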
TWEET_DATA['content_tokens_WSW'] = TWEET_DATA['content_tokens'].apply(stopwords_removal)
print(TWEET_DATA['content_tokens_WSW'].head())
13.
normalized_word = pd.read_excel("D:/normalisasi.xlsx")
normalized_word_dict = {}

# build the slang -> standard-form lookup (assumes column 0 holds the slang term, column 1 its normal form)
for index, row in normalized_word.iterrows():
    if row.iloc[0] not in normalized_word_dict:
        normalized_word_dict[row.iloc[0]] = row.iloc[1]

def normalized_term(document):
    return [normalized_word_dict[term] if term in normalized_word_dict else term for term in document]

TWEET_DATA['content_normalized'] = TWEET_DATA['content_tokens_WSW'].apply(normalized_term)
TWEET_DATA['content_normalized'].head(10)
14.
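The code for step 14 is also missing; since step 15 calls .swifter.apply, the step presumably installed swifter:
pip install swifter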
15.
# stemmed
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
import swifter
factory = StemmerFactory()  # assumed stemmer setup, omitted from the listing
stemmer = factory.create_stemmer()
def stemmed_wrapper(term):
    return stemmer.stem(term)
term_dict = {}  # assumed: collect unique tokens, stem each once, cache here
for document in TWEET_DATA['content_normalized']:
    for term in document:
        if term not in term_dict:
            term_dict[term] = ' '
print(len(term_dict))
print("------------------------")
for term in term_dict:
    term_dict[term] = stemmed_wrapper(term)
print(term_dict)
print("------------------------")
def get_stemmed_term(document):
    return [term_dict[term] for term in document]
TWEET_DATA['content_tokens_stemmed'] = TWEET_DATA['content_normalized'].swifter.apply(get_stemmed_term)
print(TWEET_DATA['content_tokens_stemmed'])
16.
TWEET_DATA.to_csv("data_ff.csv")
17.
TWEET_DATA.to_excel("data_ff.xlsx")
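pandas' to_excel needs an Excel writer engine installed; if it raises ModuleNotFoundError, installing openpyxl resolves it:
pip install openpyxl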