Source Code Python Jemmy

1. The document walks through preprocessing Indonesian-language Google Play reviews of the Free Fire game for sentiment analysis: installing the required libraries, scraping and importing the review data, cleaning the text, removing stopwords and punctuation, stemming words, and saving the final preprocessed data.
2. The key preprocessing steps are case folding, tokenizing, stopword removal, word normalization, and stemming, each implemented with Python code snippets using libraries such as NLTK, Pandas, Sastrawi, and NumPy.
3. The final preprocessed data is saved in CSV and Excel formats for further analysis.


1.

pip install google-play-scraper

2.

from google_play_scraper import Sort, reviews


result, continuation_token = reviews(
    'com.dts.freefireth',
    lang='id',                # defaults to 'en'
    country='id',             # defaults to 'us'
    sort=Sort.MOST_RELEVANT,  # defaults to Sort.MOST_RELEVANT
    count=4000,               # defaults to 100
)

result, _ = reviews(
    'com.dts.freefireth',
    continuation_token=continuation_token  # defaults to None (load from the beginning)
)
print(result)
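
Each item in result is a dictionary describing one review. Before building the DataFrame it can help to check which fields the installed version of google-play-scraper actually returned; 'content' and 'score' are the ones this tutorial relies on, while the remaining keys may differ between library versions:

# quick sanity check of the first scraped review (not part of the pipeline)
print(result[0].keys())
print(result[0]['content'], '-', result[0]['score'])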

3.

import pandas as pd

df = pd.DataFrame(result)
df.to_csv("D:/TestData11.CSV")

4.
pip install nltk

5.
import nltk
nltk.download()
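
nltk.download() with no arguments opens the interactive downloader window. If only the resources used in this tutorial are needed, a lighter alternative is to fetch them directly (the tokenizer model for word_tokenize and the stopword corpus that includes the Indonesian list):

import nltk
nltk.download('punkt')      # tokenizer model used by word_tokenize
nltk.download('stopwords')  # stopword corpus, including Indonesian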

6.
pip install Sastrawi
7.
pip install numpy

8.
import pandas as pd
import numpy as np

TWEET_DATA = pd.read_csv("D:/data_ff.csv")

TWEET_DATA.head()

9.
TWEET_DATA.to_csv("D:/data_ff.csv")
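
Note that to_csv writes the DataFrame index as an extra unnamed column by default, which will show up as an "Unnamed: 0" column the next time the file is read. If that is unwanted, an optional variant is:

# optional variant: skip the index column when saving
TWEET_DATA.to_csv("D:/data_ff.csv", index=False)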

10.

# ------ Case Folding --------


# use the Pandas Series.str.lower() function
TWEET_DATA['content'] = TWEET_DATA['content'].str.lower()

print('Case Folding Result : \n')


print(TWEET_DATA['content'].head(5))
print('\n\n\n')

11.

import string
import re #regex library

# import word_tokenize & FreqDist from NLTK


from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist

# ------ Tokenizing ---------

def remove_tweet_special(text):
    # remove tab, newline, and backslash characters
    text = text.replace('\\t'," ").replace('\\n'," ").replace('\\u'," ").replace('\\',"")
    # remove non-ASCII characters (emoticons, Chinese characters, etc.)
    text = text.encode('ascii', 'replace').decode('ascii')
    # remove mentions, links, and hashtags
    text = ' '.join(re.sub(r"([@#][A-Za-z0-9]+)|(\w+:\/\/\S+)", " ", text).split())
    # remove incomplete URLs
    return text.replace("http://", " ").replace("https://", " ")

TWEET_DATA['content'] = TWEET_DATA['content'].apply(remove_tweet_special)

# remove punctuation
def remove_punctuation(text):
    return text.translate(str.maketrans("", "", string.punctuation))

TWEET_DATA['content'] = TWEET_DATA['content'].apply(remove_punctuation)

# remove leading & trailing whitespace
def remove_whitespace_LT(text):
    return text.strip()

TWEET_DATA['content'] = TWEET_DATA['content'].apply(remove_whitespace_LT)

# collapse multiple whitespace into a single space
def remove_whitespace_multiple(text):
    return re.sub(r'\s+', ' ', text)

TWEET_DATA['content'] = TWEET_DATA['content'].apply(remove_whitespace_multiple)

# remove single characters
def remove_single_char(text):
    return re.sub(r"\b[a-zA-Z]\b", "", text)

TWEET_DATA['content'] = TWEET_DATA['content'].apply(remove_single_char)

# NLTK word tokenize
def word_tokenize_wrapper(text):
    return word_tokenize(text)

TWEET_DATA['content_tokens'] = TWEET_DATA['content'].apply(word_tokenize_wrapper)

print('Tokenizing Result : \n')


print(TWEET_DATA['content_tokens'].head())
print('\n\n\n')

# NLTK calc frequency distribution


def freqDist_wrapper(text):
    return FreqDist(text)

TWEET_DATA['content_tokens_fdist'] = TWEET_DATA['content_tokens'].apply(freqDist_wrapper)

print('Frequency Tokens : \n')


print(TWEET_DATA['content_tokens_fdist'].head().apply(lambda x : x.most_common()))

12.

from nltk.corpus import stopwords

# ----------------------- get stopword from NLTK stopword -------------------------------


# get the Indonesian stopword list
list_stopwords = stopwords.words('indonesian')

# ---------------------------- manually add stopwords ------------------------------------


# append additional stopword
list_stopwords.extend(["yg", "dg", "rt", "dgn", "ny", "d", 'klo',
'kalo', 'amp', 'biar', 'bikin', 'bilang',
'gak', 'ga', 'krn', 'nya', 'nih', 'sih',
'si', 'tau', 'tdk', 'tuh', 'utk', 'ya',
'jd', 'jgn', 'sdh', 'aja', 'n', 't',
'nyg', 'hehe', 'pen', 'u', 'nan', 'loh', 'rt',
'&amp', 'yah'])

# ----------------------- add stopword from txt file ------------------------------------


# read txt stopword using pandas
txt_stopword = pd.read_csv("D:/stopwords.txt", names= ["stopwords"], header = None)

# convert stopword string to list & append additional stopword


list_stopwords.extend(txt_stopword["stopwords"][0].split(' '))

# ---------------------------------------------------------------------------------------

# convert the list to a set for faster lookup

list_stopwords = set(list_stopwords)

# remove stopwords from the token list
def stopwords_removal(words):
    return [word for word in words if word not in list_stopwords]

TWEET_DATA['content_tokens_WSW'] = TWEET_DATA['content_tokens'].apply(stopwords_removal)

print(TWEET_DATA['content_tokens_WSW'].head())
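
The txt file read above is assumed to hold all extra stopwords on a single line separated by spaces, since only the first row (txt_stopword["stopwords"][0]) is split. If the file lists one stopword per line instead, the earlier extend call can be replaced with this sketch (assuming that layout):

# alternative sketch, assuming one stopword per line in D:/stopwords.txt
list_stopwords.extend(txt_stopword["stopwords"].tolist())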

13.

normalizad_word = pd.read_excel("D:/normalisasi.xlsx")

normalizad_word_dict = {}

for index, row in normalizad_word.iterrows():
    if row[0] not in normalizad_word_dict:
        normalizad_word_dict[row[0]] = row[1]

def normalized_term(document):
    return [normalizad_word_dict[term] if term in normalizad_word_dict else term for term in document]

TWEET_DATA['content_normalized'] = TWEET_DATA['content_tokens_WSW'].apply(normalized_term)

TWEET_DATA['content_normalized'].head(10)
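
The normalization step assumes D:/normalisasi.xlsx has two columns: the raw or slang term in the first column and its normalized form in the second. For readers without that spreadsheet, the lookup can be sketched with a small hand-made dictionary (the entries below are illustrative assumptions, not the author's actual list):

# minimal sketch of the normalization dictionary; entries are examples only
normalizad_word_dict = {
    "bgt": "banget",  # assumed slang -> standard form
    "tp": "tapi",
    "gk": "tidak",
}
print(normalized_term(["game", "ini", "bgt", "seru"]))  # ['game', 'ini', 'banget', 'seru']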

14.

conda install -c conda-forge swifter

15.

# import Sastrawi package


from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
import swifter
# create stemmer
factory = StemmerFactory()
stemmer = factory.create_stemmer()

# stem a single term with Sastrawi
def stemmed_wrapper(term):
    return stemmer.stem(term)

term_dict = {}

# collect every unique term across the normalized documents
for document in TWEET_DATA['content_normalized']:
    for term in document:
        if term not in term_dict:
            term_dict[term] = ' '

print(len(term_dict))
print("------------------------")

# stem each unique term once and cache the result
for term in term_dict:
    term_dict[term] = stemmed_wrapper(term)
    print(term, ":", term_dict[term])

print(term_dict)
print("------------------------")

# apply the stemmed terms to the dataframe
def get_stemmed_term(document):
    return [term_dict[term] for term in document]

TWEET_DATA['content_tokens_stemmed'] = TWEET_DATA['content_normalized'].swifter.apply(get_stemmed_term)
print(TWEET_DATA['content_tokens_stemmed'])
print(TWEET_DATA['content_tokens_stemmed'])
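
As a quick sanity check of the Sastrawi stemmer on its own, single Indonesian words can be stemmed directly; both forms below should reduce to the same root:

# quick check of the Sastrawi stemmer on individual terms
print(stemmer.stem("bermain"))    # expected root: main
print(stemmer.stem("permainan"))  # expected root: main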

16.

TWEET_DATA.to_csv("data_ff.csv")
17.
TWEET_DATA.to_excel("data_ff.xlsx")
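
Writing .xlsx files with to_excel requires an Excel engine; pandas typically uses the openpyxl package for this, so if the call above raises a missing-module error, installing it should resolve the problem:

pip install openpyxl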
