code-output
code-output
import nltk
from nltk.corpus import stopwords
import re
import subprocess
import pandas as pd
import numpy as np
Part 1: Parsing
Read resume pdf
http://localhost:8888/notebooks/Documents/GitHub/resume-evaluation-nlp/final-project.ipynb#Part-1:-Parsing Page 1 of 12
final-project - Jupyter Notebook 2021-12-09, 2:30 AM
df.head()
Out[2]:
path text
resumes-list/resume-example-option-
0 [email protected]\n(123) 456-7890\nWash...
software-en...
resumes-list/resume-example-option-project-
1 Stephen Greet\nProject Manager\nPMP certified p...
man...
resumes-list/resume-example-option-
2 Ashley Doyle, Esq\n\[email protected]\n\n(1...
attorney.pdf
resumes-list/resume-example-option-
3 Stephen Greet\nSales Associate\n\nWork Experie...
sales.pdf
http://localhost:8888/notebooks/Documents/GitHub/resume-evaluation-nlp/final-project.ipynb#Part-1:-Parsing Page 2 of 12
final-project - Jupyter Notebook 2021-12-09, 2:30 AM
return person_names
df.head()
Out[3]:
path text name
Extract phone-number
http://localhost:8888/notebooks/Documents/GitHub/resume-evaluation-nlp/final-project.ipynb#Part-1:-Parsing Page 3 of 12
final-project - Jupyter Notebook 2021-12-09, 2:30 AM
def extract_phone_number(resume_text):
phone = re.findall(phone_regex, resume_text)
if phone:
number = ''.join(phone[0])
df.head()
Out[4]:
path text name phone
Extract email
http://localhost:8888/notebooks/Documents/GitHub/resume-evaluation-nlp/final-project.ipynb#Part-1:-Parsing Page 4 of 12
final-project - Jupyter Notebook 2021-12-09, 2:30 AM
df.head()
Out[5]:
path text name phone email
resumes-
list/resume- (123)
[email protected]\n(123) Github
0 example- 456- [[email protected]]
456-7890\nWash... SKILLS
option- 7890
software-en...
resumes-
list/resume- (123)
Stephen Greet\nProject
1 example- Stephen 456- [[email protected]]
Manager\nPMP certified p...
option- 7890
project-man...
resumes-
list/resume- (123)
Ashley Doyle,
2 example- Ashley 456- [[email protected]]
Esq\n\[email protected]\n\n(1...
option- 7890
attorney.pdf
resumes-
list/resume- (123)
Stephen Greet\nSales
3 example- Stephen 456- [[email protected]]
Associate\n\nWork Experie...
option- 7890
sales.pdf
resumes-
list/data- (123)
KANDICE LOUDOR\n\nDATA
4 scientist- Github 456- [[email protected]]
SCIENTIST\n\nCONTACT\n\...
resume- 7890
example.pdf
Extract school
In [6]: school_keywords = [
'school',
'college',
'university',
'academy',
'faculty',
'institute',
'diploma',
]
def extract_education(input_text):
http://localhost:8888/notebooks/Documents/GitHub/resume-evaluation-nlp/final-project.ipynb#Part-1:-Parsing Page 5 of 12
final-project - Jupyter Notebook 2021-12-09, 2:30 AM
def extract_education(input_text):
organizations = []
education = set()
for org in organizations:
for word in school_keywords:
if org.lower().find(word) >= 0:
education.add(org)
return education
df
Out[6]:
path text name phone
resumes-list/resume- (123)
[email protected]\n(123) Github
0 example-option- 456- [[email protected]
456-7890\nWash... SKILLS
software-en... 7890
resumes-list/resume- (123)
Stephen Greet\nProject
1 example-option- Stephen 456- [[email protected]
Manager\nPMP certified p...
project-man... 7890
resumes-list/resume- (123)
Ashley Doyle,
2 example-option- Ashley 456- [[email protected]
Esq\n\[email protected]\n\n(1...
attorney.pdf 7890
resumes-list/resume- (123)
Stephen Greet\nSales
3 example-option- Stephen 456- [[email protected]
Associate\n\nWork Experie...
sales.pdf 7890
resumes-list/data- (123)
KANDICE LOUDOR\n\nDATA
4 scientist-resume- Github 456- [[email protected]
SCIENTIST\n\nCONTACT\n\...
example.pdf 7890
resumes-list/full-stack- (123)
ALEKS LUDKEE\nFull-Stack
5 developer-resume- ALEKS 456- [[email protected]
Developer\n\nludkee.a...
examp... 7890
Niantic
resumes-list/entry- Data (123)
Trish Mathers\nEntry-Level Data
7 level-data-scientist- Scientist 456- [[email protected]
Scientist\nInn...
resume... Intern 7890
Seattle
http://localhost:8888/notebooks/Documents/GitHub/resume-evaluation-nlp/final-project.ipynb#Part-1:-Parsing Page 6 of 12
final-project - Jupyter Notebook 2021-12-09, 2:30 AM
resumes-list/resume- (123)
ALICE LEWIS, APRN\n\nNurse San
9 example-option- 456- [[email protected]
Practitioner\n\nCON... Diego
nurse.pdf 7890
def extract_job_titles(input_text):
stop_words = set(nltk.corpus.stopwords.words('english'))
word_tokens = nltk.tokenize.word_tokenize(input_text)
#preprocessing
filtered_tokens = [w for w in word_tokens if w not in stop_words]
filtered_tokens = [w for w in word_tokens if w.isalpha()]
found_skills = set()
for i in filtered_tokens:
if i.lower() in JOB_TITLE_DB:
found_skills.add(i)
for i in grams:
if i.lower() in JOB_TITLE_DB:
found_skills.add(i)
return found_skills
df.head()
Out[8]:
path text name phone email
http://localhost:8888/notebooks/Documents/GitHub/resume-evaluation-nlp/final-project.ipynb#Part-1:-Parsing Page 7 of 12
final-project - Jupyter Notebook 2021-12-09, 2:30 AM
resumes-
list/resume-
(123)
example- [email protected]\n(123) Github
0 456- [[email protected]]
option- 456-7890\nWash... SKILLS
7890
software-
en...
resumes-
list/resume-
(123)
example- Stephen Greet\nProject {Admin
1 Stephen 456- [[email protected]]
option- Manager\nPMP certified p...
7890
project-
man...
resumes-
list/resume- (123)
Ashley Doyle,
2 example- Ashley 456- [[email protected]]
Esq\n\[email protected]\n\n(1...
option- 7890
attorney.pdf
resumes-
list/resume- (123)
Stephen Greet\nSales {Johns
3 example- Stephen 456- [[email protected]]
Associate\n\nWork Experie...
option- 7890
sales.pdf
resumes-
list/data- (123)
KANDICE LOUDOR\n\nDATA
4 scientist- Github 456- [[email protected]]
SCIENTIST\n\nCONTACT\n\...
resume- 7890
example.pdf
Part 2: Evaluation
Calculate similarity between job description and resume
http://localhost:8888/notebooks/Documents/GitHub/resume-evaluation-nlp/final-project.ipynb#Part-1:-Parsing Page 8 of 12
final-project - Jupyter Notebook 2021-12-09, 2:30 AM
http://localhost:8888/notebooks/Documents/GitHub/resume-evaluation-nlp/final-project.ipynb#Part-1:-Parsing Page 9 of 12
final-project - Jupyter Notebook 2021-12-09, 2:30 AM
df.head()
Out[10]:
path text name phone email
resumes-
list/resume- (123)
[email protected]\n(123) Github
1 example- 456- [[email protected]]
456-7890\nWash... SKILLS
option- 7890
software-en...
resumes-
list/resume- (123)
Stephen Greet\nProject
2 example- Stephen 456- [[email protected]]
Manager\nPMP certified p...
option-project- 7890
man...
resumes-
list/resume- (123)
Ashley Doyle,
3 example- Ashley 456- [[email protected]]
Esq\n\[email protected]\n\n(1...
option- 7890
attorney.pdf
resumes-
list/resume- (123)
Stephen Greet\nSales
4 example- Stephen 456- [[email protected]]
Associate\n\nWork Experie...
option- 7890
sales.pdf
http://localhost:8888/notebooks/Documents/GitHub/resume-evaluation-nlp/final-project.ipynb#Part-1:-Parsing Page 10 of 12
final-project - Jupyter Notebook 2021-12-09, 2:30 AM
tfidfvectoriser=TfidfVectorizer()
tfidfvectoriser.fit(df.text_cleaned)
tfidf_vectors=tfidfvectoriser.transform(df.text_cleaned)
similarities=np.dot(tfidf_vectors,tfidf_vectors.T).toarray()
for i in range(len(similarities[0])):
df.loc[i, "similarity"] = similarities[0][i]
df = df.drop(0)
df.reset_index(drop=True, inplace=True)
df
Out[11]:
path text name phone
resumes-list/full-stack- (123)
ALEKS LUDKEE\nFull-Stack
0 developer-resume- ALEKS 456- [[email protected]
Developer\n\nludkee.a...
examp... 7890
resumes-list/resume- (123)
[email protected]\n(123) Github
2 example-option- 456- [[email protected]
456-7890\nWash... SKILLS
software-en... 7890
resumes-list/resume- (123)
Stephen\nGreet\nWeb
3 example-option- Stephen 456- [[email protected]
Ranking Output
http://localhost:8888/notebooks/Documents/GitHub/resume-evaluation-nlp/final-project.ipynb#Part-1:-Parsing Page 11 of 12
final-project - Jupyter Notebook 2021-12-09, 2:30 AM
Out[12]:
path name email similarity
resumes-list/full-stack-developer-
0 ALEKS [[email protected]] 0.143581
resume-examp...
resumes-list/resume-example-
2 Github SKILLS [[email protected]] 0.101460
option-software-en...
resumes-list/resume-example-
3 Stephen [[email protected]] 0.079581
option-college-stu...
resumes-list/resume-example-
4 Stephen [[email protected]] 0.079037
option-project-man...
resumes-list/data-scientist-resume-
5 Github [[email protected]] 0.052557
example.pdf
resumes-list/resume-example-
7 San Diego [[email protected]] 0.030303
option-nurse.pdf
resumes-list/resume-example-
8 Stephen [[email protected]] 0.028344
option-sales.pdf
resumes-list/resume-example-
9 Ashley [[email protected]] 0.021063
option-attorney.pdf
http://localhost:8888/notebooks/Documents/GitHub/resume-evaluation-nlp/final-project.ipynb#Part-1:-Parsing Page 12 of 12