NLP-pyth
NLP-pyth
1. Regular Expression
import re
# Sample text to scan; it contains two e-mail addresses.
text = "Contact us at support@example.com or sales@example.org for details."
# username @ domain . top-level-domain, anchored on word boundaries.
pattern = r'\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b'
# findall returns every non-overlapping match as a list of strings.
emails = re.findall(pattern, text)
print(emails)
# Output: ['support@example.com', 'sales@example.org']
- `[a-zA-Z0-9._%+-]+`: Matches the username part, which can include letters, digits, and the
special characters `.`, `_`, `%`, `+`, and `-`.
- `[a-zA-Z0-9.-]+`: Matches the domain name, which can include letters, digits, dots, and
hyphens.
- `\.[a-zA-Z]{2,}`: Matches the top-level domain (e.g., `.com`, `.org`), requiring at least two
letters.
# Sample text with the three separator styles the pattern accepts.
text = "Call 555-123-4567 or 555.987.6543; the office line is 555 246 8101."
# 3 digits, 3 digits, 4 digits, each group optionally separated by
# a hyphen, a dot, or whitespace.
pattern = r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b'
# findall returns every non-overlapping match as a list of strings.
phones = re.findall(pattern, text)
print(phones)
# Output: ['555-123-4567', '555.987.6543', '555 246 8101']
- `\b`: Word boundary. It keeps the pattern from matching inside a longer run of digits or letters.
# Sample text containing two dates written as two digits / two digits / four digits.
text = "The workshop runs from 12/05/2024 to 15/05/2024."
# Two digits, slash, two digits, slash, four digits, on word boundaries.
# The pattern checks the shape only; it does not validate the calendar date.
pattern = r'\b\d{2}/\d{2}/\d{4}\b'
# findall returns every non-overlapping match as a list of strings.
dates = re.findall(pattern, text)
print(dates)
# Output: ['12/05/2024', '15/05/2024']
import re
# Sample text; the address is in the middle, so re.match would not find it.
text = "Please send your resume to jobs@example.com by Friday."
pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
# re.search scans the whole string and returns the first match, or None.
match = re.search(pattern, text)
if match:
    print("Email found:", match.group())
else:
    print("No email found.")
e. Match function
import re
text = "Hello World!"
pattern = r'Hello'
# re.match only tries the pattern at the very start of the string;
# it returns a match object on success and None otherwise.
match = re.match(pattern, text)
if match:
    print("Match found at the beginning!")
else:
    print("No match at the beginning.")
f. sub function
g. Split function
h. Finditer function
2. Stemming
a. PorterStemmer
from nltk.stem import PorterStemmer
# Words to reduce to their stems.
words = ["running", "ran", "easily", "fairly"]
# Porter stemmer with NLTK's default settings.
stemmer = PorterStemmer()
# Apply the stemmer to every word, keeping the input order.
stems = list(map(stemmer.stem, words))
print(stems)
# Output: ['run', 'ran', 'easili', 'fairli']
b. Lancaster Stemmer
from nltk.stem import LancasterStemmer
# Words to reduce to their stems
words = ["running", "ran", "easily", "fairly", "happiness"]
# Create the Lancaster stemmer
lancaster_stemmer = LancasterStemmer()
# Stem each word in turn, keeping the input order
lancaster_stems = [lancaster_stemmer.stem(token) for token in words]
print("Lancaster Stemmer:", lancaster_stems)
# Expected stems (confirm with your installed NLTK version):
# ['run', 'ran', 'easy', 'fair', 'happy']
c. Snowball stemmer
from nltk.stem import SnowballStemmer
# Words to reduce to their stems
words = ["running", "ran", "easily", "fairly", "happiness"]
# Create the Snowball stemmer, selecting its English rule set
snowball_stemmer = SnowballStemmer("english")
# Stem each word in turn, keeping the input order
snowball_stems = list(map(snowball_stemmer.stem, words))
print("Snowball Stemmer:", snowball_stems)
# Expected stems (confirm with your installed NLTK version):
# ['run', 'ran', 'easili', 'fair', 'happi']
d. Compare all