1.5 Text Basics - Practice exercises
This post contains additional practice exercises on the topics covered in Chapter 1.
abbr = 'NLP'
full_text = 'Natural Language Processing'
### Enter your code here:
Solution
# Two short strings to combine into a single printed sentence.
abbr = 'NLP'
full_text = 'Natural Language Processing'
# The f-string substitutes both variables directly inside the literal.
print(f'{abbr} stands for {full_text}')
%%writefile contacts.txt
First_Name Last_Name, Title, Extension, Email
Solution
%%writefile contacts.txt
First_Name Last_Name, Title, Extension, Email
### Write your code here:
### Run fields to see the contents of contacts.txt:
fields
Solution
# Read the entire contents of contacts.txt into `fields`.
# The `with` block closes the file as soon as the read finishes.
with open("contacts.txt") as text:
    fields = text.read()
# Bare expression: shows the value when this is run as a notebook cell.
fields
# Open the file as a binary object
# Use PyPDF2 to read the text of the file
# Get the text from page 2 (CHALLENGE: Do this in one step!)
page_two_text =
# Close the file
# Print the contents of page_two_text
print(page_two_text)
Solution
import PyPDF2

# NOTE(review): PdfFileReader / getPage / extractText are the legacy PyPDF2
# names; PyPDF2 3.x replaced them with PdfReader, reader.pages[i] and
# extract_text(). Confirm which version the course environment pins.

# Open the file as a binary object. The `with` block guarantees the handle
# is closed even if reading or text extraction raises.
with open("data_files/Business_Proposal.pdf", 'rb') as pdf1:
    # Use PyPDF2 to read the text of the file
    pdf_reader = PyPDF2.PdfFileReader(pdf1)
    # Get the text from page 2 in one step (page indices are zero-based,
    # so page 2 is index 1). Extraction must happen while the file is open.
    page_two_text = pdf_reader.getPage(1).extractText()

# Print the contents of page_two_text
print(page_two_text)
import re
# NOTE(review): `[^(AUTHORS:)]` is a negated character class, not a group.
# It matches each single character that is NOT one of ( ) : A U T H O R S,
# so this returns a list of one-character strings rather than the text
# that follows "AUTHORS:".
re.findall(r'[^(AUTHORS:)]', page_two_text)
**Simple Solution**
# Open in 'a+' mode so the existing contents can be read and new text is
# appended. The `with` block closes the file even if a read or write fails.
with open('contacts.txt', 'a+') as myfile:
    # Rewind to the start so read() returns the existing contents.
    myfile.seek(0)
    print(myfile.read())
    # In append mode the write lands at the end of the file.
    myfile.write(page_two_text)
    # Rewind again to print the file including the newly appended text.
    myfile.seek(0)
    print(myfile.read())
# Append page_two_text without its first 8 characters
# (presumably the "AUTHORS:" label - confirm against the extracted PDF text),
# then rewind and print the whole file.
with open('contacts.txt','a+') as c:
    c.write(page_two_text[8:])
    # Rewind to the start so read() returns everything, old and new.
    c.seek(0)
    print(c.read())
import re
# Enter your regex pattern here. This may take several tries!
pattern =
re.findall(pattern, page_two_text)
Solution
import re
# Email pattern: word characters, '@', word characters, then a literal ".com".
# The dot is escaped; an unescaped '.' would match ANY character, so strings
# such as "user@hostXcom" would be accepted as emails.
pattern = r'\w+@\w+\.com'
re.findall(pattern, page_two_text)