-
Notifications
You must be signed in to change notification settings - Fork 1
/
resume_to_text.py
53 lines (41 loc) · 1.6 KB
/
resume_to_text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import os
import PyPDF2
class FileNotFound(Exception):
    """Signal that a PDF path handed to the converter does not exist."""
def convert_pdf_to_text(pdf_filename):
    """Extract the text of every page of a PDF and save it as a text file.

    The concatenated page text is passed to ``save_text_to_file`` together
    with the original PDF path.

    Any error raised while opening, parsing, or saving is caught and
    reported with ``print``; it is not re-raised.

    Args:
        pdf_filename: Path of the PDF file to read.

    Raises:
        FileNotFound: If ``pdf_filename`` does not exist. This check runs
            before the ``try`` block, so it propagates to the caller.
    """
    if not os.path.exists(pdf_filename):
        raise FileNotFound(f"The file '{pdf_filename}' does not exist.")

    try:
        # A context manager closes the handle on every path. The previous
        # ``finally: pdf_file.close()`` raised UnboundLocalError whenever
        # open() itself failed (e.g. the path is a directory), hiding the
        # real error.
        with open(pdf_filename, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # Defensive: treat an empty/missing per-page result as "" so
            # one unreadable page cannot break the concatenation.
            text = "".join(
                page.extract_text() or "" for page in pdf_reader.pages
            )

        # All text is in memory at this point, so the PDF handle is no
        # longer needed while writing the output file.
        save_text_to_file(pdf_filename, text)
    except Exception as e:
        # Best-effort behaviour kept from the original: report, don't crash.
        print(f"An error occurred: {e}")
def save_text_to_file(pdf_filename, text):
    """Write ``text`` to ``generations/<pdf base name>.txt`` as UTF-8.

    The ``generations`` folder is resolved relative to the current working
    directory and is created when it does not exist. An existing output
    file with the same name is overwritten.

    Args:
        pdf_filename: Path of the source PDF. Only its base name, with the
            extension removed, is used to name the output file.
        text: The string to write.
    """
    # exist_ok=True creates the folder only when missing, without the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs("generations", exist_ok=True)

    stem = os.path.splitext(os.path.basename(pdf_filename))[0]
    txt_filename = os.path.join("generations", stem + ".txt")

    with open(txt_filename, 'w', encoding='utf-8') as txt_file:
        txt_file.write(text)

    print(f"Text extracted and saved to '{txt_filename}'.")
def main(pdf_filename="generations/AyushYadavResume.pdf"):
    """Convert one PDF to text via ``convert_pdf_to_text``.

    Args:
        pdf_filename: Path of the PDF to convert. Defaults to the path the
            script previously had hard-coded, so ``main()`` with no
            arguments behaves as before.
    """
    convert_pdf_to_text(pdf_filename)


if __name__ == "__main__":
    import sys

    # An optional first command-line argument overrides the default PDF
    # path; with no argument the slice is empty and the default is used.
    main(*sys.argv[1:2])