유용한 모듈들¶
PyPDF2¶
PDF 문서를 페이지 단위로 분리하거나 여러 PDF 파일을 하나로 병합할 수 있는 모듈입니다. 여기를 참조했습니다.
설치¶
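다음과 같이 설치합니다. 이 문서의 예제는 PyPDF2 3.0에서 제거된 이전 API(PdfFileReader, PdfFileWriter, PdfFileMerger)를 사용하므로, 예제를 그대로 실행하려면 3.0 미만 버전을 설치해야 합니다.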
conda install -c conda-forge pypdf2 # 또는
pip install pypdf2
페이지 분리¶
다음 코드는 페이지를 분리하는 것입니다.
#pdf_splitter.py
import os
from PyPDF2 import PdfFileReader, PdfFileWriter
def pdf_splitter(path):
    """Write every page of the PDF at ``path`` to its own single-page PDF.

    Each output file is named ``<stem>_page_<n>.pdf``, where ``<stem>`` is the
    input file name without its directory or extension and ``<n>`` is the
    1-based page number. The output names carry no directory component, so
    they are resolved relative to the current working directory.

    :param path: location of the PDF to split; handed to ``PdfFileReader``.
    """
    stem, _extension = os.path.splitext(os.path.basename(path))
    reader = PdfFileReader(path)
    page_count = reader.getNumPages()

    for page_number in range(1, page_count + 1):
        # A fresh writer per page keeps each output file to exactly one page.
        single_page = PdfFileWriter()
        # getPage() is indexed from 0, while the file names count from 1.
        single_page.addPage(reader.getPage(page_number - 1))

        target = '{}_page_{}.pdf'.format(stem, page_number)
        with open(target, 'wb') as sink:
            single_page.write(sink)

        print('Created: {}'.format(target))
# Script entry point: split the sample document when this file is run directly.
if __name__ == '__main__':
    pdf_splitter(path='w9.pdf')
파일 병합¶
다음은 파일들을 합치는 코드입니다. 신경 써야 하는 부분은 `__main__` 블록의 `paths`입니다. 병합하려는 순서대로 파일명을 입력하시면 됩니다.
# pdf_merger.py
import glob
from PyPDF2 import PdfFileWriter, PdfFileReader
def merger(output_path, input_paths):
    """Concatenate the PDFs in ``input_paths`` into one PDF at ``output_path``.

    Inputs are processed in the order given, and each contributes all of its
    pages in page order.

    :param output_path: file to create; opened in ``'wb'`` mode.
    :param input_paths: iterable of PDF paths, each handed to
        ``PdfFileReader``.
    """
    combined = PdfFileWriter()

    for source in input_paths:
        reader = PdfFileReader(source)
        # map() is lazy, so each page is fetched right before it is added.
        for page in map(reader.getPage, range(reader.getNumPages())):
            combined.addPage(page)

    with open(output_path, 'wb') as sink:
        combined.write(sink)
# Script entry point: merge the listed files, in exactly this order.
if __name__ == "__main__":
    # Alternative to the explicit list below: collect the inputs by pattern
    # and sort them by name, i.e. glob.glob('w9_*.pdf') followed by .sort().
    paths = [
        # Front matter
        "0_TOC.pdf",
        "1_acknowledgements.pdf",
        "2_notation.pdf",
        "3_ch1_intro.pdf",
        # Part 1
        "4_part1_basics.pdf",
        "ch2_linear_algebra.pdf",
        "ch3_prob.pdf",
        "ch4_numerical.pdf",
        "ch5_ml.pdf",
        # Part 2
        "part2_practical.pdf",
        "ch6_mlp.pdf",
        "ch7_regularization.pdf",
        "ch8_optimization.pdf",
        "ch9_convnets.pdf",
        "ch10_rnn.pdf",
        "ch11_guidelines.pdf",
        "ch12_applications.pdf",
        # Part 3
        "part3_research.pdf",
        "ch13_linear_factors.pdf",
        "ch14_autoencoders.pdf",
        "ch15_representation.pdf",
        "ch16_graphical_models.pdf",
        "ch17_monte_carlo.pdf",
        "ch18_partition.pdf",
        "ch19_inference.pdf",
        "ch20_generative_models.pdf",
        # Back matter
        "99_bib.pdf",
        "index.pdf",
        "Errata in published editions of Deep Learning.pdf",
    ]
    merger("pdf_merger.pdf", paths)
더 간단한 코드는 다음과 같습니다.
# pdf_merger2.py
import glob
from PyPDF2 import PdfFileMerger
def merger(output_path, input_paths):
    """Merge the PDFs in ``input_paths``, in order, into ``output_path``.

    :param output_path: file to create; opened in ``'wb'`` mode.
    :param input_paths: iterable of PDF paths; each is appended to the end
        of the merged document in the order given.
    """
    pdf_merger = PdfFileMerger()
    try:
        for path in input_paths:
            pdf_merger.append(path)
        with open(output_path, 'wb') as fileobj:
            pdf_merger.write(fileobj)
    finally:
        # The merger opens every input that is given as a path and holds it
        # until close() is called, so release them even if a step above fails.
        pdf_merger.close()
# Script entry point: merge every file matching w9_*.pdf, ordered by name.
if __name__ == '__main__':
    paths = sorted(glob.glob('w9_*.pdf'))
    merger('pdf_merger2.pdf', paths)
`PdfFileMerger`는 다음과 같은 `merge` 메소드를 제공합니다.
# Excerpt: only the signature and docstring are shown; the method body is
# not part of this listing.
def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True) :
    """
    Merges the pages from the given file into the output file at the
    specified page number.

    :param int position: The *page number* to insert this file. File will
        be inserted after the given number.
    :param fileobj: A File Object or an object that supports the standard read
        and seek methods similar to a File Object. Could also be a
        string representing a path to a PDF file.
    :param str bookmark: Optionally, you may specify a bookmark to be applied at
        the beginning of the included file by supplying the text of the bookmark.
    :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
        to merge only the specified range of pages from the source
        document into the output document.
    :param bool import_bookmarks: You may prevent the source document's bookmarks
        from being imported by specifying this as ``False``.
    """
북마크 삽입¶
PDF-XChange Editor를 이용해서 북마크를 입력하시면 됩니다.