|
- import PyPDF2
- from PyPDF2 import PdfWriter, PdfReader
- from PIL import Image, ImageDraw, ImageFont
- import fitz # PyMuPDF
- import pandas as pd
- COL_NAME = 3  # NOTE(review): presumably the spreadsheet column index of the name field; not read by the visible code (get_record takes its own COL_NAME parameter) - confirm before relying on it
- COL_VALUE = 5  # NOTE(review): presumably the spreadsheet column index of a value field; not read anywhere in the visible code - confirm
def add_stamp(content_page, x1, y1, text, image_path=''):
    """Build a stamp layer (red text plus optional image) and merge a page onto it.

    A blank white 595x841 canvas is created, ``text`` is drawn on it, the
    image at ``image_path`` is pasted when a path is given, and the canvas is
    rendered to a one-page PDF.  ``content_page`` is then merged into that
    rendered page.

    Args:
        content_page: PyPDF2 page object merged into the rendered stamp page.
        x1: Left pixel coordinate at which the stamp image is pasted.
        y1: Top pixel coordinate at which the stamp image is pasted; the text
            is drawn 30 pixels above it, at a fixed x of 100.
        text: Note drawn in red using the 10pt 'simsun.ttc' font.
        image_path: Path of the stamp image.  An empty string (the default)
            skips the image and produces a text-only layer.

    Returns:
        The page of the rendered stamp PDF with ``content_page`` merged in.
    """
    import io  # local import so the block does not depend on the file header

    canvas = Image.new('RGBA', (595, 841), color=(255, 255, 255, 255))
    painter = ImageDraw.Draw(canvas)
    font = ImageFont.truetype('simsun.ttc', 10)
    painter.text((100, y1 - 30), text, font=font, fill=(255, 0, 0))

    # Only touch the image file when one was actually requested; opening ''
    # (the default) would raise before the text-only layer could be built.
    if image_path:
        with Image.open(image_path) as stamp_image:
            canvas.paste(stamp_image, (x1, y1))

    # Render in memory instead of through a shared 'temp.pdf' on disk, so no
    # stale file is left behind and concurrent runs cannot clobber each other.
    rendered = io.BytesIO()
    canvas.save(rendered, 'PDF')
    rendered.seek(0)

    stamp_page = PdfReader(rendered).pages[0]
    stamp_page.merge_page(content_page)
    return stamp_page
- #add_stamp('ah.pdf', 'sign.png')
def foundTextInPDF(doc, page_number_start, texts, texts_pre=None, mode='first'):
    """Locate the page that contains every string in ``texts``.

    Pages are scanned in order starting at ``page_number_start``.  When
    ``texts_pre`` is given, target matching only begins once a page holding
    all of those prerequisite strings has been seen; that page itself and
    every later page are then eligible as the target.

    Args:
        doc: Sized, indexable collection of pages (e.g. a PyMuPDF document).
            Each page must provide ``search_for(text)`` returning a list of
            four-value rectangles ``(x0, y0, x1, y1)``.
        page_number_start: Index of the first page to inspect.  Negative
            values (such as the -1 "not found" result of an earlier call)
            are treated as 0 so the scan never wraps to the last page.
        texts: Strings that must all occur on the target page.  The reported
            position belongs to the last occurrence of ``texts[-1]``.
        texts_pre: Optional strings that must all occur together on one page
            before target matching starts.
        mode: ``'first'`` stops at the first matching page; any other value
            keeps scanning and reports the last matching page.

    Returns:
        ``(page_number, x2, y2)`` with the integer right/bottom coordinates
        of the located rectangle, or ``(-1, 10000, 10000)`` when nothing
        matches (including when ``texts`` is empty).
    """
    page_number_target, x2, y2 = -1, 10000, 10000
    if not texts:
        # Nothing to look for: report "not found" instead of failing on
        # texts[-1] further down.
        return page_number_target, x2, y2

    texts_pre = [] if texts_pre is None else texts_pre
    # Without prerequisites every page is eligible from the start.  Once the
    # prerequisite page has been seen the flag stays set for later pages.
    flag_pre = not texts_pre

    for page_number in range(max(page_number_start, 0), len(doc)):
        page = doc[page_number]

        if texts_pre and all(page.search_for(text) for text in texts_pre):
            flag_pre = True
            print(texts_pre)
            print('上述前提页已经在'+str(page_number)+'页找到')

        if not flag_pre:
            continue

        # Search each target string once and reuse the hits of the last one
        # for the reported position.
        hits_per_text = [page.search_for(text) for text in texts]
        if not all(hits_per_text):
            continue

        page_number_target = page_number
        _, _, right, bottom = hits_per_text[-1][-1]
        x2, y2 = int(right), int(bottom)
        if mode == 'first':
            break

    return page_number_target, x2, y2
# Step 1: open the Excel workbook.
# Then work through the name rows, starting with the first name.
def get_record(pdf_path='', excel_name='aaa.xlsx', sheet_name='Sheet1',
               COL_NAME=3, ROW_NAME_RANGE=range(1, 33), COL_VALUES=3,
               text_pre_later=''):
    """Map PDF pages to the spreadsheet values that should be stamped on them.

    For every row in ``ROW_NAME_RANGE`` the name in column ``COL_NAME`` is
    looked up in the PDF: a page containing the name (and ``text_pre_later``
    when given) must be seen first, and the target is then the first page
    from there on that contains '评分'.

    Args:
        pdf_path: Path of the PDF to search.
        excel_name: Path of the Excel workbook.
        sheet_name: Worksheet to read.
        COL_NAME: Positional column index of the name cell.
        ROW_NAME_RANGE: Positional row indices (into the loaded frame) to
            process.
        COL_VALUES: Positional column index, or iterable of indices, whose
            cells are reported for each row.
        text_pre_later: Extra text that must share a page with the name; an
            empty string adds no extra requirement.

    Returns:
        Dict keyed by target page index.  Each value is a dict with ``'x'``
        and ``'y'`` (position returned by ``foundTextInPDF``) and ``'value'``
        (concatenated ``label:cell;`` pairs, the label being the cell of
        frame row 0 in the same column).
    """
    df = pd.read_excel(excel_name, sheet_name=sheet_name)
    print(df)

    # The default is a bare int, which cannot be iterated; accept both forms.
    try:
        value_cols = list(COL_VALUES)
    except TypeError:
        value_cols = [COL_VALUES]

    record = {}
    search_start = 0
    # Context manager closes the document even if a lookup raises.
    with fitz.open(pdf_path) as doc:
        for row in ROW_NAME_RANGE:
            cell = df.iloc[row, COL_NAME]
            if pd.isna(cell):
                # An empty name cell cannot be searched for; skip the row.
                continue
            name = str(cell)

            # Step 2: walk the PDF pages and try to locate this name.
            prerequisites = [name]
            if text_pre_later:
                prerequisites.append(text_pre_later)
            page_number_target, x, y = foundTextInPDF(
                doc, search_start, ['评分'], prerequisites)

            if page_number_target < 0:
                print(name+' not found')
                # Restart from the first page rather than feeding the -1
                # sentinel back in as a page index.
                search_start = 0
                continue

            search_start = page_number_target
            values = ''.join(
                str(df.iloc[0, col])+':'+str(df.iloc[row, col])+';'
                for col in value_cols
            )
            record[page_number_target] = {'x': x, 'y': y, 'value': values}
            print(name+' 已经批阅')
    return record
-
# Input PDF, output PDF and per-round search configuration.
pdf_path = '1.pdf'
pdf_path_out = '期末考核记录单_电子电路CAD_22电信本4_评阅版.pdf'
# One entry per round: the spreadsheet columns to report and the text that
# must share a page with the student's name.
COL_VALUES_SET = [[3, 4, 5, 6, 8, 12, 15]]
texts_pre_set = ['记录单']

# Run one lookup round per configured (text, columns) pair and merge the
# results.  With the single round configured here this yields exactly the
# record of that round.
record = {}
for text_pre, col_values in zip(texts_pre_set, COL_VALUES_SET):
    print(text_pre)
    record.update(get_record(pdf_path=pdf_path,
                             excel_name='【成绩总表】电子电路CAD_22电信_成绩.xlsx',
                             sheet_name='期末考核_22电信本4',
                             COL_NAME=2,
                             ROW_NAME_RANGE=range(2, 29),
                             COL_VALUES=col_values,
                             text_pre_later=text_pre))
print(record)

# Copy every page to the output, stamping those that have a record.
pdf_input = PdfReader(pdf_path)
pdf_output = PdfWriter()
for page_number, content_page in enumerate(pdf_input.pages):
    if page_number in record:
        print(page_number)
        entry = record[page_number]
        # The stamp image is placed 40 units below the located text.
        content_page = add_stamp(content_page,
                                 entry['x'],
                                 entry['y'] + 40,
                                 entry['value'], 'sign1.png')
    pdf_output.add_page(content_page)

with open(pdf_path_out, 'wb') as output_file:
    pdf_output.write(output_file)
复制代码 |
|