(博主亲自录制视频)
cpg数据库处理_找到未提取的pdf,存放于文件夹Chinese_undeal_pdfs
move_unextracted_pdfs.py
# -*- coding: utf-8 -*-
"""Move the PDFs that have not been extracted yet into ``Chinese_undeal_pdfs``.

Created on Sun Sep 18 17:06:15 2016

@author: Administrator

The first worksheet of ``unextracted.xlsx`` is read: column 0 holds every PDF
file name and column 1 holds the names that were already extracted.  The first
row of each column is skipped (presumably a header row -- confirm against the
workbook).  Every name found in column 0 but not in column 1 is moved into the
``Chinese_undeal_pdfs`` folder; names that could not be moved are collected in
``failed_files``.
"""
import os
import shutil

import xlrd

excelFilename = "unextracted.xlsx"
# Not used below: the worksheet is selected by position, not by name.
sheetName = "Sheet1"

data = xlrd.open_workbook(excelFilename)
table = data.sheets()[0]

# Complete list of PDFs (column 0, first row skipped).
totalpdfs_list = table.col_values(0)[1:]
extractedpdfs_list = table.col_values(1)[1:]
# PDFs that were already extracted, with blank cells dropped.
extractedpdfs_list1 = [i for i in extractedpdfs_list if i != ""]
# Set built once so each membership test below is O(1) instead of O(n).
_extracted_names = set(extractedpdfs_list1)
# PDFs that still have to be handled.  Blank cells are skipped as well: they
# are not file names and could only ever end up in failed_files.
unextractedPdfs_list = [
    i for i in totalpdfs_list if i != "" and i not in _extracted_names
]
# Names whose move failed; filled in by RemoveFile().
failed_files = []


def RemoveFile():
    """Move every entry of ``unextractedPdfs_list`` into ``Chinese_undeal_pdfs``.

    The destination folder is created when it is missing.  Without that,
    ``shutil.move`` would rename the first PDF to ``Chinese_undeal_pdfs``
    instead of moving it into a folder of that name.

    The move is best-effort: a file that cannot be moved is appended to the
    module-level ``failed_files`` list and processing continues with the next
    one.

    Raises:
        OSError: if ``Chinese_undeal_pdfs`` cannot be created as a folder
            (for example because a regular file with that name exists).
    """
    dest_dir = "Chinese_undeal_pdfs"
    if not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)
    for pdf_name in unextractedPdfs_list:
        try:
            shutil.move(pdf_name, dest_dir)
        except Exception:
            # Exception (not a bare except) keeps the best-effort behaviour
            # while still letting Ctrl-C / SystemExit stop the script.
            failed_files.append(pdf_name)


RemoveFile()
移动英语pdf文件
remove_englishFile.py
# -*- coding: utf-8 -*-
"""Move the English PDF files into the ``English`` folder.

Spyder Editor

remove_englishFile.py

This is a temporary script file.

The file names are read from column 0 of the first worksheet of
``be_cpg_English.xlsx``; the first row is skipped (presumably a header row --
confirm against the workbook).
"""
import os
import shutil

import xlrd

excelFilename = "be_cpg_English.xlsx"
# Not used below: the worksheet is selected by position, not by name.
sheetName = "Sheet1"

data = xlrd.open_workbook(excelFilename)
table = data.sheets()[0]

# File names to move (column 0, first row skipped).
EnglishFile_list = table.col_values(0)[1:]


def RemoveFile():
    """Move every entry of ``EnglishFile_list`` into the ``English`` folder.

    The destination folder is created when it is missing.  Without that,
    ``shutil.move`` would rename the first file to ``English`` instead of
    moving it into a folder of that name.

    Raises:
        OSError: if ``English`` cannot be created as a folder.

    Any exception raised by ``shutil.move`` propagates to the caller and stops
    the loop at the failing file.
    """
    dest_dir = "English"
    if not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)
    for file_name in EnglishFile_list:
        shutil.move(file_name, dest_dir)


# NOTE(review): nothing in this excerpt calls RemoveFile(); confirm that the
# call exists further down the file, otherwise running the script moves nothing.