修复pdf分解bug

这个提交包含在:
Your Name
2023-04-18 16:14:30 +08:00
父节点 4c486f27c8
当前提交 d35d7710c1

查看文件

@@ -444,6 +444,7 @@ def read_and_clean_pdf_text(fp):
 pf = 998
 for l in t['lines']:
     txt_line = "".join([wtf['text'] for wtf in l['spans']])
+    if len(txt_line) == 0: continue
     pf = primary_ffsize(l)
     meta_line.append([txt_line, pf, l['bbox'], l])
     for wtf in l['spans']: # for l in t['lines']: