镜像自地址
https://github.com/binary-husky/gpt_academic.git
已同步 2025-12-06 14:36:48 +00:00
修复pdf分解bug
这个提交包含在:
@@ -444,6 +444,7 @@ def read_and_clean_pdf_text(fp):
 pf = 998
 for l in t['lines']:
     txt_line = "".join([wtf['text'] for wtf in l['spans']])
+    if len(txt_line) == 0: continue
     pf = primary_ffsize(l)
     meta_line.append([txt_line, pf, l['bbox'], l])
     for wtf in l['spans']: # for l in t['lines']:
在新工单中引用
屏蔽一个用户