Skip to content

Commit

Permalink
Merge pull request #386 from myhloli/master
Browse files Browse the repository at this point in the history
feat(draw_bbox): add model bbox drawing functionality
  • Loading branch information
myhloli authored Aug 9, 2024
2 parents e7b0f8b + c90ee89 commit fa3475a
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 2 deletions.
67 changes: 66 additions & 1 deletion magic_pdf/libs/draw_bbox.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from magic_pdf.libs.Constants import CROSS_PAGE
from magic_pdf.libs.commons import fitz # PyMuPDF
from magic_pdf.libs.ocr_content_type import ContentType, BlockType
from magic_pdf.libs.ocr_content_type import ContentType, BlockType, CategoryId
from magic_pdf.model.magic_model import MagicModel


def draw_bbox_without_number(i, bbox_list, page, rgb_config, fill_config):
Expand Down Expand Up @@ -225,3 +226,67 @@ def get_span_info(span):

# Save the PDF
pdf_docs.save(f"{out_path}/spans.pdf")


def drow_model_bbox(model_list: list, pdf_bytes, out_path):
    """Draw the model's layout detections on every page and save ``model.pdf``.

    The PDF is opened from ``pdf_bytes`` and wrapped, together with
    ``model_list``, in a ``MagicModel``. For each page index in
    ``model_list`` the page's ``layout_dets`` are read back from the
    ``MagicModel`` and their ``bbox`` values are grouped by ``category_id``.
    Detections whose category is not one of the layers listed below are
    ignored. Each group is then handed to ``draw_bbox_with_number`` with its
    own RGB colour, and the annotated document is written to
    ``{out_path}/model.pdf``.

    Args:
        model_list: per-page model output; only its length is used directly
            here, the content is consumed through ``MagicModel``.
        pdf_bytes: raw bytes of the PDF to annotate.
        out_path: directory prefix under which ``model.pdf`` is saved.
    """
    pdf_docs = fitz.open("pdf", pdf_bytes)
    magic_model = MagicModel(model_list, pdf_docs)

    # (category id, RGB colour) pairs, listed in the order the layers are drawn.
    layers = (
        (CategoryId.Abandon, (158, 158, 158)),
        (CategoryId.TableBody, (204, 204, 0)),
        (CategoryId.TableCaption, (255, 255, 102)),
        (CategoryId.TableFootnote, (229, 255, 204)),
        (CategoryId.ImageBody, (153, 255, 51)),
        (CategoryId.ImageCaption, (102, 178, 255)),
        (CategoryId.Title, (102, 102, 255)),
        (CategoryId.Text, (153, 0, 76)),
        (CategoryId.InterlineEquation_YOLO, (0, 255, 0)),
    )

    # boxes_by_layer[k][page_no] holds the bboxes of layer k on that page.
    boxes_by_layer = [[] for _ in layers]

    for page_no in range(len(model_list)):
        page_boxes = [[] for _ in layers]
        detections = magic_model.get_model_list(page_no)["layout_dets"]
        for detection in detections:
            bbox = detection["bbox"]
            category = detection["category_id"]
            # First matching layer wins; unknown categories fall through.
            for (layer_category, _), bucket in zip(layers, page_boxes):
                if category == layer_category:
                    bucket.append(bbox)
                    break

        # Every layer gets an entry for every page, even when it is empty,
        # so the per-layer lists stay indexable by page number.
        for all_pages, this_page in zip(boxes_by_layer, page_boxes):
            all_pages.append(this_page)

    for page_no, page in enumerate(pdf_docs):
        for (_, rgb), all_pages in zip(layers, boxes_by_layer):
            # A fresh list per call, matching the literal lists used before.
            draw_bbox_with_number(page_no, all_pages, page, list(rgb), True)

    # Save the PDF
    pdf_docs.save(f"{out_path}/model.pdf")
14 changes: 14 additions & 0 deletions magic_pdf/libs/ocr_content_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,17 @@ class BlockType:
Footnote = "footnote"
Discarded = "discarded"


class CategoryId:
    """Integer ``category_id`` values used to label model detections.

    A plain namespace of int constants. Ids 9 through 12 are not defined
    in this class.
    """

    # Page-level text regions
    Title = 0
    Text = 1
    Abandon = 2

    # Figures
    ImageBody = 3
    ImageCaption = 4

    # Tables
    TableBody = 5
    TableCaption = 6
    TableFootnote = 7

    # Equations; the _Layout / _YOLO suffixes presumably name the model
    # that produced the detection (TODO confirm).
    InterlineEquation_Layout = 8
    InlineEquation = 13
    InterlineEquation_YOLO = 14

    OcrText = 15
2 changes: 2 additions & 0 deletions magic_pdf/tools/cli_dev.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def jsonl(jsonl, method, output_dir):
jso["doc_layout_result"],
method,
f_dump_content_list=True,
f_draw_model_bbox=True,
)


Expand Down Expand Up @@ -146,6 +147,7 @@ def read_fn(path):
model_json_list,
method,
f_dump_content_list=True,
f_draw_model_bbox=True,
)


Expand Down
5 changes: 4 additions & 1 deletion magic_pdf/tools/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import click
from loguru import logger
from magic_pdf.libs.MakeContentConfig import DropMode, MakeMode
from magic_pdf.libs.draw_bbox import draw_layout_bbox, draw_span_bbox
from magic_pdf.libs.draw_bbox import draw_layout_bbox, draw_span_bbox, drow_model_bbox
from magic_pdf.pipe.UNIPipe import UNIPipe
from magic_pdf.pipe.OCRPipe import OCRPipe
from magic_pdf.pipe.TXTPipe import TXTPipe
Expand Down Expand Up @@ -37,6 +37,7 @@ def do_parse(
f_dump_orig_pdf=True,
f_dump_content_list=False,
f_make_md_mode=MakeMode.MM_MD,
f_draw_model_bbox=False,
):
orig_model_list = copy.deepcopy(model_list)
local_image_dir, local_md_dir = prepare_env(output_dir, pdf_file_name, parse_method)
Expand Down Expand Up @@ -73,6 +74,8 @@ def do_parse(
draw_layout_bbox(pdf_info, pdf_bytes, local_md_dir)
if f_draw_span_bbox:
draw_span_bbox(pdf_info, pdf_bytes, local_md_dir)
if f_draw_model_bbox:
drow_model_bbox(orig_model_list, pdf_bytes, local_md_dir)

md_content = pipe.pipe_mk_markdown(
image_dir, drop_mode=DropMode.NONE, md_make_mode=f_make_md_mode
Expand Down

0 comments on commit fa3475a

Please sign in to comment.