o
    nN)j                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZmZmZ ddl	m
Z
 dd Zdd Zd5ddZdd Zdd Zd6ddZdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Z e!d4kre   dS dS )7    N)DictListSet   )pymupdfc                 C   s   d|  d ddS )N K   -)center)x r   q/var/www/html/finance.cargoinsureonline.com/_shared/backend-venv/lib/python3.10/site-packages/pymupdf/__main__.pymycenter   s   r   c                 C   s   |d }|d }|dkr|  |S dd }t| |}t| |}	 |j|jkr;|j|j  kr4dkr;n n|jdksRtd| d| d t| d}||S t|}||j d }}||S )	zReturn image for a given XREF.r   r   c                 S   s"   | j jdkr| S ttj| }|S )N   )
colorspacenr   PixmapcsRGB)pixtpixr   r   r   getimage   s   zrecoverpix.<locals>.getimagezWarning: unsupported /SMask z for :N)	extract_imager   r   irectalphar   message	set_alphasamples)docitemr   sr   pix1pix2r   r   r   r   
recoverpix   s"   
2

r#   FTc                 C   s   t | }|js|du rtd d}|js|S |r<||}|s&td |du r:d}|dkr2d}t d|  |S td	|j d
 |S )z!Open and authenticate a document.Tz$this command supports PDF files onlyzauthentication unsuccessfuluser   ownerzauthenticated as 'z' requires a password)	r   openis_pdfsysexit
needs_passauthenticater   name)filenamepasswordshowpdfr   rc
auth_levelr   r   r   	open_file9   s$   



r6   c                 C   sN   t dd |  D d }|  D ]\}}|| d| }t| qdS )zPrint a Python dictionary.c                 S   s   g | ]}t |qS r   )len).0kr   r   r   
<listcomp>Q       zprint_dict.<locals>.<listcomp>r   : N)maxkeysitemsrjustr   r   )r   lr9   vmsgr   r   r   
print_dictO   s
   rD   c                 C   s   t |dd | |}t | | |rJ| }z|dd }|| }|dr/d}W n ty;   d}Y nw t d| d t d	 t d
 dS )zPrint an object given by XREF number.

    Simulate the PDF source in "pretty" format.
    For a stream also print its size.
    dz 0 objz/Lengthr   z0 Runknownz
stream
...z bytes	endstreamendobjN)r   r   xref_objectxref_is_streamsplitindexendswith	Exception)r   xrefxref_strtempidxsizer   r   r   
print_xrefW   s"   




rT   pagec                 C   s~  t |d }| d|dd} | d}g }t|D ]\}}|d }| rLt|}	d|	  kr5|k r?n n|t| ntd| d|d qz|d	\}
}t|
}
t|}W n t	yr   td| d
|d Y nw d|
  kr}|k rn nd|  kr|k sn td| d
|d |
|kr||
 q|
|k r|t
t|
|d 7 }q|t
t|
|d d7 }q|S )aK  Transform a page / xref specification into a list of integers.

    Args
    ----
        rlist: (str) the specification
        limit: maximum number, i.e. number of pages, number of objects
        what: a string to be used in error messages
    Returns
    -------
        A list of integers representing the specification.
    r   Nr    ,zbad z specification at item rE   r	   z range specification at item r$   )strreplacerK   	enumerate	isdecimalintappendr+   r,   rN   listrange)rlistlimitwhatrV   	rlist_arrout_listseqr   r   ii1i2r   r   r   get_listn   s8   
.
rj   c                 C   s  t | j| jd}tj| jd }d}|dkr|d }d}t|d}|j}t	d| j|j
| d |||d |d	 f  |j}|d
kr\| }d}|dkrPd}t	d| d| d | }|d
krnt	d|dd t	  | jrt	td | }t|| t	  | jrt	td t|j t	  | jrt	td t| j| dd}	|	D ]}t|| t	  q| jrt	td t| j|j
d }
|
D ]}|d }||}t	d|dd t|| t	  q| jrt	td t	|  t	  |  d S )NTi   KBi  MBr   z7'%s', pages: %i, objects: %i, %g %s, %s, encryption: %sformat
encryptionr   rW      znot zdocument contains z root form fields and is signedrE    embedded fileszPDF catalogzPDF metadatazobject informationrO   )rc   zpage informationzPage r   zPDF trailer)r6   inputr1   ospathgetsizeroundmetadatar   r   
page_countxref_lengthis_form_pdfget_sigflagsembfile_countcatalogr   pdf_catalogrT   rD   xrefsrj   pages	page_xreftrailerpdf_trailerclose)argsr   rS   flagmetar   r    sign_strrO   xreflpagelpnor   r   r   r2      sx   








r2   c                 C   s   t | j| jdd}| j}d|}| js0|j| j| j| j	| j
| j| j| j|| j| j| jd d S t| j|jd }t }|D ]}|d }|j|||d q?|j| j| j| j	| j
| j| j| j|| j| j| jd |  |  d S )NTr3   keepnonezrc4-40zrc4-128zaes-128zaes-256)
garbagedeflateprettycleanasciilinearrn   owner_pwuser_pwpermissionsr   	from_pageto_page)r6   rr   r1   rn   rL   r   saveoutputr   compressr   sanitizer   r   r'   r%   
permissionrj   rx   r   r)   
insert_pdfr   )r   r   rn   encryptr   outdocr   r   r   r   r   r      sR   r   c           
      C   s   | j }t }|D ]T}|d}t|dkr|d nd}t|d |dd}d|dd }|r@td|dd |jd }nt	d|jd }|D ]}	|j
||	d |	d d qJ|  q	|j| jd	dd
 |  dS )z&Join pages from several PDF documents.rX   r   Nr   Tr   r&   r   r   )r   r   )rr   r   r)   rK   r7   r6   joinrj   rx   r`   r   r   r   r   )
r   doc_listr   src_itemsrc_listr1   srcr   	page_listrg   r   r   r   doc_join  s   
 
r   c           	      C   sP  t | j| jdd}| s| jr| j| jkrtd t | j| j}| j	r*t
| j	nt
 }t
| }|r?||ks>td n|}|sHtd |t
| @ }|r\tdt|  |D ])}||}||}|j|||d |d |d	 d
 td| d|j	 d q^|  | jr| j| jkr|j| jdd n|  |  dS )z!Copy embedded files between PDFs.Tr   cannot save PDF incrementallyz%not all names are contained in sourceznothing to copyz0following names already exist in receiving PDF: r0   	ufilenamedescr0   r   r   zcopied entry 'z' from 'r(   ro   r   N)r6   rr   r1   can_save_incrementallyr   r+   r,   source	pwdsourcer/   setembfile_namesrY   embfile_infoembfile_getembfile_addr   r   r   r   saveIncr)	r   r   r   names	src_names	intersectr   infobuffr   r   r   embedded_copy   sF   




r   c              
   C   s   t | j| jdd}| s| jr| j| jkrtd z|| j W n" t	t
jjfyE } ztd| jd|  W Y d}~nd}~ww | jrO| j| jkrT|  n|j| jdd |  dS )	zDelete an embedded file entry.Tr   r   no such embedded file r<   Nr   r   )r6   rr   r1   r   r   r+   r,   embfile_delr/   
ValueErrorr   mupdfFzErrorBaser   r   r   )r   r   er   r   r   embedded_delH  s    
$
r   c              
   C   s:  t | j| jdd}z|| j}|| j}W n" ttjj	fy9 } zt
d| jd|  W Y d}~nd}~ww | jr@| jn|d }| jsp| jsptj|rXt
d|  tj|}|t tj spt
d|  t|d	}|| W d   n1 sw   Y  td
| j d| d |  dS )z&Retrieve contents of an embedded file.Tr   r   r<   Nr0   z6refusing to overwrite existing file with stored name: z9refusing to write stored name outside current directory: wbzsaved entry 'z' as 'r(   )r6   rr   r1   r   r/   r   r   r   r   r   r+   r,   r   unsafers   rt   existsabspath
startswithgetcwdsepr)   writer   r   )r   r   streamrE   r   r0   filename_absr   r   r   r   embedded_get[  s(   $r   c                 C   s@  t | j| jdd}| s| jdu s| j| jkrtd z|| j td| j d W n	 t	y8   Y nw t
j| jrGt
j| jsQtd| j d t| jd	}| }W d   n1 sfw   Y  | j}|}| jsv|}n| j}|j| j||||d
 | jr| j| jkr|  n|j| jdd |  dS )zInsert a new embedded file.Tr   Nr   zentry 'z' already existszno such file 'r(   rbr   ro   r   )r6   rr   r1   r   r   r+   r,   r   r/   rN   rs   rt   r   isfiler)   readr   r   r   r   r   r   r   fr   r0   r   r   r   r   r   embedded_addp  s6   


r   c                 C   sh  t | j| jdd}| s| jdu s| j| jkrtd z|| j W n t	y8   td| j d Y nw | j
durgtj
| j
rgtj
| j
rgt| j
d}| }W d   n1 saw   Y  nd}| jrp| j}nd}| jry| j}n	| jr| j}nd}| jr| j}nd}|j| j||||d | jdu s| j| jkr|  n|j| jd	d
 |  dS )z0Update contents or metadata of an embedded file.Tr   Nr   no such embedded file 'r(   r   r   ro   r   )r6   rr   r1   r   r   r+   r,   r   r/   rN   rt   rs   r   r   r)   r   r0   r   r   embfile_updr   r   r   r   r   r   r   embedded_upd  sJ   



r   c                 C   sN  t | j| jdd}| }| jdurQ| j|vr"td| j d n/t  d}t	|dkr0d}td	t	|d
d| d t  t
|| j t  dS |s_td|j d dS t	|dkrsd|j dt	|d
d}nd|j d}t| t  |D ]}| jst| q||}t
|| t  q|  dS )zList embedded files.Tr   Nr   r(   rW   r   r    zprinting 1 of rE   z embedded filer   z' contains no embedded filesz' contains the following rq   z&' contains the following embedded file)r6   rr   r1   r   r/   r+   r,   r   r   r7   rD   r   detailr   )r   r   r   pluralrC   r/   _r   r   r   embedded_list  s<   





r   c              
   C   s  | j s| jstd t| j| jdd}| jr!t| j|j	d }nt
d|j	d }| js4tjtj}n| j}tj|rCtj|sLtd| d t }t }|D ]}| j r||d }|D ]N}|d }	|	|vr||	 ||	\}
}}}|dks|sqbtj||
d	d
 d
|	 d| }t|d}|| W d   n1 sw   Y  d}qb| jr.||d }|D ]o}|d }	|	|vr-||	 t||}t|tu r|d }|d }tj|d|	dd| }t|d}|| W d   n	1 sw   Y  qtj|d|	dd}|jjdk r!|nt !t j"|}|#| qqT| j rBt $dt%|dd| d | jrUt $dt%|dd| d |&  dS )z)Extract images and / or fonts from a PDF.z"neither fonts nor images requestedTr   r   zoutput directory z does not existr   zn/ar   r	   .r   Nextimagezimg-rE   z.pngr   zsaved z fonts to 'r(   z images to ')'fontsimagesr+   r,   r6   rr   r1   r   rj   rx   r`   r   rs   rt   r   curdirr   isdirr   get_page_fontsaddextract_fontr   rZ   r)   r   get_page_imagesr#   typedictr   r   r   r   r   r   r   r7   r   )r   r   r   out_dir
font_xrefsimage_xrefsr   itemlistr   rO   fontnamer   r   bufferoutnameoutfiler   imgdatar"   r   r   r   extract_objects  st   





r   c           	      C   sX   |rdnt dg}| jd|d}|s|s|| d S ||jddd || d S )N   
   textflagsutf8surrogatepasserrors)bytesget_textr   encode)	rU   textoutGRIDfontsize
noformfeed
skip_emptyr   eopr   r   r   r   page_simple+  s   

r  c           
      C   sz   |rdnt dg}| jd|d}|g kr|s|| d S |jdd d |D ]}	||	d jd	d
d q'|| d S )Nr   r   blocksr   c                 S   s   | d | d fS )Nro   r   r   )br   r   r   <lambda>>  s    z page_blocksort.<locals>.<lambda>keyr   r   r   r   )r   r   r   sortr   )
rU   r  r  r  r  r  r   r  r  r	  r   r   r   page_blocksort7  s   

r  c           !         s@  |rdnt dg}dtt dtdtfdd}dtt dtfd	d
}	dtt dtjf fdd}
dtdtfdddd }| jd|dd }|
|| \}}}}}|g kr]|s[|	| d S |	||}|j
dd d i }|D ]}|\}}}}|||}||g }|| |||< qnt| }|
  || }i }|D ]/}|| }t|}|dk rd||< qdd |D }|
  t|}||k r|}|d ||< q||d |d   |t|  d }|d }|	d |D ].}||k r|	d ||7 }||k s||||| || } |	| d  jd!d"d# || }q|	| d S )$Nr   r   valuesvaluereturnc                 S   s2   t | |}|r| |d  S td|dd|  )zFind the right row coordinate.

        Args:
            values: (list) y-coordinates of rows.
            value: (int) lookup for this value (y-origin of char).
        Returns:
            y-ccordinate of appropriate line for value.
        r   z	Line for gz not found in )bisectbisect_rightRuntimeError)r  r  rg   r   r   r   find_line_indexI  s   	z$page_layout.<locals>.find_line_indexrowsc                 S   sJ   t | } |   | d g}| dd  D ]}||d | kr"|| q|S )Nr   r   r$   )r_   r  r^   )r  r  nrowshr   r   r   curate_rowsX  s   

z page_layout.<locals>.curate_rowsr  rU   c              
      s  t  }|jj}|jj}|}|}d}g }| D ]}	|	d D ]}
|
d dkr$q|
d \}}}}|dk s6||jjkr7q|| }||krA|}|
d D ]}|d  krNqE|d D ]z}|d \}}}}|| }|d	 \}}tt|}|| |d
 }||kr|dkr|}||k r|}|dkr|g kr|d \}}}}||kr|tdkr|| }n|dkrtd}n|dkrtd}n|}||||f|d< qR|||||f qRqEqq|||||fS )Nr   linesdir)r   r   bboxspansrS   charsorigincr   r$      rg     rA     )	r   rectwidthheightr]   rv   r   chrr^   )r  rU   r  
page_widthpage_height	rowheightleftrightr  blocklinex0y0x1y1r'  spanr!  r   cwidthoxoychold_chold_oxold_oy
old_cwidthligr  joinligaturer   r   process_blocksa  s`   


)z#page_layout.<locals>.process_blocksr=  c                 S   st   | dkrt dS | dkrt dS | dkrt dS | dkr t dS | d	kr(t d
S | dkr0t dS | dkr8t dS | S )zReturn ligature character for a given pair / triple of characters.

        Args:
            lig: (str) 2/3 characters, e.g. "ff"
        Returns:
            Ligature, e.g. "ff" -> chr(0xFB00)
        ffr"  fii  fli  ffir#  fflr$  fti  sti  )r(  )r=  r   r   r   r?    s   	z!page_layout.<locals>.joinligaturec                 S   s   d}d}d}d}|t jkrtd|d|D ]`}|\}	}
}}|
|  }
|
| }||	kr4|
| |d kr4q|	dkrA||
 | dkrAq|	}|
|| k rR||	7 }|}|
}q|	dkrWqt|
| t| }|
|kro|dkro|d| 7 }||	7 }|}|
}q| S )	a  Produce the text of one output line.

        Args:
            left: (float) left most coordinate used on page
            slot: (float) avg width of one character in any font in use.
            minslot: (float) min width for the characters in this line.
            chars: (list[tuple]) characters of this line.
        Returns:
            text: (str) text string for this line
        rW   r   z#program error: minslot too small = r  g?r   g?r   )r   EPSILONr  r]   r7   rstrip)r,  slotminslotlcharsr   old_charold_x1r:  r!  charr6  r   r5  r2  deltar   r   r   make_textline  s:   
z"page_layout.<locals>.make_textlinerawdictr   c                 S   s   | d S )Nr   r   )r!  r   r   r   r
    s    zpage_layout.<locals>.<lambda>r  r&   r   c                 S   s   g | ]}|d  qS )ro   r   )r8   r!  r   r   r   r:     r;   zpage_layout.<locals>.<listcomp>r   r$   g333333?
r   r   r   )r   r   r]   r   r   r   PagerY   r   r   r  getr^   r_   r>   r7   
statisticsmedianr   )!rU   r  r  r  r  r  r   r  r  r  r@  rQ  r  r  r  r,  r-  r+  r  r!  r   r7  yrL  r>   rJ  minslotsr9   ccountwidths	this_slotrowposr   r   r>  r   page_layoutE  sb   	46






$


r^  c                 C   s   t | j| jdd}t| j|jd }| j}|d u r&tj	|j
\}}|d }t|dJ}tjtjB }| jr:|tjN }| jrB|tjN }| jrJ|tjN }tttd}|D ]}	||	d  }
|| j |
|| j| j| j| j|d qRW d    d S 1 sxw   Y  d S )NFr   r   z.txtr   simpler  layoutr   )r6   rr   r1   rj   r   rx   r   rs   rt   splitextr/   r)   r   TEXT_PRESERVE_LIGATURESTEXT_PRESERVE_WHITESPACEconvert_whitenoligaturesextra_spacesTEXT_INHIBIT_SPACESr  r  r^  modegridr  r  r  )r   r   r   r   r0   r   r  r   funcr   rU   r   r   r   gettext&  s>   


"rl  c                 C   s   t d t d d S )NzThis is from PyMuPDF message().zThis is from PyMuPDF log().)r   r   log)r   r   r   r   	_internalG  s   
rn  c                  C   sL  t jdtdd} | jddd}|jdtdd	}|jd
tdd |jddd |jdddd |jdddd |jdddd |jdtdd |jdtdd |jtd |jdtdd	}|jd
tdd |jdtd d |jddd |jd!d"d#d$d% |jd&td'd |jd(td)d |jd*t	d+t
d,d-d. |jd/dd0d1d2 |jd3dd0d4d2 |jd5dd0d6d2 |jd7t	d8d9d: |jd;dd0d<d2 |jd=dd0d>d2 |jdd?d |jtd |jd@tdAdBdC}|jd
dDdEdF |jdGdHdIdJ |jtd |jdKtdLd	}|jd
tdd |jdMddNd |jdOddPd |jdGdQd |jddd |jdtdRd |jtd |jdStdTd	}|jd
dd |jdUdVd |jdWddXd |jddd |jtd |jdYtdZd	}|jd
dd |jddd |jdGd[d |jdUdHd\dJ |jd]dHd^dJ |jd_d`d |jtd |jdatdbd	}|jd
dd |jddd |jdGd[d |jdUdHdcdJ |jtd |jddtdedfdC}|jd
dd |jdUdHdgdJ |jddd |jdGdhd |jd]did |jdjdkd |jdldmd |jd_dnd |jtd |jdotdpd	}	|	jd
tdd |	jdUdHdgdJ |	jddd |	jdqd0ddrds |	jdGdtd |	jtd |jdutdvd	}
|
jd
tdwd |
jddxd |
jdGdyd |
jdzdHd{dJ |
jd|d}d |
jdUdDd~dF |
jtd |jdtdd	}|jd
tdd |jddd |jdtdddd. |jdtddd |jdddd0d |jdddd0d |jdddd0d |jdddd0d |jdddd0d |jdGdd |jdtddd |jdtddd |jtd |jdtdd	}|jtd |  }t|ds|   dS || dS )zDefine command configurations.r   zBasic PyMuPDF Functions)progdescriptionSubcommandsz/Enter 'command -h' for subcommand specific help)titlehelpr2   zdisplay PDF information)rp  rr   zPDF filename)r   rs  z	-passwordr1   )rs  z-catalog
store_truezshow PDF catalog)actionrs  z-trailerzshow PDF trailerz	-metadatazshow PDF metadataz-xrefsz&show selected objects, format: 1,5-7,Nz-pagesz'show selected pages, format: 1,5-7,50-N)rk  r   z.optimize PDF, or create sub-PDF if pages givenr   zoutput PDF filenamez-encryptionzencryption methodr   r   )rs  choicesdefaultz-ownerzowner passwordz-userzuser passwordz-garbagezgarbage collection level   r   )r   rs  rv  rw  z	-compressFzcompress (deflate) output)ru  rw  rs  z-asciizASCII encode binary dataz-linearzformat for fast web displayz-permissionr$   zinteger with permission levels)r   rw  rs  z	-sanitizezsanitize / clean contentsz-prettyzprettify PDF structurez/output selected pages pages, format: 1,5-7,50-Nr   zjoin PDF documentsz3specify each input as 'filename[,password[,pages]]')rp  epilog*zinput filenames)nargsrs  z-outputTzoutput filename)requiredrs  extractz extract images and fonts to diskz-imageszextract imagesz-fontszextract fontsz-folder to receive output, defaults to currentz-consider these pages only, format: 1,5-7,50-Nz
embed-infozlist embedded filesz-namezif given, report only this onez-detailzdetail informationz	embed-addzadd embedded filez-output PDF filename, incremental save if nonezname of new entryz-pathzpath to data for new entryz-desczdescription of new entryz	embed-delzdelete embedded filezname of entry to deletez	embed-updzupdate embedded filez*except '-name' all parameters are optionalzname of entryz-Output PDF filename, incremental save if nonezpath to new data for entryz	-filenameznew filename to store in entryz
-ufilenamez&new unicode filename to store in entryz!new description to store in entryzembed-extractzextract embedded file to diskz-unsafezPallow write to stored name even if an existing file or outside current directory)rw  ru  rs  z'output filename, default is stored namez
embed-copyz copy embedded files between PDFszPDF to receive embedded fileszpassword of inputz2output PDF, incremental save to 'input' if omittedz-sourcezcopy embedded files from herez
-pwdsourcezpassword of 'source' PDFzrestrict copy to these entriesrl  z(extract text in various formatting modeszinput document filenamezpassword for input documentz-modez-mode: simple, block sort, or layout (default)r_  ra  z select pages, format: 1,5-7,50-Nz1-N)r   rs  rw  z-noligaturesz*expand ligature characters (default False))ru  rs  rw  z-convert-whitez6convert whitespace characters to white (default False)z-extra-spacesz%fill gaps with spaces (default False)z-noformfeedz-write linefeeds, no formfeeds (default False)z-skip-emptyz+suppress pages with no text (default False)z3store text in this file (default inputfilename.txt)z-gridz+merge lines if closer than this (default 2)r&   z	-fontsizez4only include text with a larger fontsize (default 3)ro   internalzinternal testingrk  N)argparseArgumentParserr   add_subparsers
add_parseradd_argumentrY   set_defaultsr2   r]   r`   r   r   r   r   r   r   r   r   r   floatrl  rn  
parse_argshasattr
print_helprk  )parsersubpsps_showps_cleanps_join
ps_extractps_embed_addps_embed_delps_embed_updps_embed_extractps_embed_copy
ps_gettextps_internalr   r   r   r   mainK  s  
r  __main__)FT)rU   )"r  r  rs   r+   rV  typingr   r   r   rW   r   r   r#   r6   rD   rT   rj   r2   r   r   r   r   r   r   r   r   r   r  r  r^  rl  rn  r  __name__r   r   r   r   <module>   sF   
$
/?/("2%B b!  7
