
    f7                        d Z ddlZddlZddlZddlZddlmZmZm	Z	m
Z
mZmZmZmZmZ ddlmZ ddlZddlmZmZmZ ddlmZ ddlmZ ddlmZmZ dd	lmZmZm Z m!Z! dd
l"m#Z#m$Z$m%Z% ddl&m'Z'  ejP                           ejR                  e*      Z+ ejX                  d      Z-dee.e/f   de.fdZ0d(dede1dee.   ddfdZ2	 d)dedede3ddfdZ4	 	 d*dededee.   de3ddf
dZ5	 	 	 	 d+dede.dedee6   de.de3dee.   dee.   ddfdZ7 e%d       Z8 e%d!      Z9de.de.de.ddfd"Z:	 	 	 	 	 d,dede.de
e6   dee6   de.de3dee.   dee.   de3ddfd#Z;defd$Z<d(d%eee.      ddfd&Z=e*d'k(  r e=        yy)-z#Extract pdf structure in XML format    N)	Any	ContainerDictIterableListOptionalTextIOUnioncast)ArgumentParser)PDFDocumentPDFNoOutlinesPDFXRefFallback)PDFPage)	PDFParser)PDFObjectNotFoundPDFValueError)	PDFStream	PDFObjRefresolve1stream_value)	PSKeyword	PSLiteralLIT)isnumberz&[\000-\037&<>()"\042\047\134\177-\377]sreturnc                 n    t        | t              rt        | d      }n| }t        j	                  d |      S )Nzlatin-1c                 <    dt        | j                  d            z  S )Nz&#%d;r   )ordgroup)ms    <D:\switchATM\venv\Lib\site-packages\../../Scripts/dumppdf.py<lambda>zescape.<locals>.<lambda>   s    3qwwqz?!:     )
isinstancebytesstrESC_PATsub)r   uss     r#   escaper,      s/    !UI;;:B??r%   outobjcodecc                    || j                  d       y t        |t              r| j                  dt        |      z         |j	                         D ]G  \  }}| j                  d|z         | j                  d       t        | |       | j                  d       I | j                  d       y t        |t              rS| j                  dt        |      z         |D ]  }t        | |       | j                  d       ! | j                  d	       y t        |t        t        f      r)| j                  d
t        |      t        |      fz         y t        |t              r|dk(  r | j                  |j                                y |dk(  r | j                  |j                                y | j                  d       t        | |j                         | j                  d       |dk(  r8|j                         }| j                  dt        |      t        |      fz         | j                  d       y t        |t              r| j                  d|j                  z         y t        |t               r| j                  d|j"                  z         y t        |t$              r| j                  d|j"                  z         y t'        |      r| j                  d|z         y t)        |      )Nz<null />z<dict size="%d">
z<key>%s</key>
z<value>z	</value>
z</dict>z<list size="%d">

z</list>z<string size="%d">%s</string>rawbinaryz<stream>
<props>
z

</props>
textz<data size="%d">%s</data>
z	</stream>z<ref id="%d" />z<keyword>%s</keyword>z<literal>%s</literal>z<number>%s</number>)writer&   dictlenitemsdumpxmllistr(   r'   r,   r   get_rawdataget_dataattrsr   objidr   namer   r   	TypeError)r-   r.   r/   kvdatas         r#   r9   r9   !   sY   
{		*#t		&S12iik 	$FQII'!+,IIi COIIl#		$
 			)#t		&S12 	ACOIIdO	 			)#U|$		1SXvc{4KKL#y!E>IIcoo'( 	 hIIclln% 	 II+,C#IIn%||~		73t9fTl:SSTIIk"#y!		#cii/0#y!		)CHH45#y!		)CHH45}		'#-.
C.r%   docshow_fallback_xrefc                 2   |j                   D ]Q  }t        |t              r|s| j                  d       t	        | |j                                | j                  d       S t        d |j                   D              }|r|sd}t        j                  |       y )Nz
<trailer>
z
</trailer>

c              3   <   K   | ]  }t        |t                y wN)r&   r   ).0xrefs     r#   	<genexpr>zdumptrailers.<locals>.<genexpr>j   s     K:dO4Ks   zThis PDF does not have an xref. Use --show-fallback-xref if you want to display the content of a fallback xref that contains all objects.)	xrefsr&   r   r5   r9   get_trailerallloggerwarning)r-   rD   rE   rJ   no_xrefsmsgs         r#   dumptrailersrS   b   s     		 *$04FIIm$C))+,II()	*
 KKKH*$ 	
 	s
r%   c                    t               }| j                  d       |j                  D ]u  }|j                         D ]`  }||v r|j	                  |       	 |j                  |      }|.| j                  d|z         t        | ||       | j                  d       b w t        | ||       | j                  d       y # t        $ r}t        d|z         Y d }~d }~ww xY w)Nz<pdf>z<object id="%d">
r/   z
</object>

znot found: %rz</pdf>)
setr5   rL   
get_objidsaddgetobjr9   r   printrS   )	r-   rD   r/   rE   visitedrJ   r>   r.   es	            r#   dumpallobjsr]   u   s     eGIIg		 +__& 	+EKK+jj';		.67S.		+,	++ c-.IIh
	 % +o)**+s   B?)3B??	C CC outfpfnameobjidspagenospassworddumpall
extractdirc                    t        |d      }t        |      }	t        |	|      t        t	        j
                        d      D 
ci c]  \  }
}|j                  |
 }}
}dt        dt        ffd}	 j                         }| j                  d       |D ]  \  }}}}}d }
|r ||      }||d   j                     }
nc|ra|}t        |t              rO|j                  d      }|r<t        |      d	k(  r.|j                  d
      r ||d
         }||d   j                     }
t!        |      }| j                  dj#                  ||             |.| j                  d       t%        | |       | j                  d       |
| j                  d|
z         | j                  d        | j                  d       |	j)                          |j)                          y c c}}
w # t&        $ r Y 2w xY w)Nrb   destr   c                 <   t        | t        t        f      rt        j	                  |             } n4t        | t
              r$t        j	                  | j                              } t        | t              r| d   } t        | t              r| j                         } | S )ND)
r&   r(   r'   r   get_destr   r?   r6   r   resolve)rh   rD   s    r#   resolve_destz!dumpoutline.<locals>.resolve_dest   sr    dS%L)CLL./Di(CLL34DdD!9DdI&<<>Dr%   z<outlines>
r   Sz/'GoTo'rj   z"<outline level="{!r}" title="{}">
z<dest>z</dest>
z<pageno>%r</pageno>
z</outline>
z</outlines>
)openr   r   	enumerater   create_pagespageidobjectr   get_outlinesr5   r>   r&   r6   getreprr,   formatr9   r   close)r^   r_   r`   ra   rb   rc   r/   rd   fpparserpagenopagepagesrm   outlinesleveltitlerh   aseactionsubtyper   rD   s                          @r#   dumpoutliner      s    
eT	Br]F
fh
'C ((<(<S(A1EVT 	VE 
	6 	c 	##%N#+3 	('UE4BF#D)tAw}}-fd+$jjoG4=I#=&**S/+F3K8!&tAw}}!5uAKK=DDUANOH%t$K(!3f<=KK')	(* 	O$ LLNHHJ
[R  s   G4EG! !	G-,G-FilespecEmbeddedFilec                   
 dt         dt        t        t        f   dd f
fd}t	        | d      5 }t        |      }t        ||      
t               }
j                  D ]p  }|j                         D ][  }
j                  |      }	||vst        |	t              s*|	j                  d      t        u sB|j                  |        |||	       ] r 	 d d d        y # 1 sw Y   y xY w)Nr>   r.   r   c                 r   t         j                  j                  |j                  d      xs- t	        t
        |j                  d            j                               }|d   j                  d      xs |d   j                  d      }j                  |j                        }t        |t              sd|z  }t        |      |j                  d      t        urt        d|z        t         j                  j                  	d| |fz        }t         j                  j                  |      rt        d|z        t!        d	|z         t        j"                  t         j                  j%                  |      d
       t'        |d      }|j)                  |j+                                |j-                          y )NUFFEFz:unable to process PDF: reference for %r is not a PDFStreamTypez>unable to process PDF: reference for %r is not an EmbeddedFilez%.6d-%szfile exists: %rzextracting: %rT)exist_okwb)ospathbasenameru   r   r'   decoderY   r>   r&   r   r   LITERAL_EMBEDDEDFILEjoinexistsIOErrorrZ   makedirsdirnamero   r5   r<   rx   )
r>   r.   filenamefilereffileobj	error_msgr   r-   rD   rd   s
           r#   extract1z!extractembedded.<locals>.extract1   s^   77##CGGDM$WT%5N5U5U5WXd)--%;Ts);**W]]+'9-&'   	**;;v&::),46  ww||J	UH4E(EF77>>$+d233%&
BGGOOD)D94		'""$%		r%   rf   r   )intr   r(   r   ro   r   r   rV   rL   rW   rY   r&   r6   ru   LITERAL_FILESPECrX   )r_   rb   rd   r   ry   rz   extracted_objidsrJ   r>   r.   rD   s     `       @r#   extractembeddedr      s     $sCx. T 2 
eT	 )b2&(+5II 		)D* )jj'!11"3-+;;$((/UC()		)	) ) s   ACCC4CC%c	                 
   t        |d      }	t        |	      }
t        |
|      }|r&|D ]!  }|j                  |      }t	        | ||       # |rnt        t        j                  |            D ]M  \  }}||v s|r+|j                  D ]  }t        |      }t	        | ||        8t	        | |j                         O |rt        | |||       |s|s|st        | ||       |	j                          |dvr| j                  d       y )Nrf   rU   )r2   r3   r1   )ro   r   r   rY   r9   rp   r   rq   contentsr   r=   r]   rS   rx   r5   )r^   r_   r`   ra   rb   rc   r/   rd   rE   ry   rz   rD   r>   r.   r{   r|   s                   r#   dumppdfr      s    
eT	Br]F
fh
'C 	-E**U#CE3e,	- '(<(<S(AB 	/NVT #}} 9*3/s%89 E4::.	/ E3'9:WwUC!34HHJ%%D
r%   c                     t        t        d      } | j                  dt        d dd       | j                  ddd	d
j	                  t
        j                               | j                  ddddd       | j                         }|j                  ddddd       |j                  ddt        d       | j                  dd      }|j                  dt        d dd       |j                  ddt        d        |j                  d!d"t        d#       |j                  d$d%ddd&       |j                  d'dd()       |j                  d*d+t        d,d-.       | j                  d/d0      }|j                  d1d2t        d3d4.       |j                         }|j                  d5d6ddd7       |j                  d8d9ddd:       |j                  d;d<ddd=       | S )>NT)descriptionadd_helpfiles+zOne or more paths to PDF files.)typedefaultnargshelpz	--versionz-vversionzpdfminer.six v{})r   r   z--debugz-dF
store_truezUse debug logging level.)r   r   r   z--extract-tocz-TzExtract structure of outlinez--extract-embeddedz-EzExtract embedded files)r   r   ParserzUsed during PDF parsing)r   z--page-numbersz0A space-seperated list of page numbers to parse.z	--pagenosz-pzA comma-separated list of page numbers to parse. Included for legacy applications, use --page-numbers for more idiomatic argument entry.z	--objectsz-iz1Comma separated list of object numbers to extractz--allz-az3If the structure of all objects should be extractedz--show-fallback-xrefzAdditionally show the fallback xref. Use this if the PDF has zero or only invalid xref's. This setting is ignored if --extract-toc or --extract-embedded is used.)r   r   z
--passwordz-P z,The password to use for decrypting PDF file.)r   r   r   OutputzUsed during output generation.z	--outfilez-o-zJPath to file where output is written. Or "-" (default) to write to stdout.z--raw-streamz-rz%Write stream objects without encodingz--binary-streamz-bz)Write stream objects with binary encodingz--text-streamz-tz"Write stream objects as plain text)
r   __doc__add_argumentr(   rw   pdfminer__version__add_mutually_exclusive_groupadd_argument_groupr   )rz   procedure_parserparse_paramsoutput_paramscodec_parsers        r#   create_parserr   !  s   $?F
.   "))(*>*>?	   '   ::<!!+ "  !!d3K "  ,,7 - L ?   	   @	   B   7   ;   --> . M    !==?L4   8   1   Mr%   argvc                 N   t               }|j                  |       }|j                  r1t        j                         j                  t        j                         |j                  dk(  rt        j                  }nt        |j                  d      }|j                  r2|j                  j                  d      D cg c]  }t        |       }}ng }|j                  r|j                  D ch c]  }|dz
  	 }}nK|j                  r5|j                  j                  d      D ch c]  }t        |      dz
   }}n
t!               }|j"                  }|j$                  rd}n |j&                  rd}n|j(                  rd}nd }|j*                  D ]y  }	|j,                  rt/        ||	||||j0                  |d 	       -|j2                  rt5        |	||j2                  
       Rt7        ||	||||j0                  |d |j8                  	       { |j;                          y c c}w c c}w c c}w )N)argsr   w,rg   r2   r3   r4   )rb   rc   r/   rd   )rb   rd   )rb   rc   r/   rd   rE   )r   
parse_argsdebuglogging	getLoggersetLevelDEBUGoutfilesysstdoutro   objectssplitr   page_numbersra   rV   rb   
raw_streambinary_streamtext_streamr   extract_tocr   rN   extract_embeddedr   r   rE   rx   )
r   rz   r   r^   xr`   ra   rb   r/   r_   s
             r#   mainr     s   _F$'Dzz$$W]]3||s

T\\3'||"&,,"4"4S"9:Q#a&::"&"3"34Q1q544	'+||'9'9#'>?!3q6A:??%}}H$						 !	 ""EHAVAVW!#'#:#:
6 
KKMc ;
 5?s   9H*H"H"__main__rH   )F)NF)r   FNN)r   FNNF)>r   r   os.pathr   rer   typingr   r   r   r   r   r   r	   r
   r   argparser   r   pdfminer.pdfdocumentr   r   r   pdfminer.pdfpager   pdfminer.pdfparserr   pdfminer.pdftypesr   r   r   r   r   r   pdfminer.psparserr   r   r   pdfminer.utilsr   basicConfigr   __name__rO   compiler)   r(   r'   r,   rs   r9   boolrS   r]   r   r   r   r   r   r   r   r    r%   r#   <module>r      s   )   	 
 V V V #  L L $ ( > J J 7 7 #    			8	$
"**>
?@eCJ @C @> >f >Xc] >d >D ?D	!7;	,  $			 C= 	
 
@  $::: : s^	:
 : : C=: : 
:z z? >* (3 (# (3 (4 (`  $$""" SM" s^	"
 " " C=" " " 
"Js~ sl>xS	" >d >B zF r%   