
    fb              	          d dl Z d dlmZ d dlmZmZmZmZmZm	Z	m
Z
mZmZmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZ d dlmZmZ d dlmZ d d	l m!Z! d
dl"m#Z# d
dl$m%Z%m&Z&m'Z'm(Z( d
dl)m*Z* d
dl+m,Z,m-Z- d
dl.m/Z/m0Z0m1Z1m2Z2 d
dl#m3Z3m4Z4m5Z5 d
dl6m7Z7  e jp                  d      Z9 e:g d      Z;erd
dl<m=Z= d
dl>m?Z? ddddddZ@deAdeBfdZCdeed f   dee
eeeDeEf   d f      e
eB   f   fd!ZFdedee
eeeDeEf   d f      e
eB   f   fd"ZGd#eeBef   deeBef   fd$ZH G d% d&e      ZId7d'e%d(e&de%fd)ZJd'e%d*e&de%fd+ZK G d, d-e*      ZL G d. d/eL      ZMd0e%d1e%ddfd2ZN G d3 d4eM      ZO G d5 d6eM      ZPy)8    N)	lru_cache)
TYPE_CHECKINGAnyCallableDict	GeneratorListOptionalPatternTupleUnion)	normalize)PDFPageAggregator)LTCharLTComponentLTContainerLTCurveLTItemLTPageLTTextContainer)PDFPageInterpreter	PDFStackT)PDFPage)	PSLiteral   )utils)T_bboxT_numT_obj
T_obj_list)	Container)PDFStructTreeStructTreeMissing)T_table_settingsTableTableFinderTableSettings)decode_textresolve_allresolve_and_decode)TextMapz^LT)advheight	linewidthptssizesrcsizewidthx0x1y0y1bitsmatrixuprightfontnametext	imagemask
colorspaceevenoddfillnon_stroking_colorstrokestroking_colorstreammcidtag)	PageImage)PDFzSimSun,RegularzSimHei,RegularzSimKai,RegularzSimFang,RegularzSimLi,Regular)s   s   s   _GB2312s   _GB2312s   r:   returnc                     d| v r| j                  d      dz   }| d | | |d  }}nd| }}t        j                  |t        |      dd       }t        |      dd |z   S )N   +r          )indexCP936_FONTNAMESgetstr)r:   split_atprefixsuffix
suffix_news        6D:\switchATM\venv\Lib\site-packages\pdfplumber/page.pyfix_fontname_bytesrW   X   sp    x>>$'!+!)8,hxy.Ah $$VS[2->?Jv;qz))rK   color.c                 r    t        | d   t              r!| d d xs d t        | d   j                        fS | d fS )NrM   )
isinstancer   r(   name)rX   s    rV   separate_patternr\   c   s?     %)Y'cr
"d[r%@@@d{rK   c                     | yt        | t              r| }t        |      S t        | t              rt        |       }t        |      S | f}t        |      S )N)NN)rZ   tuplelistr\   )rX   	tuplefieds     rV   normalize_colorra   l   s^     }	E5	!	
 I&&	 
E4	 %L	 I&& H	I&&rK   kwargsc           	          | j                         D ci c]#  \  }}|t        |t              rt        |      n|% c}}S c c}}w N)itemsrZ   r_   r^   )rb   keyvalues      rV   tuplify_list_kwargsrh   z   sB     !,,.C 	j5eEl5@  s   (A c                        e Zd ZU dZdZee   ed<   dZee	   ed<   dde
dee   ddfdZdd	Zdd
Zdef fdZd fdZd fdZ xZS )"PDFPageAggregatorWithMarkedContentzZExtract layout from a specific page, adding marked-content IDs to
    objects where found.Ncur_mcidcur_tagrE   propsrH   c                     t        |j                        | _        t        |t              rd|v r|d   | _        yd| _        y)z5Handle beginning of tag, setting current MCID if any.MCIDN)r(   r[   rl   rZ   dictrk   )selfrE   rm   s      rV   	begin_tagz,PDFPageAggregatorWithMarkedContent.begin_tag   s4    "388,eT"v!&MDM DMrK   c                      d| _         d| _        y)z/Handle beginning of tag, clearing current MCID.N)rl   rk   rq   s    rV   end_tagz*PDFPageAggregatorWithMarkedContent.end_tag   s    rK   c                     | j                   j                  r<| j                   j                  d   }| j                  |_        | j                  |_        yy)z^Add current MCID to what we hope to be the most recent object created
        by pdfminer.six.rM   N)cur_item_objsrk   rD   rl   rE   )rq   cur_objs     rV   tag_cur_itemz/PDFPageAggregatorWithMarkedContent.tag_cur_item   s@     ==mm))"-G==GL,,GK rK   c                 F    t        |   |i |}| j                          |S )z;Hook for rendering characters, adding the `mcid` attribute.)superrender_charrz   )rq   argsrb   r,   	__class__s       rV   r}   z.PDFPageAggregatorWithMarkedContent.render_char   s(    g!4262
rK   c                 D    t        |   |i | | j                          y)z7Hook for rendering images, adding the `mcid` attribute.N)r|   render_imagerz   rq   r~   rb   r   s      rV   r   z/PDFPageAggregatorWithMarkedContent.render_image   s!    d-f-rK   c                 D    t        |   |i | | j                          y)zAHook for rendering lines and curves, adding the `mcid` attribute.N)r|   
paint_pathrz   r   s      rV   r   z-PDFPageAggregatorWithMarkedContent.paint_path   s!    D+F+rK   rd   rH   N)__name__
__module____qualname____doc__rk   r
   int__annotations__rl   rQ   r   r   rr   ru   rz   floatr}   r   r   __classcell__r   s   @rV   rj   rj      sj     #Hhsm"!GXc]!!Y !x	/B !d !
'e 
 rK   rj   box_rawrotationc                 z    t        | d   | d   f      \  }}t        | d   | d   f      \  }}|dv r||||fS ||||fS )Nr   rL   r      )Z   i  )sorted)r   r   r3   r4   r5   r6   s         rV   _normalize_boxr      s`    
 WQZ,-FBWQZ,-FB9BBBBrK   	mb_heightc                 (    | \  }}}}|||z
  |||z
  fS rd    )r   r   r3   r5   r4   r6   s         rV   _invert_boxr      s&    NBB	BIN33rK   c                      e Zd ZU ej                  dgz   Zee   ed<   dZe	ed<   dZ
	 dEddded	ed
efdZdFdZedefd       Zedefd       Zedeeeef      fd       Zedefd       Zedefd       Zedefd       Zedeeef   fd       Zdeeef   deeef   fdZdedefdZ dee!   de"eddf   fdZ#deeef   fdZ$	 dGde%e&   de'fdZ(	 dGde%e&   dee)   fdZ*	 dGde%e&   de%e)   fdZ+	 dGde%e&   deeee%e            fd Z,	 dGde%e&   de%eee%e            fd!Z-d"ede.fd#Z/	 	 	 	 	 dHd$e0ee1e   f   d%e	d&e	d'ed(e	d)e	d"edeeeef      fd*Z2d"edefd+Z3d"edefd,Z4d"edefd-Z5	 dId.e	d(e	d"edefd/Z6	 dJd0e7d1e	d2e	dd3fd4Z8	 dJd0e7d1e	d2e	dd3fd5Z9	 dJd0e7d1e	d2e	dd3fd6Z:d7e;ege	f   dd8fd9Z<d"edd8fd:Z=	 	 	 	 	 dKd;e%e0ee>f      d<e%e0ee>f      d=e%e0ee>f      d>e	d?e	dd@fdAZ?dGdBe%ee      deeef   fdCZ@defdDZAy)LPage_layoutcached_propertiesTis_originalNpdfrG   page_objpage_numberinitial_doctopc                 
   || _         | | _        | _        || _        || _        ddt
        dt        dt        ffd} |dd      }|dz  | _        t         |d      | j                        }|d	   |d
   z
  }t        ||      | _
        dj                  v r,t        t         |d      | j                        |      | _        n| j                  | _        | j                  | _         t               | j                        | _        y )Nrf   defaultrH   c                 X    t        j                  j                  |             }||S |S rd   )r)   attrsrP   )rf   r   rg   r   s      rV   get_attrzPage.__init__.<locals>.get_attr   s+     2 23 78E#m766rK   Rotater   ih  MediaBoxr   r   CropBoxrd   )r   	root_pager   r   r   rQ   r   r   r   r   mediaboxr   cropboxbboxr   _get_textmapget_textmap)	rq   r   r   r   r   r   	_rotationmb_rawr   s	     `      rV   __init__zPage.__init__   s      &,	7# 	7 	7s 	7 Xq)	!C 4dmmD1Iq	)	#FI6&&x	2DMMBIDL  ==DL MM	 '9;t'8'89rK   rH   c                 X    | j                          | j                  j                          y rd   )flush_cacher   cache_clearrt   s    rV   closez
Page.close   s     $$&rK   c                 @    | j                   d   | j                   d   z
  S )NrL   r   r   rt   s    rV   r2   z
Page.width       yy|diil**rK   c                 @    | j                   d   | j                   d   z
  S )Nr   r   r   rt   s    rV   r-   zPage.height   r   rK   c                     	 t        | j                  |       D cg c]  }|j                          c}S c c}w # t        $ r g cY S w xY w)z-Return the structure tree for a page, if any.)r"   r   to_dictr#   )rq   elems     rV   structure_treezPage.structure_tree  s@    	/<TXXt/LMtDLLNMMM  	I	s   9 49 9 AAc                 j   t        | d      r| j                  S t        | j                  j                  | j
                  | j                  j                        }t        | j                  j                  |      }|j                  | j                         |j                         | _        | j                  S )Nr   )pagenolaparams)hasattrr   rj   r   rsrcmgrr   r   r   process_pager   
get_result)rq   deviceinterpreters      rV   layoutzPage.layout  s    4#<<3HH##XX&&

 ))9)96B  /%002||rK   c                 F    dt         t        t        f   dt        dt         t        t        f   f fddt        dt        f fd}t	         j
                  j                        xs g }t        t        ||            }t         t              r j                  |      S |S )NptrrH   c                     |dz  }t        |      D ].  }| \  }}||dz  k(  rj                  nj                  }|||z
  f} 0 | S )Nr   rL   )ranger2   r-   )r   r   turnsixycomprq   s          rV   rotate_pointz!Page.annots.<locals>.rotate_point  sU    GE5\ %1%&%!)^tzz$(_% IrK   annotc                    | d   \  }}}} ||fj                         } ||fj                         }j                  j                  }t        t	        g ||      |      \  }}	}
}| j                  di       }|j                  d      | j                  d      | j                  d      d}|j                         D ]  \  }}|		 |j                  d      ||<    j                  d	|||z
  |
||	z
  j                  |	z   |	||
|z
  ||	z
  d
}|j                  |       d| v r| d<   | |d<   |S # t        $ r |j                  d      ||<   Y w xY w)NRectAURITContents)urititlecontentszutf-8zutf-16r   )r   object_typer3   r5   r4   r6   doctoptopbottomr2   r-   Pdata)r   r   r-   r   r   rP   re   decodeUnicodeDecodeErrorr   r   update)r   _a_b_c_dpt0pt1rhr3   r   r4   r   aextraskvparsedr   rq   s                    rV   parsezPage.annots.<locals>.parse#  sy   "6]NBBBx7CBx7C&&B"-n\s\S\.JB"OBR		#r"AuuU|3!IIj1F
  71=7$%HHW$5q	7  $//&6k3h--3 b 3,F MM&! e|!c
"F6NM- . 7$%HHX$6q	7s   
D22EE)r   r   r   r   r)   r   annotsr_   maprZ   CroppedPage_crop_fn)rq   r   rawr   r   s   `   @rV   r   zPage.annots  s    	U5%<0 	S 	U5%<=P 	'	 '	5 '	R $--../52c%o&dK(==((MrK   c                 L    | j                   D cg c]
  }|d   	| c}S c c}w )Nr   )r   )rq   r   s     rV   
hyperlinkszPage.hyperlinksS  s#    ;;?a!E(*>???s   
!!c                 t    t        | d      r| j                  S | j                         | _        | j                  S N_objects)r   r   parse_objectsrt   s    rV   objectszPage.objectsW  s0    4$== /3/A/A/C}}rK   r   c                 p    | j                   d   |d   z   | j                   d   | j                  z   |d   z
  fS )Nr   r   )r   r-   )rq   r   s     rV   point2coordzPage.point2coord^  s:    a 2a5($--*:T[[*H2a5*PQQrK   objc           
         t        j                  t        d|j                  j                        j                         }dt        t        t        f   dt        t        t        t        f      fd}t        t        d t        ||j                  j                                           }||d<   | j                  |d<   dD ]1  }t!        ||      st#        t%        ||      j&                        ||<   3 dD ]!  \  }}||v st)        ||         \  ||<   ||<   # t+        |t,        t.        f      rK|j1                         }| j2                  j4                   t7        | j2                  j4                  |      n||d	<   t+        |t,              rm|j8                  }	t)        |	j:                        \  |d
<   |d<   t)        |	j<                        \  |d<   |d<   t+        |d   t>              rtA        |d         |d<   n~t+        |tB        f      rmtE        t        | jF                  |d               |d<   |jH                  D 
cg c]  ^}
}|
gt        | jF                  |        c}}
|d<   |jJ                  |d<   | jL                  d d \  }}d|v rE| jN                  |d   z
  |z   |d<   | jN                  |d   z
  |z   |d<   | jP                  |d   z   |d<   d|v r|dk7  r|d   |z   |d<   |d   |z   |d<   |S c c}}
w )N itemrH   c                 <    | \  }}|t         v rt        |      }||fS y rd   )	ALL_ATTRSr)   )r  r   r   ress       rV   process_attrz)Page.process_object.<locals>.process_attre  s'    DAqI~!!n3xrK   r   r   )ncsscs))rB   stroking_pattern)r@   non_stroking_patternr;   rB   r  r@   r  r:   r/   pathdashrL   r5   r6   r   r   r   r3   r   r4   ))resublt_patr   r   lowerr   rQ   r   r
   rp   filterr   __dict__re   r   r   r*   getattrr[   ra   rZ   r   r   get_textr   unicode_normnormalize_unicodegraphicstatescolorncolorbytesrW   r   r_   r  original_pathdashing_styler   r-   r   )rq   r  kindr
  attrcs
color_attrpattern_attrr;   gscmdr/   mb_x0mb_tops                 rV   process_objectzPage.process_objectb  s   vvfb#--"8"89??A	uS#X 	8E#s(O3L 	 F4\3<<3E3E3G!HIJ"]"..]  	EB sB-gc2.>.C.CDR	E)
 	Y$J T!7FtJGW7X4Z $|"4	Y cFO45<<>D 88((4 "$(("7"7> L c6" !!B?N		@<D!"D);$< HW		HDD%&-C(D
 $z*E2#5d:6F#GZ gZ(s4#3#3T%[ABDK QTPaPab93S>3t'7'7#=>bDL,,DL
 bq)v4<;;d3v=DK"kkDJ6&@DN!004;>DN4<EQJde+DJde+DJ% cs   9#K.layout_objectsc              #      K   |D ]r  }t        |t              rM| j                  j                  | j	                  |       | j                  |j                        E d {    `| j	                  |       t y 7 wrd   )rZ   r   r   r   r*  iter_layout_objectsrx   )rq   r+  r  s      rV   r-  zPage.iter_layout_objects  sp      " 		/C#{+88$$0--c2233CII>>>))#..		/ ?s   AA=A; A=c                     i }| j                  | j                  j                        D ]6  }|d   }|dv r|j                  |      g ||<   ||   j	                  |       8 |S )Nr   )anno)r-  r   rx   rP   append)rq   r   r  r!  s       rV   r   zPage.parse_objects  sq    )+++DKK,=,=> 	&C}%Dx{{4 ( "DM  %	& rK   table_settingsc                 D    t        j                  |      }t        | |      S rd   )r'   resolver&   rq   r1  tsets      rV   debug_tablefinderzPage.debug_tablefinder  s!     $$^44&&rK   c                 X    t        j                  |      }t        | |      j                  S rd   )r'   r3  r&   tablesr4  s      rV   find_tableszPage.find_tables  s'     $$^44&---rK   c                     t        j                  |      }| j                  |      }t        |      dk(  ry dt        dt
        t        t        t        f   fd}t        t        ||            d   }|S )Nr   r   rH   c                 h    t        | j                         | j                  d   | j                  d   fS )Nr   r   )lencellsr   r   s    rV   sorterzPage.find_table.<locals>.sorter  s)    \M166!9affQi88rK   )rf   )
r'   r3  r9  r<  r%   r   r   r   r_   r   )rq   r1  r5  r8  r?  largests         rV   
find_tablezPage.find_table  so     $$^4!!$'v;!	9e 	9c5%&7 8 	9 vf&1215rK   c           	          t        j                  |      }| j                  |      }|D cg c]"  } |j                  di |j                  xs i $ c}S c c}w Nr   )r'   r3  r9  extracttext_settings)rq   r1  r5  r8  tables        rV   extract_tableszPage.extract_tables  sQ     $$^4!!$'IOP;!3!3!9r;PPPs   'Ac                     t        j                  |      }| j                  |      }|y  |j                  di |j                  xs i S rC  )r'   r3  rA  rD  rE  )rq   r1  r5  rF  s       rV   extract_tablezPage.extract_table  sI     $$^4%= 5==>D$6$6$<">>rK   rb   c                     t        | j                        }d|vr|j                  d| j                  i       d|vr|j                  d| j                  i       i ||}t        j                  | j                  fi |S )N)layout_bboxlayout_width_charslayout_widthlayout_height_charslayout_height)rp   r   r   r2   r-   r   chars_to_textmapchars)rq   rb   defaultsfull_kwargss       rV   r   zPage._get_textmap  sx    #'		$
  v-OO^TZZ89 .OO_dkk:;&<&<V&<%%djj@K@@rK   patternregexcase
main_groupreturn_charsreturn_groupsc                 f     | j                   di t        |      }|j                  ||||||      S )N)rU  rV  rW  rX  rY  r   )r   rh   search)	rq   rT  rU  rV  rW  rX  rY  rb   textmaps	            rV   r[  zPage.search  sG     #$""A%8%@A~~!%'  
 	
rK   c                 L     | j                   di t        |      j                  S rC  )r   rh   	as_stringrq   rb   s     rV   extract_textzPage.extract_text  s$    t>"5f"=>HHHrK   c                 B    t        j                  | j                  fi |S rd   )r   extract_text_simplerQ  r_  s     rV   rb  zPage.extract_text_simple  s    ((>v>>rK   c                 B    t        j                  | j                  fi |S rd   )r   extract_wordsrQ  r_  s     rV   rd  zPage.extract_words  s    ""4::888rK   stripc                 Z     | j                   di t        |      j                  ||      S )N)re  rX  r   )r   rh   extract_text_lines)rq   re  rX  rb   s       rV   rg  zPage.extract_text_lines"  s8      t>"5f"=>QQl R 
 	
rK   r   relativestrictr   c                      t        | |||      S )N)rh  ri  )r   rq   r   rh  ri  s       rV   cropz	Page.crop)  s     4HHrK   c                 >    t        | |||t        j                        S zS
        Same as .crop, except only includes objects fully within the bbox
        )rh  ri  crop_fn)r   r   within_bboxrk  s       rV   rp  zPage.within_bbox.  s"     $&%BSBS
 	
rK   c                 >    t        | |||t        j                        S rn  )r   r   outside_bboxrk  s       rV   rr  zPage.outside_bbox8  s"     $&%BTBT
 	
rK   test_functionFilteredPagec                     t        | |      S rd   )rt  )rq   rs  s     rV   r  zPage.filterB  s    D-00rK   c                     t        | d       }| j                  j                         D ci c]  \  }}||
 c}}|_        t	        j
                  | j                  fi ||j                  d<   |S c c}}w )u   
        Removes duplicate chars — those sharing the same text and positioning
        (within `tolerance`) as other characters in the set. Adjust extra_args
        to be more/less restrictive with the properties checked.
        c                      y)NTr   r>  s    rV   <lambda>z#Page.dedupe_chars.<locals>.<lambda>K  s    rK   char)rt  r   re   r   r   dedupe_charsrQ  )rq   rb   pr!  objss        rV   rz  zPage.dedupe_charsE  sd     ~.37<<3E3E3GHZT4dDjH
"//

EfE

6 Is   A0
resolutionr2   r-   	antialiasforce_mediaboxrF   c                     ddl m}m} t        d |||fD              }|dkD  rt	        d|       |d|z  | j
                  z  }n|d|z  | j                  z  } || |xs |||      S )z
        You can pass a maximum of 1 of the following:
        - resolution: The desired number pixels per inch. Defaults to 72.
        - width: The desired image width in pixels.
        - height: The desired image width in pixels.
        r   )DEFAULT_RESOLUTIONrF   c              3   $   K   | ]  }|d u 
 y wrd   r   ).0r   s     rV   	<genexpr>z Page.to_image.<locals>.<genexpr>`  s     K!Ks   zUOnly one of these arguments can be provided: resolution, width, height. You provided H   )r}  r~  r  )displayr  rF   sum
ValueErrorr2   r-   )	rq   r}  r2   r-   r~  r  r  rF   	num_specss	            rV   to_imagezPage.to_imageP  s     	;K
E6/JKK	q=ghqgrs  edjj0Jft{{2J!7%7)	
 	
rK   object_typesc           	      L   |(t        | j                  j                               dgz   }n|}| j                  | j                  | j
                  | j                  | j                  | j                  | j                  | j                  d}|D ]  }t        | |dz         ||dz   <    |S )Nr   )r   r   r   r   r   r   r2   r-   s)r_   r   keysr   r   r   r   r   r   r2   r-   r  )rq   r  _object_typesdts        rV   r   zPage.to_dictq  s     !2!2!45	AM(M++"11||IIZZkk	
  	0A q3w/Aa#gJ	0rK   c                 "    d| j                    dS )Nz<Page:>)r   rt   s    rV   __repr__zPage.__repr__  s    (()++rK   r   r   rd   )TTr   TT)TT)FT)NNNFF)Br   r   r   r!   r   r	   rQ   r   r   boolpagesr   r   r   r   r   propertyr2   r-   r   r   r   r   r   r    r   r   r   r   r  r   r   r*  r   r   r-  r   r
   r$   r&   r6  r%   r9  rA  rG  rI  r+   r   r   r   r[  r`  rb  rd  rg  r   rl  rp  rr  r   r  rz  r   r  r   r  r   rK   rV   r   r      s   #,#>#>)#LtCyLKE !"':': ': 	':
 ':R' +u + + + + + T#s(^ 4      7
 7 7r @J @ @ c:o.  ReE5L1 ReE5L6I RO& OU Ob/";//	5$$	%/	tCO4 	 <@'&'78'	' <@.&'78.	e. <@&'78	%$ <@Q&'78Q	d4&'	(Q <@?&'78?	$tHSM*+	,?	AS 	AW 	A !"
sGCL()
 
 	

 
 
 
 
 
d38n	
(IS IS I?C ?C ?9c 9j 9 8<

04
GJ
	
 DHII&*I<@I	I DH

&*
<@
	
 DH

&*
<@
	
1HeWd]$; 1 1	S 	^ 	 37-1.2$
U3:./
 c5j)*
 sEz*+	

 
 
 

BHT#Y$7 4S> &,# ,rK   r   c                   (    e Zd ZU dZeed<   defdZy)DerivedPageFr   parent_pagec                    || _         |j                  | _        |j                  | _        |j                  | _        |j                  | _        |j
                  | _        |j                  | _        |j                  | _        |j                  | _        | j                  t        j                          t               | j                        | _        y rd   )r  r   r   r   r   r   r   r   r   r   r!   r   r   r   r   )rq   r  s     rV   r   zDerivedPage.__init__  s    &$..??#,,&22)88#,,#,,"**445&9;t'8'89rK   N)r   r   r   r   r  r   r   r   r   rK   rV   r  r    s    K:D :rK   r  r   parent_bboxc                     t        j                  |       }|dk(  rt        d|  d      t        j                  | |      }|t        d|  d|       t        j                  |      }||k  rt        d|  d|       y )Nr   zBounding box z has an area of zero.z. is entirely outside parent page bounding box z. is not fully within parent page bounding box )r   calculate_arear  get_bbox_overlap)r   r  	bbox_areaoverlapoverlap_areas        rV   test_proposed_bboxr    s    $$T*IA~=.CDEE$$T;7GD6 "((3}6
 	

 ''0LiD6 "((3}6
 	
  rK   c                   ~     e Zd Zej                  ddfdededeeegef   de	de	f
 fdZ
ed	eeef   fd
       Z xZS )r   FTr  	crop_bboxro  rh  ri  c                 :   |r*|j                   \  }}}}\  }	}
}}|	|z   |
|z   ||z   ||z   f|rt        |j                          dt        dt        ffd}t        |   |       || _        t        j                  u r|j                   | _         y | _         y )Nr|  rH   c                      |       S rd   r   )r|  r  ro  s    rV   r   z&CroppedPage.__init__.<locals>._crop_fn  s    4++rK   )r   r  r    r|   r   r   r   rr  )rq   r  r  ro  rh  ri  o_x0o_top_r3   r   r4   r   r   r   s     ``          rV   r   zCroppedPage.__init__  s      + 0 0D%A"+BRdC%KdFUNKIy+*:*:;	,: 	,* 	, 	%  e(((#((DI!DIrK   rH   c                     t        | d      r| j                  S | j                  j                  j	                         D ci c]  \  }}|| j                  |       c}}| _        | j                  S c c}}w r   )r   r   r  r   re   r   rq   r   r   s      rV   r   zCroppedPage.objects  se    4$== ,0,<,<,D,D,J,J,L0
$(AqAt}}Q0
 }}0
s    A1)r   r   r   r   crop_to_bboxr   r   r   r    r  r   r  r   rQ   r   r   r   s   @rV   r   r     s}    
 ?D>P>P"" " :v.
:;	"
 " ": c:o.  rK   r   c                   T     e Zd Zdedeegef   f fdZede	e
ef   fd       Z xZS )rt  r  	filter_fnc                 T    |j                   | _         || _        t        |   |       y rd   )r   r  r|   r   )rq   r  r  r   s      rV   r   zFilteredPage.__init__  s$    $$	"%rK   rH   c                 
   t        | d      r| j                  S | j                  j                  j	                         D ci c]%  \  }}|t        t        | j                  |            ' c}}| _        | j                  S c c}}w r   )r   r   r  r   re   r_   r  r  r  s      rV   r   zFilteredPage.objects  sq    4$==  ((006680
1 tF4>>1-..0
 }}	0
s    *A?)r   r   r   r   r   r   r  r   r  r   rQ   r    r   r   r   s   @rV   rt  rt    sE    &D &Xugtm5L &
 c:o.  rK   rt  r  )Qr  	functoolsr   typingr   r   r   r   r   r	   r
   r   r   r   unicodedatar   r  pdfminer.converterr   pdfminer.layoutr   r   r   r   r   r   r   pdfminer.pdfinterpr   r   pdfminer.pdfpager   pdfminer.psparserr   r  r   _typingr   r   r   r    	containerr!   	structurer"   r#   rF  r$   r%   r&   r'   r(   r)   r*   
utils.textr+   compiler  setr  r  rF   r   rG   rO   r  rQ   rW   r   r   r\   ra   rh   rj   r   r   r   r  r  r   rt  r   rK   rV   <module>r     s   	    7 0   = $ '  5 5   7 F F ? ? 	F		@ "
 *) 0 1(* *3 *c?
8E%s
+S012HSMAB''
8E%s
+S012HSMAB'S#X 4S> 0): 0f
 F 
 e 
 F 
  4 4E 4f 4
|,9 |,~:$ :"
V 
& 
T 
(%+ %P; rK   