
    Kui?                       d Z ddlmZ ddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZ dd	lmZ dd
lmZ ddlm Z m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3 ddl4m5Z5m6Z6m7Z7  e	jp                  e9      Z:ee;e;f   Z<d3dZ= G d de>      Z?d4dZ@ G d de>      ZAd5dZBd6dZCd7dZD G d  d!      ZE G d" d#e      ZFd8d$ZGeGd9d&       ZH ed'(       G d) d%             ZI G d* d+e      ZJ	 d:	 	 	 	 	 	 	 d;d,ZK	 d<	 	 	 	 	 d=d-ZLd>d.ZM G d/ d0e      ZN G d1 d2      ZOy)?zO
The main purpose of this module is to expose LinkCollector.collect_sources().
    )annotationsN)IterableMutableMappingSequence)	dataclass)
HTMLParser)Values)Callable
NamedTupleProtocol)requests)Response)
RetryErrorSSLError)NetworkConnectionError)Link)SearchScope)
PipSession)raise_for_status)is_archive_fileredact_auth_from_url)url_to_path)vcs   )CandidatesFromPage
LinkSourcebuild_sourcec                    t         j                  D ]6  }| j                         j                  |      s#| t	        |         dv s4|c S  y)zgLook for VCS schemes in the URL.

    Returns the matched VCS scheme, or None if there's no match.
    z+:N)r   schemeslower
startswithlen)urlschemes     g/root/.openclaw/workspace/chatterbox_venv/lib/python3.12/site-packages/pip/_internal/index/collector.py_match_vcs_schemer'   .   sE    
 ++ 99;!!&)c#f+.>$.FM     c                        e Zd Zd fdZ xZS )_NotAPIContentc                B    t         |   ||       || _        || _        y N)super__init__content_typerequest_desc)selfr/   r0   	__class__s      r&   r.   z_NotAPIContent.__init__:   s"    |4((r(   )r/   strr0   r3   returnNone)__name__
__module____qualname__r.   __classcell__r2   s   @r&   r*   r*   9   s    ) )r(   r*   c                    | j                   j                  dd      }|j                         }|j                  d      ryt	        || j
                  j                        )z
    Check the Content-Type header to ensure the response contains a Simple
    API Response.

    Raises `_NotAPIContent` if the content type is not a valid content-type.
    Content-TypeUnknown)z	text/htmlz#application/vnd.pypi.simple.v1+html#application/vnd.pypi.simple.v1+jsonN)headersgetr!   r"   r*   requestmethod)responser/   content_type_ls      r&   _ensure_api_headerrE   @   s[     ##''	BL!'')N  	
 	
x'7'7'>'>
??r(   c                      e Zd Zy)_NotHTTPN)r6   r7   r8    r(   r&   rG   rG   V   s    r(   rG   c                    t         j                  j                  |       \  }}}}}|dvr
t               |j	                  | d      }t        |       t        |       y)z
    Send a HEAD request to the URL, and ensure the response contains a simple
    API Response.

    Raises `_NotHTTP` if the URL is not available for a HEAD request, or
    `_NotAPIContent` if the content type is not a valid content type.
    >   httphttpsT)allow_redirectsN)urllibparseurlsplitrG   headr   rE   )r$   sessionr%   netlocpathqueryfragmentresps           r&   _ensure_api_responserW   Z   sV     -3LL,A,A#,F)FFD%&&j<<T<2DTtr(   c                   t        t        |       j                        rt        | |       t        j                  dt        |              |j                  | dj                  g d      dd      }t        |       t        |       t        j                  dt        |       |j                  j                  d	d
             |S )aY  Access an Simple API response with GET, and return the response.

    This consists of three parts:

    1. If the URL looks suspiciously like an archive, send a HEAD first to
       check the Content-Type is HTML or Simple API, to avoid downloading a
       large file. Raise `_NotHTTP` if the content type cannot be determined, or
       `_NotAPIContent` if it is not HTML or a Simple API.
    2. Actually perform the request. Raise HTTP exceptions on network failures.
    3. Check the Content-Type header to make sure we got a Simple API response,
       and raise `_NotAPIContent` otherwise.
    rQ   zGetting page %sz, )r>   z*application/vnd.pypi.simple.v1+html; q=0.1ztext/html; q=0.01z	max-age=0)AcceptzCache-Control)r?   zFetched page %s as %sr<   r=   )r   r   filenamerW   loggerdebugr   r@   joinr   rE   r?   )r$   rQ   rV   s      r&   _get_simple_responser_   l   s     tCy))*S'2
LL"$8$=>;;ii( )+
  D4 T t
LLS!3 Kr(   c                    | rHd| v rDt         j                  j                         }| d   |d<   |j                  d      }|rt	        |      S y)z=Determine if we have any encoding information in our headers.r<   zcontent-typecharsetN)emailmessageMessage	get_paramr3   )r?   mra   s      r&   _get_encoding_from_headersrg      sK    >W,MM!!##N3.++i(w<r(   c                  $    e Zd ZddZddZddZy)CacheablePageContentc                .    |j                   sJ || _        y r,   )cache_link_parsingpager1   rl   s     r&   r.   zCacheablePageContent.__init__   s    &&&&	r(   c                    t        |t        |             xr- | j                  j                  |j                  j                  k(  S r,   )
isinstancetyperl   r$   )r1   others     r&   __eq__zCacheablePageContent.__eq__   s-    %d,P%**..1PPr(   c                @    t        | j                  j                        S r,   )hashrl   r$   r1   s    r&   __hash__zCacheablePageContent.__hash__   s    DIIMM""r(   N)rl   IndexContentr4   r5   )rq   objectr4   bool)r4   int)r6   r7   r8   r.   rr   rv   rH   r(   r&   ri   ri      s    Q#r(   ri   c                      e Zd ZddZy)
ParseLinksc                     y r,   rH   rm   s     r&   __call__zParseLinks.__call__   s    r(   Nrl   rw   r4   zIterable[Link])r6   r7   r8   r~   rH   r(   r&   r|   r|      s    Ar(   r|   c                z     t         j                  d fd       t        j                         d fd       }|S )z
    Given a function that parses an Iterable[Link] from an IndexContent, cache the
    function's result (keyed by CacheablePageContent), unless the IndexContent
    `page` has `page.cache_link_parsing == False`.
    c                :    t         | j                              S r,   )listrl   )cacheable_pagefns    r&   wrapperz*with_cached_index_content.<locals>.wrapper   s    B~**+,,r(   c                `    | j                   r t        |             S t         |             S r,   )rk   ri   r   )rl   r   r   s    r&   wrapper_wrapperz2with_cached_index_content.<locals>.wrapper_wrapper   s+    ""/566BtH~r(   )r   ri   r4   
list[Link])rl   rw   r4   r   )	functoolscachewraps)r   r   r   s   ` @r&   with_cached_index_contentr      s@     __- - __R 
 r(   rw   c              #  T  K   | j                   j                         }|j                  d      r^t        j                  | j
                        }|j                  dg       D ])  }t        j                  || j                        }|&| + yt        | j                        }| j                  xs d}|j                  | j
                  j                  |             | j                  }|j                  xs |}|j                  D ]!  }	t        j                   |	||      }|| # yw)z\
    Parse a Simple API's Index Content, and yield its anchor elements as Link objects.
    r>   filesNzutf-8)page_urlbase_url)r/   r!   r"   jsonloadscontentr@   r   	from_jsonr$   HTMLLinkParserencodingfeeddecoder   anchorsfrom_element)
rl   rD   datafilelinkparserr   r$   r   anchors
             r&   parse_linksr      s      &&,,.N  !FGzz$,,'HHWb) 	D>>$1D|J		
 	DHH%F}}'H
KK##H-.
((C%#H..   #I<
	s   D&D(T)frozenc                  P    e Zd ZU dZded<   ded<   ded<   ded<   d	Zd
ed<   ddZy)rw   a  Represents one response (or page), along with its URL.

    :param encoding: the encoding to decode the given content.
    :param url: the URL from which the HTML was downloaded.
    :param cache_link_parsing: whether links parsed from this page's url
                               should be cached. PyPI index urls should
                               have this set to False, for example.
    bytesr   r3   r/   
str | Noner   r$   Try   rk   c                ,    t        | j                        S r,   )r   r$   ru   s    r&   __str__zIndexContent.__str__  s    #DHH--r(   N)r4   r3   )r6   r7   r8   __doc____annotations__rk   r   rH   r(   r&   rw   rw      s/     N	H##.r(   c                  4     e Zd ZdZd fdZddZddZ xZS )r   zf
    HTMLParser that keeps the first base HREF and a list of all anchor
    elements' attributes.
    c                P    t         |   d       || _        d | _        g | _        y )NT)convert_charrefs)r-   r.   r$   r   r   )r1   r$   r2   s     r&   r.   zHTMLLinkParser.__init__  s(    $/$(46r(   c                    |dk(  r(| j                   | j                  |      }||| _         y y |dk(  r%| j                  j                  t	        |             y y )Nbasea)r   get_hrefr   appenddict)r1   tagattrshrefs       r&   handle_starttagzHTMLLinkParser.handle_starttag  sT    &=T]]2=='D $  CZLLU, r(   c                *    |D ]  \  }}|dk(  s|c S  y )Nr   rH   )r1   r   namevalues       r&   r   zHTMLLinkParser.get_href!  s&      	KD%v~	 r(   )r$   r3   r4   r5   )r   r3   r   list[tuple[str, str | None]]r4   r5   )r   r   r4   r   )r6   r7   r8   r   r.   r   r   r9   r:   s   @r&   r   r     s    
7-r(   r   c                <    |t         j                  } |d| |       y )Nz%Could not fetch URL %s: %s - skipping)r\   r]   )r   reasonmeths      r&   _handle_get_simple_failr   (  s    
 |||	0$?r(   c                    t        | j                        }t        | j                  | j                  d   || j                  |      S )Nr<   )r   r$   rk   )rg   r?   rw   r   r$   )rC   rk   r   s      r&   _make_index_contentr   2  sE     *(*:*:;H(LL- r(   c               $   | j                   j                  dd      d   }t        |      }|rt        j	                  d||        y |j                  d      rtt        j                  j                  t        |            rL|j                  d      s|dz  }t        j                  j                  |d      }t        j                  d|       	 t        ||	      }t!        || j"                  
      S # t$        $ r t        j	                  d|        Y y t&        $ r6}t        j	                  d| |j(                  |j*                         Y d }~y d }~wt,        $ r}t/        | |       Y d }~y d }~wt0        $ r}t/        | |       Y d }~y d }~wt2        $ r6}d}|t5        |      z  }t/        | |t        j6                         Y d }~y d }~wt8        j:                  $ r}t/        | d|        Y d }~y d }~wt8        j<                  $ r t/        | d       Y y w xY w)N#r   r   zICannot look at %s URL %s because it does not support lookup as web pages.zfile:/z
index.htmlz# file: URL is directory, getting %srY   )rk   z`Skipping page %s because it looks like an archive, and cannot be checked by a HTTP HEAD request.zSkipping page %s because the %s request got Content-Type: %s. The only supported Content-Types are application/vnd.pypi.simple.v1+json, application/vnd.pypi.simple.v1+html, and text/htmlz4There was a problem confirming the ssl certificate: )r   zconnection error: z	timed out)r$   splitr'   r\   warningr"   osrS   isdirr   endswithrM   rN   urljoinr]   r_   r   rk   rG   r*   r0   r/   r   r   r   r   r3   infor   ConnectionErrorTimeout)r   rQ   r$   
vcs_schemerV   excr   s          r&   _get_index_contentr   ?  s   
((..a
 
#C #3'JW	

  ~~g277==S1A#B ||C 3JC
 ll""35:C@U#C9: #4D<S<STT9  
1	
8 /  
A 	
 	
,  " +c**   +c**   @G#c(f6;;??  ## B(:3%&@AA
 	  3k2 	3sT   C/ /HH,E		HE&&H2FH,G  HG**"HHc                  "    e Zd ZU ded<   ded<   y)CollectedSourceszSequence[LinkSource | None]
find_links
index_urlsN)r6   r7   r8   r   rH   r(   r&   r   r   {  s    ++++r(   r   c                  v    e Zd ZdZ	 	 	 	 	 	 ddZe	 d		 	 	 	 	 	 	 d
d       Zedd       ZddZ		 	 	 	 	 	 ddZ
y)LinkCollectorz
    Responsible for collecting Link objects from all configured locations,
    making network requests as needed.

    The class's main method is its collect_sources() method.
    c                     || _         || _        y r,   )search_scoperQ   )r1   rQ   r   s      r&   r.   zLinkCollector.__init__  s    
 )r(   c                0   |j                   g|j                  z   }|j                  r0|s.t        j	                  ddj                  d |D                     g }|j                  xs g }t        j                  |||j                        }t        ||      }|S )z
        :param session: The Session to use to make requests.
        :param suppress_no_index: Whether to ignore the --no-index option
            when constructing the SearchScope object.
        zIgnoring indexes: %s,c              3  2   K   | ]  }t        |        y wr,   r   ).0r$   s     r&   	<genexpr>z'LinkCollector.create.<locals>.<genexpr>  s     Is-c2Is   )r   r   no_index)rQ   r   )
	index_urlextra_index_urlsr   r\   r]   r^   r   r   creater   )clsrQ   optionssuppress_no_indexr   r   r   link_collectors           r&   r   zLinkCollector.create  s     ''(7+C+CC
$5LL&IjII J ''-2
"))!!%%

 '%
 r(   c                .    | j                   j                  S r,   )r   r   ru   s    r&   r   zLinkCollector.find_links  s      +++r(   c                0    t        || j                        S )z>
        Fetch an HTML page containing package links.
        rY   )r   rQ   )r1   locations     r&   fetch_responsezLinkCollector.fetch_response  s     "(DLLAAr(   c                    t        j                   fd j                  j                        D              j	                         }t        j                   fd j
                  D              j	                         }t        j                  t        j                        rwt        j                  ||      D cg c]  }||j                  d|j                   ! }}t        |       d dg|z   }t        j                  dj                  |             t!        t#        |      t#        |            S c c}w )Nc           	   3  h   K   | ])  }t        |j                  j                  d d        + yw)Fcandidates_from_pagepage_validator
expand_dirrk   project_nameNr   rQ   is_secure_originr   locr   r   r1   s     r&   r   z0LinkCollector.collect_sources.<locals>.<genexpr>  sA      
4
  %9#||<< #() 
4
   /2c           	   3  h   K   | ])  }t        |j                  j                  d d        + yw)Tr   Nr   r   s     r&   r   z0LinkCollector.collect_sources.<locals>.<genexpr>  sA      
5
  %9#||<<#') 
5
r   z* z' location(s) to search for versions of :
)r   r   )collectionsOrderedDictr   get_index_urls_locationsvaluesr   r\   isEnabledForloggingDEBUG	itertoolschainr   r#   r]   r^   r   r   )r1   r   r   index_url_sourcesfind_links_sourcessliness   ```    r&   collect_sourceszLinkCollector.collect_sources  s2    (33 
4
 ((AA,O
4
 

 &( 	 )44 
5
 
5
 

 &( 	 w}}- #);=NO=QVV%7 QVVHE  u:, ##/.3 E LL5)*./-.
 	
s   >$D<N)rQ   r   r   r   r4   r5   )F)rQ   r   r   r	   r   ry   r4   r   )r4   z	list[str])r   r   r4   IndexContent | None)r   r3   r   r   r4   r   )r6   r7   r8   r   r.   classmethodr   propertyr   r   r  rH   r(   r&   r   r     s     " 
	 
 #(	   	
 
 B , ,B,
,
 1,
 
	,
r(   r   )r$   r3   r4   r   )rC   r   r4   r5   )r$   r3   rQ   r   r4   r5   )r$   r3   rQ   r   r4   r   )r?   ResponseHeadersr4   r   )r   r|   r4   r|   r   r,   )r   r   r   zstr | Exceptionr   zCallable[..., None] | Noner4   r5   )T)rC   r   rk   ry   r4   rw   )r   r   rQ   r   r4   r  )Pr   
__future__r   r   email.messagerb   r   r   r   r   r   urllib.parserM   collections.abcr   r   r   dataclassesr   html.parserr   optparser	   typingr
   r   r   pip._vendorr   pip._vendor.requestsr   pip._vendor.requests.exceptionsr   r   pip._internal.exceptionsr   pip._internal.models.linkr   !pip._internal.models.search_scoper   pip._internal.network.sessionr   pip._internal.network.utilsr   pip._internal.utils.filetypesr   pip._internal.utils.miscr   pip._internal.utils.urlsr   pip._internal.vcsr   sourcesr   r   r   	getLoggerr6   r\   r3   r  r'   	Exceptionr*   rE   rG   rW   r_   rg   ri   r|   r   r   rw   r   r   r   r   r   r   rH   r(   r&   <module>r      s   #       	  > > ! "   ! ) @ ; * 9 4 8 9 9 0 ! A A			8	$ c*)Y )@,	y 	$<~	# 	#B B(  8 $. . .(Z > (,@
@@ %@ 
	@ 48

,0

9x,z ,
h
 h
r(   