AlkantarClanX12

Your IP : 3.145.63.148


Current Path : /opt/alt/python37/lib64/python3.7/urllib/__pycache__/
Upload File :
Current File : //opt/alt/python37/lib64/python3.7/urllib/__pycache__/robotparser.cpython-37.pyc

B

� f�"�@s\dZddlZddlZddlZdgZe�dd�ZGdd�d�ZGdd�d�Z	Gd	d
�d
�Z
dS)a% robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://www.robotstxt.org/norobots-rfc.txt
�N�RobotFileParser�RequestRatezrequests secondsc@sjeZdZdZddd�Zdd�Zdd�Zd	d
�Zdd�Zd
d�Z	dd�Z
dd�Zdd�Zdd�Z
dd�ZdS)rzs This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    �cCs,g|_d|_d|_d|_|�|�d|_dS)NFr)�entries�
default_entry�disallow_all�	allow_all�set_url�last_checked)�self�url�r
�7/opt/alt/python37/lib64/python3.7/urllib/robotparser.py�__init__s
zRobotFileParser.__init__cCs|jS)z�Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        )r
)rr
r
r�mtime$szRobotFileParser.mtimecCsddl}|��|_dS)zYSets the time the robots.txt file was last fetched to the
        current time.

        rN)�timer
)rrr
r
r�modified-szRobotFileParser.modifiedcCs&||_tj�|�dd�\|_|_dS)z,Sets the URL referring to a robots.txt file.��N)r�urllib�parse�urlparseZhost�path)rrr
r
rr	5szRobotFileParser.set_urlc
Cs�ytj�|j�}WnRtjjk
rd}z0|jdkr:d|_n|jdkrT|jdkrTd|_Wdd}~XYnX|�	�}|�
|�d����dS)z4Reads the robots.txt URL and feeds it to the parser.)i�i�Ti�i�Nzutf-8)
rZrequestZurlopenr�errorZ	HTTPError�coderr�readr�decode�
splitlines)r�f�err�rawr
r
rr:s
zRobotFileParser.readcCs,d|jkr|jdkr(||_n|j�|�dS)N�*)�
useragentsrr�append)r�entryr
r
r�
_add_entryGs

zRobotFileParser._add_entrycCs6d}t�}|���x|D�]�}|sT|dkr8t�}d}n|dkrT|�|�t�}d}|�d�}|dkrr|d|�}|��}|s�q|�dd�}t|�dkr|d����|d<tj	�
|d���|d<|ddk�r|dkr�|�|�t�}|j�|d�d}q|ddk�r4|dk�r|j
�t|dd	��d}q|dd
k�rh|dk�r|j
�t|dd��d}q|ddk�r�|dk�r|d�����r�t|d�|_d}q|dd
kr|dkr|d�d�}t|�dk�r|d�����r|d�����rtt|d�t|d��|_d}qW|dk�r2|�|�dS)z�Parse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        rr��#N�:z
user-agentZdisallowFZallowTzcrawl-delayzrequest-rate�/)�Entryrr%�find�strip�split�len�lowerrr�unquoter"r#�	rulelines�RuleLine�isdigit�int�delayr�req_rate)r�lines�stater$�line�iZnumbersr
r
rrPsd






 
zRobotFileParser.parsecCs�|jr
dS|jrdS|jsdStj�tj�|��}tj�dd|j|j	|j
|jf�}tj�|�}|sfd}x"|j
D]}|�|�rn|�|�SqnW|jr�|j�|�SdS)z=using the parsed robots.txt decide if useragent can fetch urlFTrr))rrr
rrrr0�
urlunparserZparamsZqueryZfragment�quoter�
applies_to�	allowancer)r�	useragentrZ
parsed_urlr$r
r
r�	can_fetch�s$
zRobotFileParser.can_fetchcCs>|��sdSx|jD]}|�|�r|jSqW|jr:|jjSdS)N)rrr=r5r)rr?r$r
r
r�crawl_delay�s

zRobotFileParser.crawl_delaycCs>|��sdSx|jD]}|�|�r|jSqW|jr:|jjSdS)N)rrr=r6r)rr?r$r
r
r�request_rate�s

zRobotFileParser.request_ratecCs0|j}|jdk	r||jg}d�tt|��dS)N�
)rr�join�map�str)rrr
r
r�__str__�s
zRobotFileParser.__str__N)r)�__name__�
__module__�__qualname__�__doc__rrrr	rr%rr@rArBrGr
r
r
rrs
	
	C

c@s(eZdZdZdd�Zdd�Zdd�ZdS)	r2zoA rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path.cCs<|dkr|sd}tj�tj�|��}tj�|�|_||_dS)NrT)rrr;rr<rr>)rrr>r
r
rr�s
zRuleLine.__init__cCs|jdkp|�|j�S)Nr!)r�
startswith)r�filenamer
r
rr=�szRuleLine.applies_tocCs|jr
dndd|jS)NZAllowZDisallowz: )r>r)rr
r
rrG�szRuleLine.__str__N)rHrIrJrKrr=rGr
r
r
rr2�sr2c@s0eZdZdZdd�Zdd�Zdd�Zdd	�Zd
S)r*z?An entry has one or more user-agents and zero or more rulelinescCsg|_g|_d|_d|_dS)N)r"r1r5r6)rr
r
rr�szEntry.__init__cCs�g}x|jD]}|�d|���qW|jdk	r@|�d|j���|jdk	rj|j}|�d|j�d|j���|�tt|j	��|�d�d�
|�S)NzUser-agent: z
Crawl-delay: zRequest-rate: r)rrC)r"r#r5r6ZrequestsZseconds�extendrErFr1rD)rZret�agentZrater
r
rrG�s


z
Entry.__str__cCsF|�d�d��}x.|jD]$}|dkr*dS|��}||krdSqWdS)z2check if this entry applies to the specified agentr)rr!TF)r-r/r")rr?rOr
r
rr=�szEntry.applies_tocCs$x|jD]}|�|�r|jSqWdS)zZPreconditions:
        - our agent applies to this entry
        - filename is URL decodedT)r1r=r>)rrMr9r
r
rr>�s

zEntry.allowanceN)rHrIrJrKrrGr=r>r
r
r
rr*�s


r*)rK�collectionsZurllib.parserZurllib.request�__all__�
namedtuplerrr2r*r
r
r
r�<module>s6