tzh
2024-08-22 c7d0944258c7d0943aa7b2211498fd612971ce27
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
M\¬Qc@s}dZddlZddlZdgZdd d„ƒYZdd d„ƒYZdd d„ƒYZd    ejfd
„ƒYZdS(s< robotparser.py
 
    Copyright (C) 2000  Bastian Kleineidam
 
    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2
 
    The robots.txt Exclusion Protocol is implemented as specified in
    http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
iÿÿÿÿNtRobotFileParsercBsbeZdZdd„Zd„Zd„Zd„Zd„Zd„Zd„Z    d    „Z
d
„Z RS( ss This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.
 
    tcCs>g|_d|_t|_t|_|j|ƒd|_dS(Ni(tentriestNonet default_entrytFalset disallow_allt    allow_alltset_urlt last_checked(tselfturl((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pyt__init__s                  cCs|jS(s·Returns the time the robots.txt file was last fetched.
 
        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.
 
        (R    (R
((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pytmtime scCsddl}|jƒ|_dS(sYSets the time the robots.txt file was last fetched to the
        current time.
 
        iÿÿÿÿN(ttimeR    (R
R((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pytmodified)s cCs/||_tj|ƒdd!\|_|_dS(s,Sets the URL referring to a robots.txt file.iiN(R turlparsethosttpath(R
R ((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pyR1s    cCs¯tƒ}|j|jƒ}g|D]}|jƒ^q"}|jƒ|j|_|jdkrkt|_n@|jdkr†t|_n%|jdkr«|r«|j    |ƒndS(s4Reads the robots.txt URL and feeds it to the parser.i‘i“iiÈN(i‘i“(
t    URLopenertopenR tstriptcloseterrcodetTrueRRtparse(R
topenertftlinetlines((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pytread6s    
   cCsAd|jkr-|jdkr=||_q=n|jj|ƒdS(Nt*(t
useragentsRRRtappend(R
tentry((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pyt
_add_entryDscCsd}d}tƒ}xä|D]Ü}|d7}|s~|dkrPtƒ}d}q~|dkr~|j|ƒtƒ}d}q~n|jdƒ}|dkr¦|| }n|jƒ}|s¾qn|jddƒ}t|ƒdkr|djƒjƒ|d<tj|djƒƒ|d<|ddkrk|dkrN|j|ƒtƒ}n|j    j
|dƒd}qø|ddkr°|dkrõ|j j
t |dt ƒƒd}qõqø|ddkrø|dkrõ|j j
t |dtƒƒd}qõqøqqW|dkr|j|ƒnd    S(
sparse the input lines from a robots.txt file.
           We allow that a user-agent: line is not preceded by
           one or more blank lines.iiit#t:s
user-agenttdisallowtallowN(tEntryR#tfindRtsplittlentlowerturllibtunquoteR R!t    rulelinestRuleLineRR(R
Rtstatet
linenumberR"Rti((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pyRMsN     
                              cCsÊ|jr tS|jrtStjtj|ƒƒ}tjdd|j|j    |j
|j fƒ}tj |ƒ}|s}d}nx-|j D]"}|j|ƒr‡|j|ƒSq‡W|jrÆ|jj|ƒStS(s=using the parsed robots.txt decide if useragent can fetch urlRt/(RRRRRR-R.t
urlunparseRtparamstquerytfragmenttquoteRt
applies_tot    allowanceR(R
t    useragentR t
parsed_urlR"((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pyt    can_fetch€s                 cCs-djg|jD]}t|ƒd^qƒS(NRs
(tjoinRtstr(R
R"((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pyt__str__˜s( t__name__t
__module__t__doc__R R RRRR#RR>RA(((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pyRs                                 3    R0cBs)eZdZd„Zd„Zd„ZRS(soA rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path.cCs;|dkr| rt}ntj|ƒ|_||_dS(NR(RR-R9RR;(R
RR;((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pyR Ÿs    cCs|jdkp|j|jƒS(NR(Rt
startswith(R
tfilename((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pyR:¦scCs|jrdpdd|jS(NtAllowtDisallows: (R;R(R
((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pyRA©s(RBRCRDR R:RA(((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pyR0œs        R(cBs2eZdZd„Zd„Zd„Zd„ZRS(s?An entry has one or more user-agents and zero or more rulelinescCsg|_g|_dS(N(R R/(R
((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pyR ¯s    cCsjg}x'|jD]}|jd|dgƒqWx*|jD]}|jt|ƒdgƒq:Wdj|ƒS(Ns User-agent: s
R(R textendR/R@R?(R
trettagentR((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pyRA³s cCs]|jdƒdjƒ}x=|jD]2}|dkr9tS|jƒ}||kr#tSq#WtS(s2check if this entry applies to the specified agentR4iR(R*R,R RR(R
R<RK((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pyR:»s   cCs.x'|jD]}|j|ƒr
|jSq
WtS(sZPreconditions:
        - our agent applies to this entry
        - filename is URL decoded(R/R:R;R(R
RFR((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pyR;Ès (RBRCRDR RAR:R;(((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pyR(­s
             RcBs#eZd„Zd„Zd„ZRS(cGs tjj||Œd|_dS(NiÈ(R-tFancyURLopenerR R(R
targs((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pyR ÒscCsdS(N(NN(R(R
Rtrealm((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pytprompt_user_passwdÖscCs(||_tjj||||||ƒS(N(RR-RLthttp_error_default(R
R tfpRterrmsgtheaders((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pyRPÛs    (RBRCR RORP(((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pyRÑs        ((((    RDRR-t__all__RR0R(RLR(((sS/tmp/ndk-User/buildhost/install/prebuilt/darwin-x86_64/lib/python2.7/robotparser.pyt<module> s      Š$