# _*_ coding: utf-8 _*_
"""
python_requests.py by xianhu
"""
import requests.adapters
# Different ways to fetch a page; every helper below returns a Response object.
# (Original note: the request target may be a URL or a Request object.
# NOTE(review): the helpers used here are all called with a URL string.)
r0 = requests.get("https://github.com/timeline.json")
r1 = requests.post("http://httpbin.org/post")
r2 = requests.put("http://httpbin.org/put")
r3 = requests.delete("http://httpbin.org/delete")
r4 = requests.head("http://httpbin.org/get")
r5 = requests.options("http://httpbin.org/get")
# PATCH goes to httpbin's dedicated /patch endpoint, mirroring the POST/PUT/DELETE
# lines above; the /get endpoint does not accept the PATCH method.
r6 = requests.patch("http://httpbin.org/patch")
# Request object:
# class requests.Request(method=None, url=None, headers=None, files=None, data=None, params=None, auth=None, cookies=None, hooks=None, json=None)
# All of the helpers above call the underlying request() function, so every parameter that request() accepts should also be accepted by them:
# requests.request(method, url, **kwargs)
# kwargs include: params / data / json / headers / cookies / files / auth / timeout / allow_redirects(bool) / proxies / verify(bool) / stream / cert
# Response object: class requests.Response
# Main attributes: content / cookies / encoding / headers / history / is_permanent_redirect / is_redirect / reason / status_code / text / url, etc.
# Main methods: iter_content(chunk_size=1, decode_unicode=False) / iter_lines(chunk_size=512, decode_unicode=None, delimiter=None)
# Main methods: close() / json(**kwargs) / raise_for_status(), etc.
# Pass URL parameters as a dict; they can also be appended to the URL directly in the ?xx=xx&xx=xx form
params = {"key1": "value1", "key2": "value2"}
r = requests.get("http://httpbin.org/get", params=params)
print(r.url) # http://httpbin.org/get?key2=value2&key1=value1
# Pass URL parameters as a dict: a dict value may itself be a list
params = {"key1": "value1", "key2": ["value2", "value3"]}
r = requests.get("http://httpbin.org/get", params=params)
print(r.url) # http://httpbin.org/get?key1=value1&key2=value2&key2=value3
# Fetching the page content
r = requests.get("https://github.com/timeline.json")
print(r.text) # the page content as text, i.e. after decompression and decoding
print(r.content) # the page content as bytes, i.e. decompressed only, not decoded
print(r.json()) # if the page content is JSON, returns the parsed JSON object directly
print(r.encoding) # the encoding of the page: "utf-8"
# Requests automatically decodes content coming from the server; the encoding can also be changed by hand
r.encoding = "ISO-8859-1"
print(r.text) # the new value, now decoded with the updated r.encoding
# Other encoding helpers
# requests.utils.get_encodings_from_content(content): Returns encodings from given content string.
# requests.utils.get_encoding_from_headers(headers): Returns encodings from given HTTP Header Dict.
# requests.utils.get_unicode_from_response(r): Returns the requested content back in unicode.
# Raw response content: get the raw socket response from the server
r = requests.get("https://github.com/timeline.json", stream=True)
print(r.raw) # repr of the raw response object
print(r.raw.read(10)) # "\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\x03"
# In general, use the following pattern to save the streamed content to a file
with open("test", "wb") as fd:
    for chunk in r.iter_content(chunk_size=256):
        fd.write(chunk)
# Note (original author): the configured timeout applies to connect and read. But once the connection to the server is established, r.content or r.iter_content is in a read state and is no longer affected by timeout
# Custom request headers: a dict
headers = {"user-agent": "my-app/0.0.1"}
r = requests.get("https://api.github.com/some/endpoint", headers=headers)
print(r.request.headers) # headers of the request
print(r.headers) # headers of the response
# {
# "content-encoding": "gzip",
# "transfer-encoding": "chunked",
# "connection": "close",
# "server": "nginx/1.0.4",
# "x-runtime": "148ms",
# "etag": "e1ca502697e5c9317743dc078f67693f",
# "content-type": "application/json"
# }
print(r.headers["Content-Type"]) # "application/json"
print(r.headers.get("content-type")) # "application/json"
# A more complex POST request: form data
post_dict = {"key1": "value1", "key2": "value2"}
r = requests.post("http://httpbin.org/post", data=post_dict)
print(r.text)
# POST a Multipart-Encoded file.
# The file is opened in a with-block so its handle is closed as soon as the
# upload returns, instead of staying open for the rest of the script.
with open("report.xls", "rb") as report_file:
    files = {"file": report_file}
    r = requests.post("http://httpbin.org/post", files=files)
print(r.text)
# You can explicitly set the filename, the content type and extra headers
with open("report.xls", "rb") as report_file:
    files = {"file": ("report.xls", report_file, "application/vnd.ms-excel", {"Expires": "0"})}
    r = requests.post("http://httpbin.org/post", files=files)
print(r.text)
# You can also send a plain string to be received as a file
files = {"file": ("report.csv", "some,data,to,send\nanother,row,to,send\n")}
r = requests.post("http://httpbin.org/post", files=files)
print(r.text)
# Response status code
r = requests.get("http://httpbin.org/get")
print(r.status_code) # 200
print(r.status_code == requests.codes.ok) # True; status code lookup object
# If a bad request was made (a 4XX client error or 5XX server error response), Response.raise_for_status() can be used to raise an exception:
bad_r = requests.get("http://httpbin.org/status/404")
print(bad_r.status_code) # 404
bad_r.raise_for_status() # raises an exception
# Cookie: if a response contains cookies, they are placed in response.cookies (a CookieJar)
r = requests.get("http://example.com/some/cookie/setting/url")
print(r.cookies["example_cookie_name"]) # "example_cookie_value"
# To send your own cookies to the server, use the cookies parameter (a dict)
cookies = dict(cookies_are="working")
r = requests.get("http://httpbin.org/cookies", cookies=cookies)
print(r.text)
# Other cookie helpers
# requests.utils.dict_from_cookiejar(cj): Returns a key/value dictionary from a CookieJar.
# requests.utils.cookiejar_from_dict(cookie_dict, cookiejar=None, overwrite=True): Returns a CookieJar from a key/value dictionary.
# requests.utils.add_dict_to_cookiejar(cj, cookie_dict): Returns a CookieJar from a key/value dictionary.
# The general-purpose CookieJar class: a cookielib.CookieJar that also provides a dict interface
# class requests.cookies.RequestsCookieJar(policy=None): Compatibility class; is a cookielib.CookieJar, but exposes a dict interface.
# Session objects: a session lets you persist certain parameters across requests; it also persists cookies across all requests made from the same Session instance
s = requests.Session()
s.get("http://httpbin.org/cookies/set/sessioncookie/123456789")
s.get("http://httpbin.org/cookies")
for cookie in s.cookies:
    print(cookie)
# To add cookies to a session by hand, use the cookie utility functions to manipulate Session.cookies
requests.utils.add_dict_to_cookiejar(s.cookies, {"cookie_key": "cookie_value"})
# Sessions can also supply default data to the request methods; this is done by setting attributes on the session object
s.auth = ("user", "pass")
s.headers.update({"x-test": "true"})
s.get("http://httpbin.org/headers", headers={"x-test2": "true"}) # both "x-test" and "x-test2" are sent
# Note, however, that method-level parameters are not persisted across requests even when a session is used; in the example below only the first request sends the cookie
s.get("http://httpbin.org/cookies", cookies={"from-my": "browser"}) # sent with the cookie
s.get("http://httpbin.org/cookies") # sent without the cookie
# A session can also be used as a context manager
with requests.Session() as s:
    s.get("http://httpbin.org/cookies/set/sessioncookie/123456789")
# The requests.Session class offers essentially the same functions/attributes as the top-level requests module; it just wraps them in a layer that carries state across requests (the original note calls this "cross-domain requests")
# Redirection and request history: by default Requests automatically handles all redirects except for HEAD; redirect handling can be disabled with the allow_redirects parameter
# The history attribute of the response object can be used to track redirects; Response.history is a list of Response objects, sorted from the oldest to the most recent request
r = requests.get("http://github.com", allow_redirects=True)
print(r.status_code) # 200
print(r.history) # [] in the original note -- NOTE(review): the Requests quickstart shows [<Response [301]>] here when a redirect was followed; confirm
r = requests.get("http://github.com", allow_redirects=False)
print(r.status_code) # 301
print(r.history) # []
# Timeouts: set the timeout parameter
requests.get("http://github.com", timeout=0.001)
# Traceback (most recent call last):
# File "", line 1, in
# requests.exceptions.Timeout: HTTPConnectionPool(host="github.com", port=80): Request timed out. (timeout=0.001)
# Note: timeout only applies to the connection process and is unrelated to the download of the response body
# timeout is not a time limit on the entire response download; rather, an exception is raised if the server has not answered within timeout seconds
# More precisely: when no bytes at all have been received on the underlying socket for timeout seconds
requests.get("https://github.com", timeout=5)
# The timeout value above is used for both the connect and the read timeouts; to set them separately, pass a tuple
requests.get("https://github.com", timeout=(3.05, 27))
# Errors and exceptions: on a network problem (e.g. DNS lookup failure, refused connection), Requests raises a ConnectionError exception
# If an HTTP request returns an unsuccessful status code, Response.raise_for_status() raises an HTTPError exception
# If a request times out, a Timeout exception is raised
# If a request exceeds the configured maximum number of redirects, a TooManyRedirects exception is raised
# All exceptions that Requests explicitly raises inherit from requests.exceptions.RequestException
# All exceptions:
# exception requests.RequestException(*args, **kwargs): There was an ambiguous exception that occurred while handling your request.
# exception requests.ConnectionError(*args, **kwargs): A Connection error occurred.
# exception requests.HTTPError(*args, **kwargs): An HTTP error occurred.
# exception requests.URLRequired(*args, **kwargs): A valid URL is required to make a request.
# exception requests.TooManyRedirects(*args, **kwargs): Too many redirects.
# exception requests.ConnectTimeout(*args, **kwargs): The request timed out while trying to connect to the remote server.
# exception requests.ReadTimeout(*args, **kwargs): The server did not send any data in the allotted amount of time.
# exception requests.Timeout(*args, **kwargs): The request timed out.
# SSL certificate verification: verify=True checks the certificate, verify=False ignores it
requests.get("https://kennethreitz.com", verify=True) # no valid SSL certificate set up for this host, raises an exception
# requests.exceptions.SSLError: hostname "kennethreitz.com" doesn"t match either of "*.herokuapp.com", "herokuapp.com"
requests.get("https://github.com", verify=True) # SSL certificate is set up
# For private certificates, you can also pass the path of a CA_BUNDLE file to verify
# You can also specify a local certificate to use as the client-side certificate, either as a single file (containing the private key and the certificate) or as a tuple of both files' paths:
requests.get("https://kennethreitz.com", cert=("/path/server.crt", "/path/key"))
requests.get("https://kennethreitz.com", cert="/wrong_path/server.pem")
# SSLError: [Errno 336265225] _ssl.c:347: error:140B0009:SSL routines:SSL_CTX_use_PrivateKey_file:PEM lib
# Warning: the private key of the local certificate must be unencrypted. Requests currently does not support encrypted keys
# Streaming uploads: send large data streams or files without first reading them into memory.
# The file is opened in binary mode ("rb"), as the Requests documentation
# recommends for streamed bodies: the bytes sent then match the file on disk
# instead of going through text decoding/newline translation.
with open("massive-body", "rb") as f:
    requests.post("http://some.url/streamed", data=f)
# Event hooks; available hook: response (the response generated from a request)
# You can assign a hook function on a per-request basis by passing a {hook_name: callback_function} dict to the hooks request parameter
def print_url(resp, *args, **kwargs):
    """Response hook that prints the URL of the response it is given.

    Args:
        resp: Object exposing a ``url`` attribute (the Response, when this
            function is registered as a ``response`` hook).
        *args: Extra positional arguments supplied by the caller; ignored.
        **kwargs: Extra keyword arguments supplied by the caller; ignored.

    Returns:
        None.
    """
    # The Requests documentation defines hook callbacks as
    # ``hook(r, *args, **kwargs)``; accepting the extras keeps this callback
    # usable when the library passes additional keyword arguments along.
    print(resp.url)
requests.get("http://httpbin.org", hooks=dict(response=print_url)) # registers print_url as the "response" hook for this request
# Proxies
proxies = {
    "http": "http://10.10.1.10:3128",
    "https": "http://10.10.1.10:1080",
}
requests.get("http://example.org", proxies=proxies)
# If the proxy requires HTTP Basic Auth, use the http://user:password@host:port/ syntax, e.g. "http": "http://user:pass@10.10.1.10:3128/"
# Besides basic HTTP proxies, Requests also supports proxies using the SOCKS protocol; this requires a separate install:
# $ pip install requests[socks]
proxies = {
    "http": "socks5://user:pass@host:port",
    "https": "socks5://user:pass@host:port"
}
requests.get("http://example.org", proxies=proxies)
# Requests transport adapters
# As of v1.0.0, Requests has a modular internal design. Part of the reason for this is to implement Transport Adapters.
# Transport adapters provide a mechanism for defining how to interact with an HTTP service. In particular, they allow you to apply per-service configuration.
# Requests ships with a single transport adapter, the HTTPAdapter. This adapter uses the powerful urllib3 to provide Requests' default HTTP and HTTPS interaction.
# Whenever a Session is initialized, adapters are attached to it: one for HTTP and another for HTTPS.
# Requests lets users create and use their own transport adapters to implement the special functionality they need. Once created, a transport adapter can be mounted on a session object, together with an indication of which web services it should apply to.
s = requests.Session()
s.mount("http://baidu.com", requests.adapters.HTTPAdapter()) # mount an adapter for this URL prefix
# If this error appears: Connection pool is full, discarding connection: xxxx.com
# mount an adapter with larger pool_connections / pool_maxsize values:
s.mount('https://', requests.adapters.HTTPAdapter(pool_connections=100, pool_maxsize=100))
# Disable InsecurePlatformWarning
# requests.packages.urllib3.disable_warnings()