网络编程
一、socket
1. 套接字
1.1 起源
套接字的起源可以追溯到20世纪70年代,它是加利福尼亚大学的伯克利版本UNIX的一部分。套接字最初是为同一主机上的应用程序所创建,使得主机上运行的一个程序与另一个运行的程序进行通信,这就是所谓的进程间通信。
1.2 分类
- 基于文件的:AF_UNIX家族
- 面向网络的:AF_INET家族、AF_UNIX6(IPv6)
1.3 套接字类型
-
面向连接的套接字:用TCP协议实现,SOCK_STREAM作为套接字类型
-
无连接的套接字:用UDP协议实现,SOCK_DGRAM作为套接字类型
2. socket模块
2.1 一对一的连接
serser端
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 | #!/usr/bin/env python3
# _*_ coding:utf-8 _*_
# filename: server.py
import socket
sock = socket.socket()
sock.bind(("127.0.0.1", 8080)) # 申请操作系统资源
sock.listen()
conn, addr = sock.accept() #
conn.send(b'hello')
msg = conn.recv(1024)
print(msg) # b'Bye-Bye'
conn.close()
sock.close() # 归还申请的操作系统资源
|
client端
1
2
3
4
5
6
7
8
9
10
11
12
13 | #!/usr/bin/env python3
# _*_ coding:utf-8 _*_
# filename: client.py
import socket
sock = socket.socket()
sock.connect(("127.0.0.1", 8080))
msg = sock.recv(1024)
print(msg) # b'hello'
sock.send(b'Bye-Bye')
sock.close()
|
2.2 一个服务端接收多个客户端
server端
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 | #!/usr/bin/env python3
# _*_ coding:utf-8 _*_
# filename: server.py
import socket
sock = socket.socket()
sock.bind(("127.0.0.1", 8080))
sock.listen()
while True: # 和多个客户端建立连接
conn, addr = sock.accept()
while True:
send_msg = input('>>>')
conn.send(send_msg.encode('utf-8'))
if send_msg == "q":
break
msg = conn.recv(1024).decode('utf-8')
if msg == "q":
break
print(msg)
conn.close() # 断开和客户端的连接
sock.close()
|
client端
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 | #!/usr/bin/env python3
# _*_ coding:utf-8 _*_
# filename: client.py
import socket
sock = socket.socket()
sock.connect(("127.0.0.1", 8080))
while True:
msg = sock.recv(1024).decode('utf-8')
if msg == 'q':
break
print(msg)
send_msg = input('>>>')
sock.send(send_msg.encode('utf-8'))
if send_msg == 'q':
break
sock.close()
|
UDP连接
server端
| #!/usr/bin/env python3
# _*_ coding:utf-8 _*_
# filename: server.py
import socket
sock = socket.socket(type=socket.SOCK_DGRAM)
sock.bind(("127.0.0.1", 8080))
msg, addr = sock.recvfrom(1024)
print(msg)
sock.sendto(b'Bye', addr)
|
client端
| #!/usr/bin/env python3
# _*_ coding:utf-8 _*_
# filename: client.py
import socket
sock = socket.socket(type=socket.SOCK_DGRAM)
server = ("127.0.0.1", 8080)
sock.sendto(b'hello', server)
msg = sock.recv(1024)
print(msg)
|
一个服务端接收多个客户端消息
server端
1
2
3
4
5
6
7
8
9
10
11
12
13 | #!/usr/bin/env python3
# _*_ coding:utf-8 _*_
# filename: server.py
import socket
sock = socket.socket(type=socket.SOCK_DGRAM)
sock.bind(("127.0.0.1", 8080))
while True:
msg, addr = sock.recvfrom(1024)
print(msg.decode("utf-8"))
send_msg = input(">>>")
sock.sendto(send_msg.encode("utf-8"), addr)
|
client端
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 | #!/usr/bin/env python3
# _*_ coding:utf-8 _*_
# filename: client.py
import socket
sock = socket.socket(type=socket.SOCK_DGRAM)
server = ("127.0.0.1", 8080)
while True:
send_msg = input(">>>")
if send_msg == "q":
break
sock.sendto(send_msg.encode("utf-8"), server)
msg = sock.recv(1024).decode("utf-8")
if msg == "q":
break
print(msg)
|
粘包现象
含义:两条连续发送的数据粘连在一起。只会出现在TCP协议中,由于TCP的传输是流式传输,数据和数据之间没有边界。
原因:
- 两条消息很短,并且发送间隔很短
- 接收端接收不及时
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 | #!/usr/bin/env python3
# _*_ coding:utf-8 _*_
# filename: server.py
import socket
sock = socket.socket()
sock.bind(("127.0.0.1", 8080))
sock.listen()
conn, addr = sock.accept()
conn.send(b'hello')
conn.send(b'world')
conn.close()
sock.close()
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 | #!/usr/bin/env python3
# _*_ coding:utf-8 _*_
# filename: client.py
import socket
import time
sock = socket.socket()
sock.connect(("127.0.0.1",8080))
time.sleep(0.1)
msg1 = sock.recv(1024)
print(msg1) # b'helloworld'
msg2 = sock.recv(1024)
print(msg2) # b''
sock.close()
|
解决:
- 计算即将要发送的数据的长度,通过struct模块将长度转换成固定的4个字节,将转换后的字节发送给客户端
- 客户端接收4个字节,再使用stract.unpack把4个字节转换成数字,这个数字就是即将要接收的数据的长度。然后再根据数据长度接收数据
二、socketserver
socketserver基于socket完成的,用在TCP协议下server端处理并发的客户端请求。
server端
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 | #!/usr/bin/env python3
# _*_ coding:utf-8 _*_
# filename: server.py
import socketserver
import time
class MyServer(socketserver.BaseRequestHandler):
def handle(self):
conn = self.request # <socket.socket fd=496, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=0, laddr=('127.0.0.1', 8080), raddr=('127.0.0.1', 52789)>
while True:
try:
content = conn.recv(1024).decode("utf-8")
conn.send(content.upper().encode("utf-8"))
time.sleep(1)
except ConnectionResetError:
break
server = socketserver.ThreadingTCPServer(("127.0.0.1", 8080), MyServer)
server.serve_forever()
|
client端
1
2
3
4
5
6
7
8
9
10
11
12 | #!/usr/bin/env python3
# _*_ coding:utf-8 _*_
# filename: client.py
import socket
sock = socket.socket()
sock.connect(("127.0.0.1", 8080))
while True:
sock.send(b'hello')
content = sock.recv(1024).decode('utf-8')
print(content)
|
三、HTTP
1. urllib
get请求
| import urllib.request
url = 'http://www.baidu.com/'
response = urllib.request.urlopen(url)
print(response) # <http.client.HTTPResponse object at 0x000002D28D124F60>
data = response.read().decode('utf-8')
with open('baidu.html', 'w', encoding='utf-8') as f:
f.write(data)
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 | import urllib.request
url = 'https://www.baidu.com/'
# 添加请求头信息,模拟真实浏览器访问
header = {
# 浏览器版本
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36",
}
# 创建一个请求对象
r = urllib.request.Request(url, headers=header)
# 获取所有请求头信息
print(r.headers)
# 获取指定请求头信息(键的首字母要大写,其他小写)
print(r.get_header("User-agent"))
# 获取请求的url
print(r.get_full_url())
# 请求数据
response =urllib.request.urlopen(r)
data = response.read().decode('utf-8')
with open('baidu.html', 'w', encoding='utf-8') as f:
f.write(data)
|
IP代理
urlopen()不支持添加代理,需要自己定义
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 | import urllib.request
url = 'https://www.cnblogs.com/gaoyuanzhi'
# 添加代理
proxy = {
"http":"http://117.88.5.142:3000"
}
proxy_handler = urllib.request.ProxyHandler(proxy)
# 创建自己的opener
opener = urllib.request.build_opener(proxy_handler)
# 拿着代理IP去发送请求
response = opener.open(url)
data = response.read().decode('utf-8')
print(data)
|
Cookies
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40 | import urllib.request
import urllib.parse
from http import cookiejar
# 先登录,再带着Cookie访问
# 登录网址
login_url = "https://www.yaozh.com/login/"
# 登录参数
login_data = {
'username': 'gaoyuanzhi',
'pwd': '',
'formhash': '5798AB70BB',
'backurl': 'https%3A%2F%2Fwww.yaozh.com%2F'
}
# 参数需要转码,POST请求需要使用byte类型
data = urllib.parse.urlencode(login_data).encode('utf-8')
header = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36",
}
# 保存cookie
cookie_jar = cookiejar.CookieJar()
# 定义有添加cookie功能的处理器
cookie_handler = urllib.request.HTTPCookieProcessor(cookie_jar)
# 根据处理器生成opener
opener = urllib.request.build_opener(cookie_handler)
login_request = urllib.request.Request(login_url, headers=header, data=data)
# 发起登录请求
opener.open(login_request)
center = 'https://www.yaozh.com/member/'
center_request = urllib.request.Request(center, headers=header)
# 登录到个人中心
response = opener.open(center_request)
data = response.read().decode('utf-8')
with open('yaozhi.html', 'w', encoding='utf-8') as f:
f.write(data)
|
2. requests
get
| import requests
url = 'http://www.baidu.com'
response = requests.get(url)
print(response) # <Response [200]>
data = response.content.decode('utf-8')
print(type(data)) # <class 'str'>
print(response.status_code) # 200
|
| import requests
url = 'http://www.baidu.com'
header = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36",
}
response = requests.get(url, headers=header)
print('响应状态码:',response.status_code)
print('请求头:',response.request.headers)
print('响应头:',response.headers)
print('cookie:',response.cookies)
print('实际请求的url',response.url)
|
get请求带参数
| import requests
payload = {'key1': 'value1', 'key2': 'value2'}
r = requests.get("http://httpbin.org/get", params=payload)
print(r.url)
|
代理
1
2
3
4
5
6
7
8
9
10
11
12 | import requests
url = 'https://www.cnblogs.com/gaoyuanzhi' # 访问百度返回500
header = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36",
}
free_proxy = {
'http':'101.231.104.82:80'
}
response = requests.get(url=url, headers=header, proxies=free_proxy)
print(response.status_code) # 200
|
忽略SSL认证
| import requests
url = 'https://www.12306.cn' # 2020-3-2访问时,CA已经信任了
header = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36",
}
# 忽略CA证书认证
response = requests.get(url=url, headers=header, verify=False)
data = response.content.decode()
with open('12306.html', 'w', encoding='utf-8') as f:
f.write(data)
|
cookies
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 | import requests
member_url = 'https://www.yaozh.com/member/'
login_url = "https://www.yaozh.com/login/"
header = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36",
}
login_data = {
'username': 'xxxxxx',
'pwd': 'xxxxxxx',
'formhash': 'xxxxxxx',
'backurl': 'https%3A%2F%2Fwww.yaozh.com%2F'
}
# 可以自动保存cookie
s = requests.Session()
login_response = s.post(login_url, data=login_data, headers=header)
data = s.get(member_url, headers=header).content.decode()
with open('yaozhi.html', 'w', encoding='utf-8') as f:
f.write(data)
|