python 采集斗图啦(多线程)
import concurrent import requests; from concurrent.futures import ThreadPoolExecutor import os; import parsel; def send_request(url): header = { "user-agent":'Mozilla/5.0 (Macintos
import concurrent import requests; from concurrent.futures import ThreadPoolExecutor import os; import parsel; def send_request(url): header = { "user-agent":'Mozilla/5.0 (Macintos
import requests; import re; import os; import parsel; # 1.请求网页 header = { "user-agent":'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrom
import requests; import re; import os; # 1.请求网页 header = { "user-agent":'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100
过程 获取列表链接 打开链接获取详情 处理数据 保存数据 对数据进行处理 处理杂质 可视化数据 import urllib import pandas as pd import requests from lxml import etree import os import csv import time import pymysql # 全局取消证书验证
import requests #常用的形式 # requests.get() # requests.post() # requests.put() # requests.request('post') # 参数 # url='xxx', # params={'name':11,'pwd':1212}, # cookies = {}, # headers =
import requests from bs4 import BeautifulSoup ## 获取token r1 = requests.get('https://github.com/login') s1 = BeautifulSoup(r1.text,'html.parser') token = s1.find(name='input',attrs
import requests from bs4 import BeautifulSoup response = requests.get("https://www.autohome.com.cn/news/") # 1. content /text 的区别 # print(response.content) # content 拿到的字节 response
itchat 查看手机微信中用户比例 import itchat import pandas as pd # Python抓取微信好友数量、性别、城市分布,以及将py文件打包成exe. #https://zhuanlan.zhihu.com/p/73295760 # 先登录 itchat.auto_login(hotReload=True) # 获取好友列表
sign token获取不到可测试手机端 百度翻译(案例) import requests header = { 'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.38
import requests class TiebaSpider(): def __init__(self,tieba_name): self.tieba_name = tieba_name print(self.tieba_name) self.url_temp = "https://tieba.baidu.com/f?kw="+tieba_name+"