Selenium是一个自动化网页操作的工具,可以模拟人在浏览器中的各种操作。它常用于网站测试、数据采集和重复性网页任务自动化。
使用pip安装Selenium库:
pip install selenium
安装完成后检查版本:
import selenium
print(selenium.__version__)
Selenium需要通过浏览器驱动来控制浏览器。不同浏览器需要不同的驱动:
Chrome浏览器:ChromeDriver
Firefox浏览器:GeckoDriver
Edge浏览器:EdgeDriver
从Selenium 4开始,建议使用Service对象来配置驱动:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
# 指定ChromeDriver路径
service = ChromeService(executable_path="/path/to/chromedriver")
driver = webdriver.Chrome(service=service)
Selenium 4可以自动下载匹配的驱动,但在国内网络环境下可能不太稳定:
from selenium import webdriver
driver = webdriver.Chrome() # 自动检测和下载驱动
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
# 启动浏览器
service = ChromeService(executable_path="/path/to/chromedriver")
driver = webdriver.Chrome(service=service)
# 打开网页
driver.get("https://www.baidu.com")
# 获取页面标题
print("页面标题:", driver.title)
# 关闭浏览器
driver.quit()
Selenium提供多种方式查找元素:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService
service = ChromeService(executable_path="/path/to/chromedriver")
driver = webdriver.Chrome(service=service)
try:
    driver.get("https://www.baidu.com")

    # Locate a single element by its id attribute.
    search_box = driver.find_element(By.ID, "kw")

    # Locate by class name:
    # search_button = driver.find_element(By.CLASS_NAME, "s_btn")

    # Collect every anchor tag on the page.
    links = driver.find_elements(By.TAG_NAME, "a")
    print(f"找到 {len(links)} 个链接")

    # Locate by CSS selector:
    # elements = driver.find_elements(By.CSS_SELECTOR, ".class_name")

    # Locate by XPath:
    # element = driver.find_element(By.XPATH, "//input[@name='wd']")
finally:
    # Always release the browser, even if a lookup above raised.
    driver.quit()


from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.keys import Keys
service = ChromeService(executable_path="/path/to/chromedriver")
driver = webdriver.Chrome(service=service)
try:
driver.get("https://www.baidu.com")
# 找到搜索框
search_box = driver.find_element(By.ID, "kw")
# 输入搜索关键词
search_box.send_keys("Python编程")
# 按回车键搜索
search_box.send_keys(Keys.RETURN)
# 或者找到搜索按钮并点击
# search_button = driver.find_element(By.ID, "su")
# search_button.click()
# 等待页面加载
import time
time.sleep(3)
print("搜索后的页面标题:", driver.title)
finally:
driver.quit()
网页加载需要时间,需要使用等待机制:
import time
# 简单等待3秒
time.sleep(3)
# 设置全局等待时间
driver.implicitly_wait(10) # 最多等待10秒
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# 等待特定元素出现
wait = WebDriverWait(driver, 10) # 最多等待10秒
element = wait.until(EC.presence_of_element_located((By.ID, "content_left")))
# 等待元素可点击
button = wait.until(EC.element_to_be_clickable((By.ID, "submit_btn")))
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def auto_login(username, password):
    """Log in to the example site through a fresh Chrome session.

    Opens the login page, types the credentials into the ``username`` and
    ``password`` fields, clicks the submit button and waits (up to 10
    seconds) for the URL to contain ``"dashboard"``. Any exception raised
    along the way is caught and reported on stdout; the browser is closed
    in every case.

    Args:
        username: Text typed into the field named ``username``.
        password: Text typed into the field named ``password``.
    """
    service = ChromeService(executable_path="/path/to/chromedriver")
    driver = webdriver.Chrome(service=service)
    try:
        # Open the login page.
        driver.get("https://example.com/login")

        # One explicit wait object is reused for every timed condition below.
        wait = WebDriverWait(driver, 10)

        # The username field doubles as the "page has loaded" signal.
        username_field = wait.until(
            EC.presence_of_element_located((By.NAME, "username"))
        )
        username_field.send_keys(username)

        # Enter the password.
        password_field = driver.find_element(By.NAME, "password")
        password_field.send_keys(password)

        # Submit the form.
        login_button = driver.find_element(By.XPATH, "//button[@type='submit']")
        login_button.click()

        # Login counts as successful once the URL contains "dashboard".
        wait.until(EC.url_contains("dashboard"))
        print("登录成功!")
        # Further post-login actions can go here...
    except Exception as e:
        # Boundary of the example: report the failure instead of propagating.
        print(f"登录失败: {e}")
    finally:
        driver.quit()
# 使用示例
# auto_login("your_username", "your_password")
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def scrape_news():
    """Collect title, link and time for every ``.news-item`` on the page.

    Starts a Chrome session, opens the news page and waits (up to 10
    seconds) for the ``.news-item`` elements to be present. Items whose
    fields cannot be extracted are reported on stdout and skipped. The
    browser is closed before returning, whether or not an error occurred.

    Returns:
        A list of dicts with the keys ``'title'``, ``'link'`` and ``'time'``.
    """
    service = ChromeService(executable_path="/path/to/chromedriver")
    driver = webdriver.Chrome(service=service)
    try:
        driver.get("https://news.example.com")
        wait = WebDriverWait(driver, 10)

        # Block until the news list has been rendered.
        news_list = wait.until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".news-item"))
        )

        news_data = []
        for news_item in news_list:
            try:
                # Title text and link are both read from the ".title" element.
                title_element = news_item.find_element(By.CSS_SELECTOR, ".title")
                title = title_element.text
                link = title_element.get_attribute("href")

                # Publication time as displayed on the page.
                time_element = news_item.find_element(By.CSS_SELECTOR, ".time")
                time_text = time_element.text

                news_data.append({
                    'title': title,
                    'link': link,
                    'time': time_text,
                })
            except Exception as e:
                # Best effort: one malformed item must not abort the whole run.
                print(f"提取新闻项时出错: {e}")
                continue

        print(f"成功采集 {len(news_data)} 条新闻")
        return news_data
    finally:
        driver.quit()
# 使用示例
# news = scrape_news()
# for item in news:
# print(f"标题: {item['title']}")
# print(f"时间: {item['time']}")
# print("---")
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.support.ui import Select
import time
def fill_form():
    """Fill in and submit the example contact form in a Chrome session.

    Types a name, e-mail address and message, picks the subject with the
    visible text "技术支持", ticks the ``agree`` checkbox if it is not already
    selected, pauses two seconds and clicks the submit button. The browser
    is closed in every case.
    """
    service = ChromeService(executable_path="/path/to/chromedriver")
    driver = webdriver.Chrome(service=service)
    try:
        driver.get("https://example.com/contact")

        # Name.
        name_field = driver.find_element(By.NAME, "name")
        name_field.send_keys("张三")

        # E-mail address.
        email_field = driver.find_element(By.NAME, "email")
        email_field.send_keys("zhangsan@example.com")

        # Drop-down: Select wraps the <select> element and picks by label.
        subject_select = Select(driver.find_element(By.NAME, "subject"))
        subject_select.select_by_visible_text("技术支持")

        # Message body.
        message_field = driver.find_element(By.NAME, "message")
        message_field.send_keys("这是一个测试消息")

        # Only click the checkbox when it is not ticked yet, so an
        # already-selected box is not toggled off.
        checkbox = driver.find_element(By.NAME, "agree")
        if not checkbox.is_selected():
            checkbox.click()

        # Short pause before submitting.
        time.sleep(2)

        # Submit the form.
        submit_button = driver.find_element(By.XPATH, "//button[@type='submit']")
        submit_button.click()

        print("表单提交成功!")
    finally:
        driver.quit()
# fill_form()
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
service = ChromeService(executable_path="/path/to/chromedriver")
driver = webdriver.Chrome(service=service)
try:
    driver.get("https://example.com")

    # Scroll to the bottom of the page.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    # A script's return value is handed back to Python.
    result = driver.execute_script("return document.title;")
    print("通过JavaScript获取的标题:", result)

    # Change the style of a page element.
    driver.execute_script("document.body.style.backgroundColor = 'lightblue';")
finally:
    driver.quit()


from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.alert import Alert
service = ChromeService(executable_path="/path/to/chromedriver")
driver = webdriver.Chrome(service=service)
try:
    driver.get("https://example.com")

    # Trigger an alert from JavaScript.
    driver.execute_script("alert('这是一个测试弹窗');")

    # Wrap the open alert, read its text and confirm it.
    alert = Alert(driver)
    print("弹窗文本:", alert.text)
    alert.accept()  # click "OK"

    # For a confirm dialog:
    # alert.dismiss()  # click "Cancel"
finally:
    driver.quit()


from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
service = ChromeService(executable_path="/path/to/chromedriver")
driver = webdriver.Chrome(service=service)
# This snippet uses By.ID below but its import list omitted By.
from selenium.webdriver.common.by import By

try:
    driver.get("https://www.baidu.com")

    # Capture the browser window.
    driver.save_screenshot("baidu_homepage.png")

    # Capture a single element.
    search_box = driver.find_element(By.ID, "kw")
    search_box.screenshot("search_box.png")

    print("截图保存成功!")
finally:
    driver.quit()


from selenium.common.exceptions import NoSuchElementException, TimeoutException
def safe_find_element(driver, by, value, timeout=10):
    """Wait for an element and return it, or ``None`` if it is not found.

    Args:
        driver: WebDriver instance to search with.
        by: Locator strategy (for example ``By.ID``).
        value: Locator value matching ``by``.
        timeout: Maximum number of seconds to wait. Defaults to 10.

    Returns:
        The located element, or ``None`` when the lookup timed out or the
        element does not exist (a message is printed in either case).
    """
    # Imported outside the try so that it guards only the wait itself.
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC

    wait = WebDriverWait(driver, timeout)
    try:
        return wait.until(EC.presence_of_element_located((by, value)))
    except TimeoutException:
        print(f"在{timeout}秒内未找到元素: {value}")
        return None
    except NoSuchElementException:
        # NOTE(review): WebDriverWait ignores NoSuchElementException by
        # default, so this branch is presumably never reached - confirm.
        print(f"元素不存在: {value}")
        return None
# 使用示例
# element = safe_find_element(driver, By.ID, "kw")
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService
service = ChromeService(executable_path="/path/to/chromedriver")
driver = webdriver.Chrome(service=service)
try:
    driver.get("https://example.com")

    # Switch the driver's context into the first iframe on the page.
    iframe = driver.find_element(By.TAG_NAME, "iframe")
    driver.switch_to.frame(iframe)

    # Lookups now run inside the iframe.
    button_in_iframe = driver.find_element(By.ID, "iframe_button")
    button_in_iframe.click()

    # Return to the top-level document.
    driver.switch_to.default_content()
finally:
    driver.quit()


from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options
# 配置浏览器选项
chrome_options = Options()
chrome_options.add_argument("--headless") # 无头模式,不显示浏览器窗口
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--window-size=1920,1080")
service = ChromeService(executable_path="/path/to/chromedriver")
driver = webdriver.Chrome(service=service, options=chrome_options)
try:
    driver.get("https://www.baidu.com")
    print("在无头模式下访问百度")
finally:
    # Always release the browser, even if the page load raised.
    driver.quit()


import json
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
def save_cookies(driver, filename):
    """Write the driver's current cookies to ``filename`` as JSON.

    Args:
        driver: Object exposing ``get_cookies()``; its return value is
            passed to ``json.dump`` unchanged.
        filename: Path of the file to create or overwrite.
    """
    cookies = driver.get_cookies()
    # Explicit encoding so the file does not depend on the locale default.
    with open(filename, 'w', encoding='utf-8') as file:
        json.dump(cookies, file)
    print(f"Cookies已保存到 {filename}")
def load_cookies(driver, filename):
    """Read cookies from a JSON file and add each one to the driver.

    Args:
        driver: Object exposing ``add_cookie(cookie)``; it is called once
            per entry, in file order.
        filename: Path of a JSON file holding a list of cookies.
    """
    # Explicit encoding so reading does not depend on the locale default.
    with open(filename, 'r', encoding='utf-8') as file:
        cookies = json.load(file)
    for cookie in cookies:
        driver.add_cookie(cookie)
    print(f"Cookies已从 {filename} 加载")
# 使用示例
service = ChromeService(executable_path="/path/to/chromedriver")
driver = webdriver.Chrome(service=service)
try:
driver.get("https://example.com")
# 登录后保存cookies
# save_cookies(driver, "cookies.json")
# 下次访问时加载cookies
# load_cookies(driver, "cookies.json")
# driver.refresh()
finally:
driver.quit()
下表列出了 selenium 库的常用方法:
| 方法 | 说明 | 示例代码 |
|---|---|---|
| webdriver.Chrome() | 初始化 Chrome 浏览器实例。 | driver = webdriver.Chrome() |
| driver.get(url) | 访问指定的 URL 地址。 | driver.get("https://example.com") |
| driver.find_element(By, value) | 查找第一个匹配的元素。 | element = driver.find_element(By.ID, "id") |
| driver.find_elements(By, value) | 查找所有匹配的元素。 | elements = driver.find_elements(By.CLASS_NAME, "class") |
| element.click() | 点击元素。 | element.click() |
| element.send_keys(value) | 向输入框中发送键盘输入。 | element.send_keys("text") |
| element.text | 获取元素的文本内容。 | text = element.text |
| driver.back() | 浏览器后退。 | driver.back() |
| driver.forward() | 浏览器前进。 | driver.forward() |
| driver.refresh() | 刷新当前页面。 | driver.refresh() |
| driver.execute_script(script, *args) | 执行 JavaScript 脚本。 | driver.execute_script("alert('Hello!')") |
| driver.switch_to.frame(frame_reference) | 切换到指定的 iframe。 | driver.switch_to.frame("frame_id") |
| driver.switch_to.default_content() | 切换回主文档。 | driver.switch_to.default_content() |
| driver.quit() | 关闭浏览器并退出驱动。 | driver.quit() |
| driver.close() | 关闭当前窗口。 | driver.close() |
Selenium是一个功能强大的网页自动化工具,主要用途包括:
网站测试:自动化功能测试
数据采集:从网站获取数据
任务自动化:自动完成重复性网页操作
网页监控:定期检查网站状态
使用Selenium时要注意:
合理使用等待机制,避免元素找不到的问题
处理各种异常情况,让程序更稳定
遵守网站的robots.txt规则
不要过度频繁访问,避免给网站造成压力
更多Selenium的高级用法和实战案例可以在fly63网站的自动化测试专题中找到。记住,自动化工具要用在合法合规的场景中。
本文内容仅供个人学习/研究/参考使用,不构成任何决策建议或专业指导。分享/转载时请标明原文来源,同时请勿将内容用于商业售卖、虚假宣传等非学习用途哦~感谢您的理解与支持!