📚 4 min read
Selenium Guide
Selenium WebDriver is a widely-used tool for browser automation that enables you to control browser behavior programmatically and run automated tests across different browsers.
Key Features
- Multi-browser support
- Multiple programming language bindings
- Extensive browser manipulation capabilities
- Support for complex user interactions
- Headless browser testing
- Screenshot capture
- Page Object Model (POM) support
- Wait strategies
Getting Started
bash
# Install the Selenium WebDriver bindings for Node.js
npm install selenium-webdriver
# Install the browser driver packages (chromedriver and geckodriver)
npm install chromedriver geckodriver
Basic Test Structure
javascript
const { Builder, By, Key, until } = require('selenium-webdriver');
// Search suite: every test runs against its own fresh Chrome session.
describe('Search functionality', function () {
  let driver;

  // Start a new browser session before each test.
  beforeEach(async function () {
    driver = await new Builder().forBrowser('chrome').build();
  });

  // Close the browser after each test.
  afterEach(async function () {
    // build() may have thrown, leaving `driver` unset; guarding here keeps the
    // original setup error from being masked by a TypeError on quit().
    if (driver) {
      await driver.quit();
      driver = undefined;
    }
  });

  it('should search and find results', async function () {
    await driver.get('https://example.com');
    // Type the query and submit it with the Return key.
    await driver.findElement(By.name('q')).sendKeys('selenium', Key.RETURN);
    // Bound the wait (5000 ms, like the other waits in this guide) so a
    // missing results container fails the test instead of waiting forever.
    await driver.wait(until.elementLocated(By.css('.results')), 5000);
    const results = await driver.findElements(By.css('.result-item'));
    expect(results.length).toBeGreaterThan(0);
  });
});
Common Operations
javascript
// Navigation: load a URL, then go back in history and reload the page
await driver.get('https://example.com');
await driver.navigate().back();
await driver.navigate().refresh();
// Finding elements: by id, by CSS selector (all matches), and by exact link text
const element = await driver.findElement(By.id('search'));
const elements = await driver.findElements(By.css('.item'));
const link = await driver.findElement(By.linkText('Click here'));
// Interactions: each call below is an independent example on the located element
await element.click();
await element.sendKeys('text to type');
await element.clear();
await element.submit();
// Waits: the second argument (5000) is the timeout handed to driver.wait
await driver.wait(until.elementLocated(By.id('results')), 5000);
await driver.wait(until.elementIsVisible(element), 5000);
await driver.wait(until.titleIs('Page Title'), 5000);
// JavaScript execution: run a script in the page
await driver.executeScript('return document.title;');
// Async variant: the script passes arguments[0] to setTimeout, so it is invoked after 500 ms
await driver.executeAsyncScript('window.setTimeout(arguments[0], 500);');
Page Object Model Example
javascript
/**
 * Page object wrapping the login form.
 *
 * Holds the driver plus the locators of the username field, the password
 * field and the submit button, and exposes a single `login` action.
 */
class LoginPage {
  /**
   * @param driver WebDriver instance used for every lookup and wait.
   */
  constructor(driver) {
    Object.assign(this, {
      driver,
      usernameInput: By.id('username'),
      passwordInput: By.id('password'),
      loginButton: By.css('button[type="submit"]'),
    });
  }

  /**
   * Fill in the credentials, submit the form and wait (up to 5000) for the
   * URL to contain '/dashboard'.
   *
   * @param username Text typed into the username field.
   * @param password Text typed into the password field.
   */
  async login(username, password) {
    const { driver } = this;

    const userField = await driver.findElement(this.usernameInput);
    await userField.sendKeys(username);

    const passwordField = await driver.findElement(this.passwordInput);
    await passwordField.sendKeys(password);

    const submitButton = await driver.findElement(this.loginButton);
    await submitButton.click();

    const dashboardLoaded = until.urlContains('/dashboard');
    await driver.wait(dashboardLoaded, 5000, 'Dashboard page did not load');
  }
}
Core Concepts
WebDriver Setup
python
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument('--headless') # Run in headless mode
# NOTE(review): the next two flags are commonly added for container/CI runs — confirm they are needed in your environment
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
# Initialize WebDriver
# NOTE(review): 'path/to/chromedriver' is a placeholder — point it at a real driver binary
service = Service('path/to/chromedriver')
driver = webdriver.Chrome(service=service, options=chrome_options)
# Set implicit wait time
# NOTE(review): Selenium's documentation advises against combining implicit waits with
# explicit WebDriverWait conditions — confirm which strategy the suite should use
driver.implicitly_wait(10)
Element Location
python
from selenium.webdriver.common.by import By
# Different locator strategies: each call passes a By strategy plus the value to match
driver.find_element(By.ID, 'search')
driver.find_element(By.NAME, 'q')
driver.find_element(By.CLASS_NAME, 'search-input')
driver.find_element(By.CSS_SELECTOR, '#search-form input')
driver.find_element(By.XPATH, "//input[@type='search']")
# Find multiple elements: find_elements (plural) collects every match, here all <a> tags
elements = driver.find_elements(By.TAG_NAME, 'a')
Interactions
python
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
# Basic interactions (each call is an independent example on a located element)
element.click()
element.send_keys('text')
element.clear()
element.submit()

# Complex interactions
# Each gesture is built on its own ActionChains and performed immediately.
# Queuing every call on one shared chain would replay all of them back-to-back
# in a single perform(), including a click_and_hold that is never released.

# Hover over an element
ActionChains(driver).move_to_element(element).perform()

# Press the mouse button on an element, then release it
ActionChains(driver).click_and_hold(element).release().perform()

# Drag one element and drop it onto another
ActionChains(driver).drag_and_drop(source, target).perform()

# Ctrl+click an element
(
    ActionChains(driver)
    .key_down(Keys.CONTROL)
    .click(element)
    .key_up(Keys.CONTROL)
    .perform()
)
Advanced Features
Wait Strategies
python
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Explicit wait: build a WebDriverWait with timeout=10, then block until the condition holds
wait = WebDriverWait(driver, timeout=10)
# until() hands back the condition's result, which is stored as `element`
element = wait.until(
EC.presence_of_element_located((By.ID, 'myDynamicElement'))
)
# Custom wait condition
def element_has_css_class(locator, css_class):
    """Build a wait condition that holds once an element carries a CSS class.

    Args:
        locator: ``(by, value)`` pair, unpacked into ``driver.find_element``.
        css_class: Class name to look for.

    Returns:
        A callable taking the driver. It returns ``True`` when ``css_class``
        is one of the whitespace-separated tokens of the element's ``class``
        attribute, and ``False`` otherwise.
    """
    def _predicate(driver):
        element = driver.find_element(*locator)
        # A missing attribute comes back as None; treat it as "no classes"
        # rather than failing with a TypeError on the membership test.
        class_attr = element.get_attribute("class") or ""
        # Compare whole tokens so 'active' does not match 'inactive'.
        return css_class in class_attr.split()
    return _predicate
# Usage: block until the element with ID 'myElement' satisfies the custom condition for 'active'
wait.until(element_has_css_class((By.ID, 'myElement'), 'active'))
JavaScript Execution
python
# Execute JavaScript: run a script in the page (this one returns the document title)
driver.execute_script("return document.title;")
# Scroll into view: the located element is passed in and read as arguments[0] inside the script
element = driver.find_element(By.ID, 'bottom')
driver.execute_script("arguments[0].scrollIntoView(true);", element)
# Modify DOM: create a <div>, set its content, and append it to the document body
driver.execute_script("""
let div = document.createElement('div');
div.innerHTML = 'New Element';
document.body.appendChild(div);
""")
Window Management
python
# Handle multiple windows
# Remember the starting window so focus can be restored later.
main_window = driver.current_window_handle
driver.switch_to.new_window('tab')

# Switch between windows: focus each handle in turn until the title matches
for handle in driver.window_handles:
    driver.switch_to.window(handle)
    if 'Expected Title' in driver.title:
        break
else:
    # No window matched: return to the starting window instead of silently
    # staying on whichever handle happened to be visited last.
    driver.switch_to.window(main_window)

# Frame handling: enter a frame by name, then return to the top-level document
driver.switch_to.frame('frame_name')
driver.switch_to.default_content()
Testing Patterns
Page Object Model
python
class LoginPage:
    """Page object for the login form.

    Locators are stored as ``(by, value)`` tuples and unpacked into
    ``driver.find_element`` when needed.
    """

    def __init__(self, driver):
        """Store the driver and the locators used by this page.

        Args:
            driver: WebDriver instance used for all element lookups.
        """
        self.driver = driver
        self.username_input = (By.ID, 'username')
        self.password_input = (By.ID, 'password')
        self.login_button = (By.CSS_SELECTOR, 'button[type="submit"]')
        # NOTE(review): placeholder selector for the login error banner —
        # adjust it to the application's real markup.
        self.error_message = (By.CSS_SELECTOR, '.error-message')

    def login(self, username, password):
        """Type the credentials into the form and click the submit button.

        Args:
            username: Text sent to the username field.
            password: Text sent to the password field.
        """
        self.driver.find_element(*self.username_input).send_keys(username)
        self.driver.find_element(*self.password_input).send_keys(password)
        self.driver.find_element(*self.login_button).click()

    def get_error_message(self):
        """Return the text of the login error element.

        The data-driven login test in this guide calls this method for its
        failure cases, so the page object has to provide it.
        """
        return self.driver.find_element(*self.error_message).text
Data-Driven Testing
python
import pytest
import csv
def read_test_data(path='test_data.csv'):
    """Load test cases from a CSV file as a list of row dictionaries.

    Args:
        path: CSV file to read; the first row supplies the dictionary keys.
            Defaults to ``'test_data.csv'``.

    Returns:
        One dictionary per data row, in file order.
    """
    # newline='' lets the csv module do its own line-ending handling, which
    # it needs to parse quoted fields containing line breaks correctly.
    with open(path, 'r', newline='') as file:
        return list(csv.DictReader(file))
@pytest.mark.parametrize('test_data', read_test_data())
def test_login(driver, test_data):
    """Log in with one row of CSV data and check the expected outcome.

    Args:
        driver: WebDriver instance handed to the page object.
        test_data: Row dictionary with the keys ``username``, ``password``,
            ``expected_result`` and, for failure rows, ``error_message``.
    """
    login_page = LoginPage(driver)
    login_page.login(test_data['username'], test_data['password'])
    if test_data['expected_result'] == 'success':
        # current_url is an absolute URL (scheme and host included), so it can
        # never equal the bare path; check that the path is part of it.
        assert '/dashboard' in driver.current_url
    else:
        assert login_page.get_error_message() == test_data['error_message']
Advanced Testing
API Integration
python
import requests
class TestUserFlow:
    """UI test whose test user is created and removed through the HTTP API."""

    # Seconds allowed for each API call, so a stalled server cannot hang the run.
    API_TIMEOUT = 10

    def setup_method(self):
        """Create the test user via the API and keep the decoded JSON response."""
        # Create test data via API
        response = requests.post(
            'https://api.example.com/users',
            json={'name': 'Test User', 'email': 'test@example.com'},
            timeout=self.API_TIMEOUT,
        )
        # Fail here with the HTTP error instead of later with a confusing
        # KeyError when the error payload lacks the expected fields.
        response.raise_for_status()
        self.test_user = response.json()

    def test_user_profile(self, driver):
        """Log in through the UI and check the profile shows the user's name."""
        # Login via UI
        login_page = LoginPage(driver)
        login_page.login(self.test_user['email'], 'password')
        # Verify profile data
        profile_element = driver.find_element(By.CLASS_NAME, 'profile')
        assert self.test_user['name'] in profile_element.text

    def teardown_method(self):
        """Delete the user created in ``setup_method``."""
        # Cleanup test data
        requests.delete(
            f"https://api.example.com/users/{self.test_user['id']}",
            timeout=self.API_TIMEOUT,
        )
Screenshot Capture
python
import os
from datetime import datetime
def take_screenshot(driver, name, screenshot_dir='screenshots'):
    """Save a timestamped screenshot and return the path it was written to.

    Args:
        driver: Object exposing ``save_screenshot(filename)``.
        name: Prefix for the file name.
        screenshot_dir: Directory that receives the file; created if missing.
            Defaults to ``'screenshots'``.

    Returns:
        The file name passed to ``driver.save_screenshot``, in the form
        ``<screenshot_dir>/<name>_<YYYYmmdd_HHMMSS>.png``.
    """
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    # exist_ok avoids the check-then-create race when several tests (or
    # parallel workers) try to create the directory at the same moment.
    os.makedirs(screenshot_dir, exist_ok=True)
    filename = f"{screenshot_dir}/{name}_{timestamp}.png"
    driver.save_screenshot(filename)
    return filename
# Usage in test
def test_with_screenshot(driver):
    """Open the example page and assert on its title.

    When the assertion fails, a screenshot named 'test_failure' is taken
    before the AssertionError is re-raised.
    """
    try:
        # Test steps
        driver.get('https://example.com')
        page_title = driver.title
        assert 'Expected Title' in page_title
    except AssertionError:
        # Capture the browser state, then let the failure propagate.
        take_screenshot(driver, 'test_failure')
        raise
Best Practices
1. Element Location
python
# ❌ Avoid: positional selectors depend on element order and break when the layout changes
driver.find_element(By.CSS_SELECTOR, 'button:nth-child(2)')
driver.find_element(By.XPATH, '//div[contains(@class, "btn")][2]')
# ✅ Prefer: stable identifiers such as IDs, names, or dedicated test attributes
driver.find_element(By.ID, 'submit-button')
driver.find_element(By.NAME, 'email')
driver.find_element(By.CSS_SELECTOR, '[data-testid="submit"]')
2. Waits
python
# ❌ Avoid: a fixed sleep always pauses for the full 5 seconds, whether or not the page is ready
import time
time.sleep(5)
# ✅ Prefer: explicit waits that block on a named condition, bounded by the wait's timeout (10 here)
wait = WebDriverWait(driver, 10)
wait.until(EC.element_to_be_clickable((By.ID, 'button')))
wait.until(EC.visibility_of_element_located((By.CLASS_NAME, 'modal')))
3. Error Handling
python
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import logging

logger = logging.getLogger(__name__)

# An explicit wait reports failure as TimeoutException. By default
# WebDriverWait ignores NoSuchElementException while polling, so that
# exception never escapes from until() and needs no handler here.
try:
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, 'dynamic-content'))
    )
except TimeoutException:
    take_screenshot(driver, 'timeout_error')
    raise

# A direct lookup is where NoSuchElementException can actually be raised.
try:
    element = driver.find_element(By.ID, 'dynamic-content')
except NoSuchElementException as e:
    logger.error("Element not found: %s", e)
    raise
4. Resource Management
python
class TestBase:
    """Base class that shares one browser session across a test class.

    Subclasses may override ``base_url`` to point at their own start page.
    """

    # Defined at class level so teardown_class can run safely even when
    # setup_class failed before a driver was created.
    driver = None
    # Start page loaded before every test; override in subclasses as needed.
    base_url = 'https://example.com'

    @classmethod
    def setup_class(cls):
        """Start the browser once for the whole class and maximize its window."""
        cls.driver = webdriver.Chrome()
        cls.driver.maximize_window()

    @classmethod
    def teardown_class(cls):
        """Quit the shared browser, if one was started."""
        if cls.driver is not None:
            cls.driver.quit()
            # Drop the reference so a repeated teardown does not touch a
            # session that has already been closed.
            cls.driver = None

    def setup_method(self):
        """Reset state before each test: clear cookies and open ``base_url``."""
        self.driver.delete_all_cookies()
        self.driver.get(self.base_url)