Generic Solution for Forms Validation with Selenium.
I was assigned the task of writing a generic Python script capable of filling out any HTML form, submitting it, and then confirming the success of the form submission, ensuring that our data reaches the backend. The primary objective of this script is to determine whether a given form is a dummy or a functional one.
Creating a generic script for handling all types of forms posed a significant challenge due to the inherent differences between most forms.
Input Types Challenge
1. Diverse Input Fields: Each form may contain varying types of input fields.
2. Multiple Forms on One Page: In some cases, there will be multiple forms on a single page, necessitating the identification of the correct form for data entry.
3. Specific Data Requirements: Certain input fields may demand specific data types or lengths.
4. Human-Like Input Speed: Occasionally, a website or form may require data input at a human-like speed to avoid being flagged as a bot.
5. Scrolling for Access: Some forms may only allow data input after scrolling to the relevant section of the page.
Validation Challenge
Every website employs various methods to validate submitted data, such as checking whether the data in each input field is of the correct type. This validation process can be based on different factors:
- Type Attribute: Some websites perform validation based on the “type” attribute within the input tag.
- Placeholder: In other cases, validation is conducted using the placeholder text associated with the input field.
- Label: Occasionally, input field validation relies on the label, which is typically a preceding tag that provides context for the input field.
Sometimes there will be checkboxes, dropdowns, radio buttons, and file upload fields. We also need to handle those input fields.
The tricky part will be capturing the API call that carries our submitted data to the database. Through its status code, we will know whether our data was submitted successfully.
Another crucial aspect of the process involves ensuring that our request to the server is as genuine as possible, minimizing the chances of being flagged as an automated bot. Otherwise, the website may block our access right from the start, denying us access to its pages.
Let us now start our practical part.
First and foremost we will import required libraries.
import logging
import os
from faker import Faker
import re
from selenium import webdriver
from selenium.common.exceptions import (ElementNotInteractableException,
NoSuchElementException)
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium_stealth import stealth
from src.common.utils import *
Next, we need to add some additional settings to hide Selenium's bot identity from the website.
# Send ERROR-and-above records to a log file. The path is built with
# os.path.join so it contains no backslash escapes ("\e" is not a valid escape
# sequence) and works on every platform; the directory is created if missing
# because logging does not create parent directories itself.
log_path = os.path.join('src', 'errors_logs', 'error.log')
os.makedirs(os.path.dirname(log_path), exist_ok=True)
logging.basicConfig(
    filename=log_path,
    filemode='w',
    level=logging.ERROR,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Work on a copy so the shared DesiredCapabilities.CHROME dict is not mutated.
caps = DesiredCapabilities.CHROME.copy()
caps['goog:loggingPrefs'] = {'performance': 'ALL'}  # getting all the logs

options = webdriver.ChromeOptions()
# Enable the performance log (read later through driver.get_log('performance')).
# It is set on the options object because newer Selenium 4 releases no longer
# accept the ``desired_capabilities`` argument of webdriver.Chrome.
options.set_capability('goog:loggingPrefs', caps['goog:loggingPrefs'])
options.add_argument("--enable-logging")
options.add_argument('--log-level=1')
options.add_argument("--disable-notifications")
options.add_argument("-incognito")
# Chrome expects "width,height"; the "1920x1080" form is not parsed.
options.add_argument("--window-size=1920,1080")
options.add_argument("--start-maximized")
options.add_argument("--disable-extensions")
options.add_argument('--disable-blink-features=AutomationControlled')
options.add_experimental_option("excludeSwitches", ["enable-automation"])
# uncomment the below argument to make the browser headless
# options.add_argument("--headless=new")
driver = webdriver.Chrome(options=options)

# Setting User Agent
driver.execute_cdp_cmd(
    'Network.setUserAgentOverride',
    {"userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.53 Safari/537.36'},
)
# Make navigator.webdriver report undefined. The script is registered through
# CDP so it runs in every new document; a plain execute_script call would only
# patch the page that is loaded at this moment.
driver.execute_cdp_cmd(
    'Page.addScriptToEvaluateOnNewDocument',
    {"source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"},
)
stealth(
    driver,
    languages=["en-US", "en"],
    vendor="Google Inc.",
    platform="Win32",
    webgl_vendor="Intel Inc.",
    renderer="Intel Iris OpenGL Engine",
    fix_hairline=True,
)
I also wrote some utility functions which will be needed in our main script. I am pasting those functions below.
Note: Please place this script at the following path, because we will be importing it in our main script: “src/common/utils.py”
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import time
from faker import Faker
import random
import string,re
# Module-level Faker instance, created once when this module is imported.
faker = Faker()
# Function for clicking on Submit button
def wait_and_click_elem(driver, locator, selector, t=20) -> None:
    """Wait for an element, scroll it into view and click it via JavaScript.

    Args:
        driver: Object used as the WebDriverWait target and to execute the
            scroll/click scripts.
        locator: "XPATH" or "CSS"; any other value is treated as an element id.
        selector: Selector string interpreted according to ``locator``.
        t: Maximum number of seconds to wait for the element to be present.
    """
    # Translate the textual locator into a By strategy. (``By.locator`` does
    # not exist, so the previous up-front lookup failed before any waiting.)
    if locator == "XPATH":
        by = By.XPATH
    elif locator == "CSS":
        by = By.CSS_SELECTOR
    else:
        by = By.ID

    element = None
    try:
        element = WebDriverWait(driver, t).until(
            EC.presence_of_element_located((by, selector))
        )
        driver.execute_script("arguments[0].scrollIntoView();", element)
        time.sleep(1)
        driver.execute_script("arguments[0].click();", element)
        time.sleep(2)
    except Exception:
        print("Could not find any click()")
        # Fall back to a native click only when the element was located.
        if element is not None:
            element.click()
def slow_typing(element, text, delay=0.3):
    """Type ``text`` into ``element`` one character at a time.

    Args:
        element: Object exposing ``send_keys``.
        text: Characters to send, in order.
        delay: Seconds to sleep after each character; the default keeps the
            original fixed pace of 0.3 s.
    """
    for character in text:
        element.send_keys(character)
        time.sleep(delay)
def extract_alphanumeric(text):
    """Reduce ``text`` to lowercase ASCII letters and whitespace.

    Every character that is neither an ASCII letter nor whitespace (digits
    and punctuation included) is dropped; the remainder is lower-cased and
    stripped of leading/trailing whitespace.
    """
    letters_only = re.compile(r'[^a-zA-Z\s]').sub('', text)
    return letters_only.strip().lower()
def pause_program(min_seconds=1, max_seconds=2) -> None:
    """Sleep for a random duration and report it on stdout.

    Args:
        min_seconds: Lower bound of the pause, in seconds.
        max_seconds: Upper bound of the pause, in seconds.
    """
    # Draw a random float between the two bounds (1 and 2 seconds by default).
    rand_time = random.uniform(min_seconds, max_seconds)
    # Pause the program for the random amount of time
    time.sleep(rand_time)
    print("Program paused for", round(rand_time, 2), "seconds.")
def accept_cookies(driver) -> None:
    """Try to dismiss a cookie banner; failures are ignored on purpose.

    Two attempts are made: first an "accept all" button inside the shadow
    root of ``div#usercentrics-root``, then a regular button whose text
    contains " Accept all cookies".

    Args:
        driver: WebDriver showing the page that may contain a banner.
    """
    wait = WebDriverWait(driver, 20)
    try:
        # Banner rendered inside a shadow root.
        parent_element = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[id="usercentrics-root"]')))
        parent_el_shadow_root = parent_element.shadow_root
        time.sleep(5)
        accept_all_button = parent_el_shadow_root.find_element(By.CSS_SELECTOR, 'button[data-testid="uc-accept-all-button"]')
        accept_all_button.click()
        print('accepted cookies')
    except Exception:
        # Best effort: the page may simply have no such banner. ``Exception``
        # (not a bare except) so Ctrl-C / SystemExit still propagate.
        pass
    try:
        # Local renamed so it no longer shadows this function's own name.
        accept_button = driver.find_element(By.XPATH, '//button[contains(text()," Accept all cookies")]')
        accept_button.click()
        print("accepted cookies")
    except Exception:
        pass
def generate_password(length=8) -> str:
    """Return ``length`` random characters drawn from ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)
def check_part_in_string(long_string, part):
    """Tell whether ``part`` occurs literally in ``long_string``, ignoring case."""
    # re.escape makes ``part`` match as plain text rather than as a pattern.
    found = re.search(re.escape(part), long_string, flags=re.IGNORECASE)
    return found is not None
There is another function which handles the file-upload part. The function takes the file-upload tag and checks whether it requires a specific file type.
Note: You will need to place all the possible test files in the folder.
def handle_file_upload(file_input):
    """Send a local ``test<ext>`` file that matches the input's ``accept`` list.

    Only the first entry of the comma-separated ``accept`` attribute is used.
    An entry that is already an extension (".pdf") is used as-is; a MIME type
    ("application/pdf") is translated with ``mimetypes``; anything else,
    including a missing attribute, falls back to ``test.txt``.

    Args:
        file_input: The ``<input type="file">`` element to fill.
    """
    import mimetypes  # local import: only this helper needs it

    extension = '.txt'
    accepted_types = file_input.get_attribute("accept")
    if accepted_types:
        # Entries may be written as ".pdf, .doc", so strip the whitespace.
        first_type = accepted_types.split(",")[0].strip()
        if first_type.startswith('.'):
            extension = first_type
        else:
            # guess_extension returns None for unknown/wildcard types.
            extension = mimetypes.guess_extension(first_type) or extension
    absolute_path = os.path.join(current_dir, f'test{extension}')
    file_input.send_keys(absolute_path)
Now I will paste our main function in parts and try to explain each one; then, at the end, I will paste the complete function.
def get_logs(url: str) -> dict:
    """Open ``url`` and locate the form to fill (first part of the routine).

    The form holding the most ``input``/``textarea`` elements is assumed to
    be the one to fill. When the page has no ``form`` element at all, a dict
    with status 404 is returned instead of failing on an empty list.

    Args:
        url: Address of the page that contains the form.
    """
    print("Filling out contact us form on this URL--------->", url, "<----------")
    driver.get(url)
    accept_cookies(driver)
    wait = WebDriverWait(driver, 10)
    html = driver.page_source
    # Finding total available contact forms on the page
    contact_forms = driver.find_elements(By.CSS_SELECTOR, 'form')
    if not contact_forms:
        return {"status": 404, "message": "No form found"}
    # Picking the form with the most input fields (first one wins on a tie)
    form_element = max(
        contact_forms,
        key=lambda form: len(form.find_elements(By.CSS_SELECTOR, 'input,textarea')),
    )
    # Scrolling to the specific form element
    driver.execute_script("arguments[0].scrollIntoView();", form_element)
    # WebDriverWait hands the driver (not the form) to the callable.
    wait.until(lambda drv: drv.execute_script('return document.readyState') == 'complete')
In this function we take the URL where the form is located. First of all, we locate every available form on the page and then filter out the one that has the maximum number of input or textarea tags, assuming that this form is the correct one to fill out. Once we have selected the correct form, we scroll down to where it is located and wait until the page is in the ready state.
Pasting below a dictionary through which we map each required input field and input the data.
# Fixed sample values shared by several entries below.
_SAMPLE_URL = "https://www.google.com"
_SAMPLE_PHONE = "+923142102920"


def _typer(value):
    """Build a filler that slowly types the fixed ``value`` into an element."""
    return lambda el: slow_typing(el, value)


# Maps a field keyword to a callable that fills the element it is given.
input_mapping = {
    # fake.email() is evaluated on every call
    'email': lambda el: slow_typing(el, fake.email()),
    'age': _typer("19"),
    'name': _typer("Drew"),
    'website': _typer(_SAMPLE_URL),
    'tel': _typer(_SAMPLE_PHONE),
    'number': _typer(_SAMPLE_PHONE),
    'phone': _typer(_SAMPLE_PHONE),
    'text': _typer("This is the test text"),
    'url': _typer(_SAMPLE_URL),
    # dates are sent in one go rather than typed slowly
    'date': lambda el: el.send_keys("12/12/2012"),
    'subject': _typer("Hi"),
    'message': _typer("This is test text"),
    # ``password`` is looked up when the filler runs
    'password': lambda el: slow_typing(el, password),
    'file': lambda el: handle_file_upload(el),
}
Below is the part of the code in which we loop over each input field and enter the required data. There are multiple checks before entering the data in the form. First, we check whether the label (the preceding sibling of the current input field) is available in our input_mapping dictionary; if so, we enter the data in the current field. Second, if the label is not in our input_mapping dictionary, we fill the field based on its placeholder.
Note: We did not enter the data based on the field’s “type” attribute, because sometimes the input type of all input fields is “text” while the website performs JavaScript validation through either the preceding label tag or the placeholder.
After entering the data, we move on to selecting an option from the dropdown, checking the checkboxes, and filling the textarea tags. After that, we finally click the submit button.
Finally, we detect the API calls that were made after we clicked the submit button. We check whether any of these API calls carries our submitted data; based on this, we set the status, which indicates whether the form was a dummy form or a properly working form.
for element in form_element.find_elements(By.CSS_SELECTOR, 'input:not([type=hidden]):not([type=checkbox]):not([type=radio]):not([type=submit]):not([type=button]):not([class*=hidden])'):
input_type = element.get_attribute("type")
mininmum_req= element.get_attribute("min")
placeholder = element.get_attribute("placeholder")
input_type = input_type.lower()
placeholder = placeholder.lower()
label =None
try:
label = element.find_element(By.XPATH, "preceding::label[1]")
label =extract_alphanumeric(label.text)
except:
pass
try:
if label in input_mapping:
element.clear()
input_mapping[label](element)
elif placeholder in input_mapping:
element.clear()
input_mapping[placeholder](element)
else:
if input_type =='number' and mininmum_req !='':
element.clear()
element.send_keys(mininmum_req)
else:
if input_type in input_mapping:
element.clear()
input_mapping[input_type](element)
except NoSuchElementException:
print("Element not found.")
except ElementNotInteractableException:
print("Element not interactable.")
print("label is ->" ,label)
print("Type of input is ->", input_type)
except Exception as e:
print(f"Error occurred: {str(e)}")
try:
dropdown =form_element.find_element(By.CSS_SELECTOR,'select')
select = Select(dropdown)
select.select_by_index(1)
except:
pass
try:
form_element.find_element(By.CSS_SELECTOR, 'input[type=radio]').click()
except:
pass
try:
checkboxes =form_element.find_elements(By.CSS_SELECTOR, 'input[type=checkbox]')
for checkbox in checkboxes:
checkbox.click()
except:
pass
try:
textareas=form_element.find_elements(By.CSS_SELECTOR,'textarea')
for textarea in textareas:
textarea.clear()
slow_typing(textarea, " This is the test text" )
except:
pass
# finally clicking on submit button
form_element.find_element(By.CSS_SELECTOR,'[type=submit],[type=button]').click()
pause_program()
performance_log =driver.get_log('performance')
status = None
part= 'test text'
part2 = 'Drew'
for d in performance_log:
if check_part_in_string(str(d), part2):
# print(str(d))
status = 200
break
elif check_part_in_string(str(d), part):
status=200
break
elif 'recaptcha' in html:
status =403
break
else:
status = 500
if status ==200:
return {"status":status, "messsage":"success"}
elif status == 403:
return {"status":status, "message": "Failed by captcha"}
elif status ==500:
return {"status":status , "message": "Dummy form"}
else:
pass
Here is the complete code .
main.py
import logging
import os
from faker import Faker
import re
from selenium import webdriver
from selenium.common.exceptions import (ElementNotInteractableException,
NoSuchElementException)
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium_stealth import stealth
from src.common.utils import *
# Get the current working directory
current_dir = os.getcwd()

# Configure the logging settings. The path is built with os.path.join so it
# contains no backslash escapes ("\e" is not a valid escape sequence) and
# works on every platform; the directory is created if missing because
# logging does not create parent directories itself.
log_path = os.path.join('src', 'errors_logs', 'error.log')
os.makedirs(os.path.dirname(log_path), exist_ok=True)
logging.basicConfig(
    filename=log_path,
    filemode='w',
    level=logging.ERROR,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Work on a copy so the shared DesiredCapabilities.CHROME dict is not mutated.
caps = DesiredCapabilities.CHROME.copy()
caps['goog:loggingPrefs'] = {'performance': 'ALL'}  # getting all the logs

options = webdriver.ChromeOptions()
# Enable the performance log (read later through driver.get_log('performance')).
# It is set on the options object because newer Selenium 4 releases no longer
# accept the ``desired_capabilities`` argument of webdriver.Chrome.
options.set_capability('goog:loggingPrefs', caps['goog:loggingPrefs'])
options.add_argument("--enable-logging")
options.add_argument('--log-level=1')
options.add_argument("--disable-notifications")
options.add_argument("-incognito")
# Chrome expects "width,height"; the "1920x1080" form is not parsed.
options.add_argument("--window-size=1920,1080")
options.add_argument("--start-maximized")
options.add_argument("--disable-extensions")
options.add_argument('--disable-blink-features=AutomationControlled')
options.add_experimental_option("excludeSwitches", ["enable-automation"])
# uncomment the below argument to make the browser headless
# options.add_argument("--headless=new")
driver = webdriver.Chrome(options=options)

# Setting User Agent
driver.execute_cdp_cmd(
    'Network.setUserAgentOverride',
    {"userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.53 Safari/537.36'},
)
# Make navigator.webdriver report undefined. The script is registered through
# CDP so it runs in every new document; a plain execute_script call would only
# patch the page that is loaded at this moment.
driver.execute_cdp_cmd(
    'Page.addScriptToEvaluateOnNewDocument',
    {"source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"},
)
stealth(
    driver,
    languages=["en-US", "en"],
    vendor="Google Inc.",
    platform="Win32",
    webgl_vendor="Intel Inc.",
    renderer="Intel Iris OpenGL Engine",
    fix_hairline=True,
)

# Test data generated once per run.
fake = Faker()
name = fake.name()
text = fake.text()
password = generate_password()
def handle_file_upload(file_input):
    """Send a local ``test<ext>`` file that matches the input's ``accept`` list.

    Only the first entry of the comma-separated ``accept`` attribute is used.
    An entry that is already an extension (".pdf") is used as-is; a MIME type
    ("application/pdf") is translated with ``mimetypes``; anything else,
    including a missing attribute, falls back to ``test.txt``.

    Args:
        file_input: The ``<input type="file">`` element to fill.
    """
    import mimetypes  # local import: only this helper needs it

    extension = '.txt'
    accepted_types = file_input.get_attribute("accept")
    if accepted_types:
        # Entries may be written as ".pdf, .doc", so strip the whitespace.
        first_type = accepted_types.split(",")[0].strip()
        if first_type.startswith('.'):
            extension = first_type
        else:
            # guess_extension returns None for unknown/wildcard types.
            extension = mimetypes.guess_extension(first_type) or extension
    absolute_path = os.path.join(current_dir, f'test{extension}')
    file_input.send_keys(absolute_path)
def get_logs(url: str) -> dict:
    """Fill in and submit the largest form on ``url`` and classify the outcome.

    The page is opened with the module-level ``driver``; the form holding the
    most ``input``/``textarea`` elements is filled with test data and
    submitted, and Chrome's performance log is then searched for the typed
    values.

    Args:
        url: Address of the page that contains the form.

    Returns:
        A dict with ``status`` and ``message`` keys: 200 when a log entry
        contains the submitted data; 403 when none does and the page source
        mentions "recaptcha"; 500 otherwise ("Dummy form"); 404 when the page
        has no ``form`` element at all.
    """
    print("Filling out contact us form on this URL--------->", url, "<----------")
    driver.get(url)
    accept_cookies(driver)
    wait = WebDriverWait(driver, 10)
    html = driver.page_source

    # Finding total available contact forms on the page
    contact_forms = driver.find_elements(By.CSS_SELECTOR, 'form')
    if not contact_forms:
        # max() on an empty list would raise; report the situation instead.
        return {"status": 404, "message": "No form found"}
    # Picking the form with the most input fields (first one wins on a tie)
    form_element = max(
        contact_forms,
        key=lambda form: len(form.find_elements(By.CSS_SELECTOR, 'input,textarea')),
    )
    # Scrolling to the specific form element
    driver.execute_script("arguments[0].scrollIntoView();", form_element)
    # WebDriverWait hands the driver (not the form) to the callable.
    wait.until(lambda drv: drv.execute_script('return document.readyState') == 'complete')

    # Maps a field keyword to a callable that fills the element it is given.
    input_mapping = {
        'email': lambda el: slow_typing(el, fake.email()),
        'age': lambda el: slow_typing(el, "19"),
        'name': lambda el: slow_typing(el, "Drew"),
        'website': lambda el: slow_typing(el, "https://www.google.com"),
        'tel': lambda el: slow_typing(el, "+923142102920"),
        'number': lambda el: slow_typing(el, "+923142102920"),
        'phone': lambda el: slow_typing(el, "+923142102920"),
        'text': lambda el: slow_typing(el, "This is the test text"),
        'url': lambda el: slow_typing(el, "https://www.google.com"),
        'date': lambda el: el.send_keys("12/12/2012"),
        'subject': lambda el: slow_typing(el, "Hi"),
        'message': lambda el: slow_typing(el, "This is test text"),
        'password': lambda el: slow_typing(el, password),
        'file': lambda el: handle_file_upload(el),
    }

    # Fill every visible, typeable input of the selected form.
    for element in form_element.find_elements(By.CSS_SELECTOR, 'input:not([type=hidden]):not([type=checkbox]):not([type=radio]):not([type=submit]):not([type=button]):not([class*=hidden])'):
        # A missing attribute may come back as None; fall back to "" so that
        # .lower() cannot fail.
        input_type = (element.get_attribute("type") or "").lower()
        mininmum_req = element.get_attribute("min")
        placeholder = (element.get_attribute("placeholder") or "").lower()
        try:
            label_element = element.find_element(By.XPATH, "preceding::label[1]")
            label = extract_alphanumeric(label_element.text)
        except Exception:
            # No preceding label: fall through to placeholder / type matching.
            label = None
        try:
            # Priority: label text, then placeholder, then the type attribute.
            if label in input_mapping:
                element.clear()
                input_mapping[label](element)
            elif placeholder in input_mapping:
                element.clear()
                input_mapping[placeholder](element)
            elif input_type == 'number' and mininmum_req:
                # Use the field's own minimum when it declares one.
                element.clear()
                element.send_keys(mininmum_req)
            elif input_type in input_mapping:
                element.clear()
                input_mapping[input_type](element)
        except NoSuchElementException:
            print("Element not found.")
        except ElementNotInteractableException:
            print("Element not interactable.")
            print("label is ->", label)
            print("Type of input is ->", input_type)
        except Exception as e:
            print(f"Error occurred: {str(e)}")

    # The remaining widgets are optional, so each step is best effort.
    try:
        dropdown = form_element.find_element(By.CSS_SELECTOR, 'select')
        Select(dropdown).select_by_index(1)
    except Exception:
        pass
    try:
        form_element.find_element(By.CSS_SELECTOR, 'input[type=radio]').click()
    except Exception:
        pass
    for checkbox in form_element.find_elements(By.CSS_SELECTOR, 'input[type=checkbox]'):
        try:
            checkbox.click()
        except Exception:
            # One unclickable checkbox must not stop the remaining ones.
            pass
    for textarea in form_element.find_elements(By.CSS_SELECTOR, 'textarea'):
        try:
            textarea.clear()
            slow_typing(textarea, " This is the test text")
        except Exception:
            pass

    # finally clicking on submit button
    form_element.find_element(By.CSS_SELECTOR, '[type=submit],[type=button]').click()
    pause_program()

    # Look for the typed values in the performance log. Every entry is
    # scanned before the captcha / dummy decision is made, so a non-matching
    # first entry can no longer decide the result on its own.
    performance_log = driver.get_log('performance')
    markers = ('Drew', 'test text')
    submitted = any(
        check_part_in_string(str(entry), marker)
        for entry in performance_log
        for marker in markers
    )
    if submitted:
        return {"status": 200, "message": "success"}
    if 'recaptcha' in html:
        return {"status": 403, "message": "Failed by captcha"}
    return {"status": 500, "message": "Dummy form"}
if __name__ == "__main__":
    # Pages whose forms are checked, one after another.
    urls = [
        'https://heliosauto.dk/bestildemo/',
        "https://bookvino.com/session/signupWineryOwner",
        "https://bookvino.com/contact-us",
        "https://stage.kkmarketing.dk/",
        'https://ipo.com.pk/contact/',
        "https://kkmarketing.dk/test/",
    ]
    try:
        for url in urls:
            print(get_logs(url))
    finally:
        # Always close the browser, even when a page raises, so no Chrome
        # process is left behind.
        driver.quit()
utils.py
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import time
from faker import Faker
import random
import string,re
# Module-level Faker instance, created once when this module is imported.
faker = Faker()
# Function for clicking on Submit button
def wait_and_click_elem(driver, locator, selector, t=20) -> None:
    """Wait for an element, scroll it into view and click it via JavaScript.

    Args:
        driver: Object used as the WebDriverWait target and to execute the
            scroll/click scripts.
        locator: "XPATH" or "CSS"; any other value is treated as an element id.
        selector: Selector string interpreted according to ``locator``.
        t: Maximum number of seconds to wait for the element to be present.
    """
    # Translate the textual locator into a By strategy. (``By.locator`` does
    # not exist, so the previous up-front lookup failed before any waiting.)
    if locator == "XPATH":
        by = By.XPATH
    elif locator == "CSS":
        by = By.CSS_SELECTOR
    else:
        by = By.ID

    element = None
    try:
        element = WebDriverWait(driver, t).until(
            EC.presence_of_element_located((by, selector))
        )
        driver.execute_script("arguments[0].scrollIntoView();", element)
        time.sleep(1)
        driver.execute_script("arguments[0].click();", element)
        time.sleep(2)
    except Exception:
        print("Could not find any click()")
        # Fall back to a native click only when the element was located.
        if element is not None:
            element.click()
def slow_typing(element, text, delay=0.3):
    """Type ``text`` into ``element`` one character at a time.

    Args:
        element: Object exposing ``send_keys``.
        text: Characters to send, in order.
        delay: Seconds to sleep after each character; the default keeps the
            original fixed pace of 0.3 s.
    """
    for character in text:
        element.send_keys(character)
        time.sleep(delay)
def extract_alphanumeric(text):
    """Reduce ``text`` to lowercase ASCII letters and whitespace.

    Every character that is neither an ASCII letter nor whitespace (digits
    and punctuation included) is dropped; the remainder is lower-cased and
    stripped of leading/trailing whitespace.
    """
    letters_only = re.compile(r'[^a-zA-Z\s]').sub('', text)
    return letters_only.strip().lower()
def pause_program(min_seconds=1, max_seconds=2) -> None:
    """Sleep for a random duration and report it on stdout.

    Args:
        min_seconds: Lower bound of the pause, in seconds.
        max_seconds: Upper bound of the pause, in seconds.
    """
    # Draw a random float between the two bounds (1 and 2 seconds by default).
    rand_time = random.uniform(min_seconds, max_seconds)
    # Pause the program for the random amount of time
    time.sleep(rand_time)
    print("Program paused for", round(rand_time, 2), "seconds.")
def accept_cookies(driver) -> None:
    """Try to dismiss a cookie banner; failures are ignored on purpose.

    Two attempts are made: first an "accept all" button inside the shadow
    root of ``div#usercentrics-root``, then a regular button whose text
    contains " Accept all cookies".

    Args:
        driver: WebDriver showing the page that may contain a banner.
    """
    wait = WebDriverWait(driver, 20)
    try:
        # Banner rendered inside a shadow root.
        parent_element = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[id="usercentrics-root"]')))
        parent_el_shadow_root = parent_element.shadow_root
        time.sleep(5)
        accept_all_button = parent_el_shadow_root.find_element(By.CSS_SELECTOR, 'button[data-testid="uc-accept-all-button"]')
        accept_all_button.click()
        print('accepted cookies')
    except Exception:
        # Best effort: the page may simply have no such banner. ``Exception``
        # (not a bare except) so Ctrl-C / SystemExit still propagate.
        pass
    try:
        # Local renamed so it no longer shadows this function's own name.
        accept_button = driver.find_element(By.XPATH, '//button[contains(text()," Accept all cookies")]')
        accept_button.click()
        print("accepted cookies")
    except Exception:
        pass
def generate_password(length=8) -> str:
    """Return ``length`` random characters drawn from ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)
def check_part_in_string(long_string, part):
    """Tell whether ``part`` occurs literally in ``long_string``, ignoring case."""
    # re.escape makes ``part`` match as plain text rather than as a pattern.
    found = re.search(re.escape(part), long_string, flags=re.IGNORECASE)
    return found is not None
Hope that was helpful! Do keep an eye out for more such articles in the future! Here’s the link to my GitHub profile: https://github.com/faheem77
Thanks for stopping by! Happy Learning!
Please contact me through LinkedIn or follow me on twitter .