Web Scraping Scripts

Scraping Bug Bounty Sites

import requests
from bs4 import BeautifulSoup

# URL to scrape
url = "https://www.vulnerability-lab.com/list-of-bug-bounty-programs.php"

# Set headers to mimic a browser request
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}

# Send a GET request with headers
response = requests.get(url, headers=headers)
if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')
    tables = soup.find_all('table')
    # The program links live in the fifth table on the page
    a_tags = tables[4].find_all('a')

    # Write each program link to a file, one URL per line
    with open('bug-bounty-list.txt', 'w') as sites_list:
        for a in a_tags:
            href = a.get('href')
            if href:
                sites_list.write(href + '\n')
else:
    print(f"Failed to retrieve content. Status code: {response.status_code}")

Getting the Domains

with open('bug-bounty-list.txt', 'r') as site_list:
    sites = site_list.readlines()

with open('bug-bounty-domains.txt', 'w') as domain_list:
    for site in sites:
        # Skip mailto: links; only keep real URLs
        if 'mailto' not in site:
            split_site = site.strip().split('/')
            # For a URL like https://example.com/page, index 2 is the host
            if len(split_site) > 2:
                domain_list.write(split_site[2] + '\n')
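
Splitting on '/' works for well-formed http(s) URLs, but the standard library can do the host extraction explicitly. A small sketch of the same step using urllib.parse.urlparse:

from urllib.parse import urlparse

with open('bug-bounty-list.txt', 'r') as site_list, \
     open('bug-bounty-domains.txt', 'w') as domain_list:
    for site in site_list:
        site = site.strip()
        if site.startswith('http'):
            # netloc is the host portion, e.g. "example.com"
            domain_list.write(urlparse(site).netloc + '\n')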

Getting the Keywords

import tldextract

with open('bug-bounty-domains.txt', 'r') as domain_list, \
     open('bug-bounty-wordlist.txt', 'w') as word_list:
    for domain in domain_list:
        # Keep only the registrable name, e.g. "example" from "www.example.com"
        tld = tldextract.extract(domain.strip())
        word_list.write(tld.domain + '\n')
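
For reference, tldextract splits a hostname into subdomain, registrable domain, and public suffix; only the middle part is written to the wordlist. A quick illustration with a made-up hostname:

import tldextract

result = tldextract.extract("bugs.example.co.uk")
print(result.subdomain)  # "bugs"
print(result.domain)     # "example"  <- this is what goes into the wordlist
print(result.suffix)     # "co.uk"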
