Enumeration
URL Path
Custom Script
We can use Python packages such as `requests` and `BeautifulSoup` to crawl the URL links that appear on the site.
url_crawler.py
#!/usr/bin/python3
import sys
from urllib.parse import urldefrag, urljoin, urlparse

import click
import requests
from bs4 import BeautifulSoup as bs
# The crawler sends its requests with verify=False (TLS certificate checks
# off). Silence urllib3's warnings up front; presumably this is to keep the
# printed crawl tree free of the insecure-request warnings that would
# otherwise accompany those requests.
requests.packages.urllib3.disable_warnings()
class Crawler:
    """Crawl a site depth-first and collect every in-scope URL it links to.

    A URL is in scope when it is an http(s) URL whose network location is the
    start URL's host or one of its subdomains. Every URL that is fetched
    successfully is printed (indented by its depth) and remembered in
    ``links``.
    """

    def __init__(self, base_url, timeout=10):
        """Prepare a crawl rooted at *base_url*.

        Args:
            base_url: Absolute URL (scheme included) where the crawl starts.
            timeout: Seconds to wait on each HTTP request before giving up on
                that URL. Without a timeout one stalled server would hang the
                whole crawl.
        """
        # URLs fetched successfully so far; doubles as the visited set.
        self.links = set()
        self.base_url = base_url
        # host[:port] of the start URL, used to keep the crawl on-site.
        self.host = urlparse(base_url).netloc
        self.timeout = timeout

    def _resolve(self, page_url, link):
        """Turn a raw ``href``/``src`` value into an absolute in-scope URL.

        Args:
            page_url: URL of the page the link was found on; relative links
                are resolved against it.
            link: Attribute value as it appears in the markup.

        Returns:
            The absolute URL with any ``#fragment`` removed, or ``None`` when
            the link is empty, malformed, not http(s), or points off-site.
        """
        if not isinstance(link, str) or not link.strip():
            return None
        try:
            # urljoin handles root-relative, document-relative and
            # protocol-relative ("//host/x") links; the fragment is dropped
            # because it never changes which resource is fetched.
            target, _fragment = urldefrag(urljoin(page_url, link.strip()))
            parsed = urlparse(target)
        except ValueError:
            # e.g. a malformed IPv6 literal in the link.
            return None
        if parsed.scheme not in ('http', 'https'):
            return None
        host = self.host.lower()
        netloc = parsed.netloc.lower()
        if not host or not netloc:
            return None
        # Compare parsed hosts instead of substring-matching the whole URL,
        # so "http://evil.test/?u=<host>" is not mistaken for an on-site link.
        if netloc == host or netloc.endswith('.' + host):
            return target
        return None

    def crawl(self, url=None, level=0):
        """Fetch *url* and, depth-first, every in-scope URL reachable from it.

        Each successfully fetched URL is added to ``links`` and printed with
        ``level`` leading spaces (one more per hop from the start).

        Args:
            url: Where to start; defaults to ``base_url``.
            level: Indentation depth used for the first printed URL.
        """
        # An explicit stack replaces recursion so that a long chain of pages
        # cannot exhaust the interpreter's recursion limit.
        pending = [(url or self.base_url, level)]
        # URLs that could not be fetched during this call; skipping them
        # avoids paying the timeout again for a dead link repeated on
        # every page.
        failed = set()
        while pending:
            current, depth = pending.pop()
            if current in self.links or current in failed:
                continue
            try:
                res = requests.get(current, verify=False, timeout=self.timeout)
            except requests.RequestException:
                failed.add(current)
                continue
            self.links.add(current)
            print(' ' * depth + current)
            soup = bs(res.text, 'lxml')
            children = []
            for attr in ('href', 'src'):
                for element in soup.select(f'[{attr}]'):
                    target = self._resolve(current, element.get(attr))
                    if target and target not in self.links:
                        children.append((target, depth + 1))
            # Reversed so that popping visits the links in document order,
            # which keeps the printed tree in depth-first pre-order.
            pending.extend(reversed(children))

    def write(self, wordlist):
        """Write the collected URLs to *wordlist*, one per line, sorted.

        Args:
            wordlist: Path of the output file; it is overwritten.
        """
        with open(wordlist, 'w', encoding='utf-8') as f:
            f.writelines(f'{link}\n' for link in sorted(self.links))
@click.command()
@click.argument('url')
@click.argument('output')
def main(url, output):
    """Crawl the site at URL and write every discovered link to OUTPUT."""
    # NOTE: click shows the docstring above as the command's --help text.
    crawler = Crawler(url)
    crawler.crawl()
    crawler.write(output)


# Only start the CLI when executed as a script, so that importing this module
# (e.g. to reuse Crawler) does not kick off a crawl.
if __name__ == '__main__':
    main()
Last updated