description = [[
Spiders a web server and displays its directory structure along with
the number and types of files in each folder. Note that files listed as
having an 'Other' extension are ones that have no extension or that
are root documents.
]]

---
-- @usage
-- nmap --script http-sitemap-generator -p 80 <host>
--
-- @output
-- PORT   STATE SERVICE REASON
-- 80/tcp open  http    syn-ack
-- | http-sitemap-generator:
-- |   Directory structure:
-- |     /
-- |       Other: 1
-- |     /images/
-- |       png: 1
-- |     /shared/css/
-- |       css: 1
-- |     /shared/images/
-- |       gif: 1; png: 1
-- |   Longest directory structure:
-- |     Depth: 2
-- |     Dir: /shared/css/
-- |   Total files found (by extension):
-- |_    Other: 1; css: 1; gif: 1; png: 2
--
-- @args http-sitemap-generator.maxdepth the maximum number of directories beneath
--       the initial URL to spider. A negative value disables the limit.
--       (default: 3)
-- @args http-sitemap-generator.maxpagecount the maximum number of pages to visit.
--       A negative value disables the limit. (default: 20)
-- @args http-sitemap-generator.url the URL to start spidering. This is a URL
--       relative to the scanned host, e.g. /default.html (default: /)
-- @args http-sitemap-generator.withinhost only spider URLs within the same host.
--       (default: true)
-- @args http-sitemap-generator.withindomain only spider URLs within the same
--       domain. This widens the scope from <code>withinhost</code> and cannot
--       be used in combination with it. (default: false)
--

author = "Piotr Olma"
license = "Same as Nmap--See https://nmap.org/book/man-legal.html"
categories = {"discovery", "intrusive"}

local shortport = require 'shortport'
local stdnse = require 'stdnse'
local url = require 'url'
local httpspider = require 'httpspider'
local string = require 'string'
local table = require 'table'

portrule = shortport.port_or_service( {80, 443}, {"http", "https"}, "tcp", "open")

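-- Increments the counter for extension v under directory k in table d,
-- creating the nested table for k on first use.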
local function dict_add(d, k, v)
  if not d[k] then
    d[k] = {}
    d[k][v] = 1
  elseif d[k][v] then
    d[k][v] = d[k][v]+1
  else
    d[k][v] = 1
  end
end

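-- Applies function f to every element of array t and returns the results as a new array.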
local function map(f, t)
  local new_t = {}
  for _,v in ipairs(t) do
    new_t[#new_t+1] = f(v)
  end
  return new_t
end

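-- Converts the directory -> extension-count map into an array of {dir, counts}
-- pairs sorted by directory name.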
local function sort_dirs(t)
  local keys_table = {}
  for k,_ in pairs(t) do
    keys_table[#keys_table+1] = k
  end
  table.sort(keys_table)
  local newdirs = {}
  map(function(d) newdirs[#newdirs+1]={d, t[d]} end, keys_table)
  return newdirs
end

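-- Returns an array of "key: value" strings built from table t, sorted by key.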
local function sort_by_keys(t)
  local keys_table = {}
  for k,_ in pairs(t) do
    keys_table[#keys_table+1] = k
  end
  table.sort(keys_table)
  return map(function(e) return e..": "..tostring(t[e]) end, keys_table)
end

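-- Converts the sorted {dir, counts} pairs into the nested table format
-- expected by stdnse.format_output.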
local function internal_table_to_output(t)
  local output = {}
  for _,dir in ipairs(t) do
    local ext_and_occurrences = sort_by_keys(dir[2])
    output[#output+1] = {name=dir[1], table.concat(ext_and_occurrences, "; ")}
  end
  return output
end

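-- Returns the extension of file path f, or "Other" when it has none
-- (e.g. a root document).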
local function get_file_extension(f)
  return string.match(f, ".-/.-%.([^/%.]*)$") or "Other"
end

-- removes /../ and /./ from paths; for example
-- normalize_path("/a/v/../../da/as/d/a/a/aa/../") -> "/da/as/d/a/a/"
local function normalize_path(p)
  local n=0
  p = p:gsub("/%.%f[/]", "")
  p = p:gsub("/%.$", "/")
  repeat
    p, n = string.gsub(p, "/[^/]-/%.%.", "")
  until n==0
  return p
end

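-- Crawls the target web service and aggregates how many files of each
-- extension were found in every directory, tracking the deepest directory seen.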
function action(host, port)
  local starting_url = stdnse.get_script_args('http-sitemap-generator.url') or "/"

  -- create a new crawler instance
  local crawler = httpspider.Crawler:new( host, port, starting_url, { scriptname = SCRIPT_NAME, noblacklist=true, useheadfornonwebfiles=true } )

  if ( not(crawler) ) then
    return
  end

  local visited = {}
  local dir_structure = {}
  local total_ext = {}
  local longest_dir_structure = {dir="/", depth=0}
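  -- crawl until the spider runs out of pages (break) or reports an error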
  while(true) do
    local status, r = crawler:crawl()

    if ( not(status) ) then
      if ( r.err ) then
        return stdnse.format_output(false, r.reason)
      else
        break
      end
    end
    if r.response.status and r.response.status == 200 then
      --check if we've already visited this file
      local path = normalize_path(r.url.path)
      if not visited[path] then
        local ext = get_file_extension(path)
        if total_ext[ext] then total_ext[ext]=total_ext[ext]+1 else total_ext[ext]=1 end
        local dir = normalize_path(r.url.dir)
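        -- gsub returns the number of substitutions made, i.e. the number of '/' in dir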
        local _,dir_depth = string.gsub(dir,"/","/")
        -- check if this path is the longest one
        dir_depth = dir_depth - 1 -- first '/'
        if dir_depth > longest_dir_structure["depth"] then
          longest_dir_structure["dir"] = dir
          longest_dir_structure["depth"] = dir_depth
        end
        dict_add(dir_structure, dir, ext)
        -- when withinhost=false, then maybe we'd like to include the full url
        -- with each path listed in the output
        visited[path] = true
      end
    end
  end

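  -- assemble the nested output table consumed by stdnse.format_output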
  local out = internal_table_to_output(sort_dirs(dir_structure))
  local tot = sort_by_keys(total_ext)
  out =
  {
    "Directory structure:", out,
    {name="Longest directory structure:", "Depth: "..tostring(longest_dir_structure.depth), "Dir: "..longest_dir_structure.dir},
    {name="Total files found (by extension):", table.concat(tot, "; ")}
  }
  return stdnse.format_output(true, out)
end