#!/usr/bin/env ruby
# Page Pattern Generator
#  Aung Khant, http://yehg.net

# Feed it a URL and get back
# a tag pattern that is ready to use in your plugin

# Code taken from Andrew Horton

require 'net/http'
require 'net/https'
require 'open-uri'

# Load Digest::MD5. Every maintained Ruby ships it as 'digest/md5'; the
# top-level 'md5' library only existed in very old interpreters, so it is
# kept purely as a fallback. Probing for the library instead of matching
# RUBY_VERSION keeps the script working on Ruby 3 and later, where the old
# version test fell through to the missing 'md5' library.
begin
  require 'digest/md5'
rescue LoadError
  require 'md5'
end


# $VERBOSE is Ruby's built-in warning-level global. This script also uses it
# as the switch for the extra "[-] Error: ..." diagnostics printed by
# generate_pattern, so forcing it to false here keeps those diagnostics off.
$VERBOSE = false

# Script entry point.
#
# Expects the target URL as the first command-line argument. Without one it
# prints the usage line and exits with status 1. Otherwise it prints the
# banner, fetches the page and prints every match hash that could be
# generated, one per line, separated by commas.
def main
  if ARGV.empty?
    usage
    exit 1
  end

  banner

  response = fetch_url(ARGV.first.to_s)
  # fetch_url has already reported the problem when it returns nil.
  return if response.nil?

  matches = generate_pattern(response.body)
  if matches.empty?
    puts "[-] Error: No matches"
    return
  end

  puts "[+] Matches:"
  puts matches.flatten.join(",\n")
end

# from tag_pattern.rb
# Builds the comma-separated tag pattern for a chunk of HTML.
#
# b - String of markup, or nil.
#
# Every "<name" occurrence contributes its lower-cased name (closing tags
# keep their leading "/"). Anything that looks like a tag between a
# "script" tag and the next "/script" is skipped, because it is usually
# script source rather than markup. Comment markers and noscript sections
# get no special treatment.
#
# Returns the pattern String, or nil when b is nil.
def pg_tag_pattern(b)
  return if b.nil?

  in_script = false
  kept = []

  b.scan(/<([^\s>]*)/).flatten.each do |tag|
    tag.downcase!

    # A closing /script is always recorded, even when no script was open;
    # everything else is recorded only while outside a script section.
    kept << tag if !in_script || tag == '/script'

    case tag
    when 'script'  then in_script = true
    when '/script' then in_script = false
    end
  end

  kept.join(",")
end

# Prints the program banner: a blank line, the title line, the author line
# and a trailing blank line.
def banner
  banner_lines = [
    '',
    # The trailing space after the closing "==" is part of the output.
    '== Page Pattern Generator 0.1 for WhatWeb == ',
    '        by Aung Khant, http://yehg.net',
    '',
    ''
  ]
  puts banner_lines.join("\n")
end

# Prints the one-line command-line synopsis.
def usage
  # puts supplies the terminating newline itself.
  puts 'Usage: ./get-pattern http://www.example.com/'
end

# Fetches a page with a single HTTP(S) GET.
#
# url - String target. "http://" is prepended when the string does not
#       already start with http:// or https://.
#
# Prints the URL being fetched. Certificate verification is disabled for
# https targets.
#
# Returns the response object from Net::HTTP#get, or nil (after printing an
# error line) when the URL cannot be parsed or the request raises.
def fetch_url(url)
  puts "[*] URL: #{url}"
  url = 'http://' + url if url !~ /^https?:\/\//

  begin
    uri = URI.parse(url)
  rescue URI::InvalidURIError
    # Report malformed input like any other failure instead of letting the
    # exception abort the script with a backtrace.
    puts "[-] Error: Invalid URL - #{url}"
    return nil
  end
  uri.path += '/' if uri.path.size == 0

  http = Net::HTTP.new(uri.host, uri.port)
  http.open_timeout = 180
  http.read_timeout = 180
  if uri.scheme == 'https'
    http.use_ssl = true
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE # insecure
  end

  begin
    # request_uri is the path plus any "?query". Sending only uri.path would
    # silently fetch a different page for URLs that carry parameters.
    http.get(uri.request_uri)
  rescue
    puts "[-] Error: Could not connect to URL - #{uri}"
    nil
  end
end

# Builds the list of match hashes for a response body.
#
# body - String page content. nil (a response that carried no body) is
#        treated as an empty page.
#
# Returns an Array holding, in this order and only when available:
#   {:name => 'HTML Page Title',  :text => "<title>...</title>"}
#   {:name => 'HTML Tag Pattern', :tagpattern => "html,head,..."}
#   {:name => 'MD5 hash',         :md5 => "<hex digest of body>"}
# The Array is empty when the body yields none of them. The reason for each
# skipped entry is printed only when $VERBOSE is set.
def generate_pattern(body)
  # Normalise first so a missing body behaves like a blank page instead of
  # raising NoMethodError on nil.
  body = body.to_s
  res = []

  title = body.scan(/(<title>[^<]+<\/title>)/i).flatten.first
  if title.nil?
    puts "[-] Error: Could not title match - no title in response body" if $VERBOSE
  else
    res << {:name => 'HTML Page Title', :text => title.to_s}
  end

  # pg_tag_pattern may return nil; to_s turns that into "no tags".
  pattern = pg_tag_pattern(body).to_s
  if pattern.empty?
    puts "[-] Error: Could not generate tag pattern - no tags in response body" if $VERBOSE
  else
    res << {:name => 'HTML Tag Pattern', :tagpattern => pattern}
  end

  md5 = Digest::MD5.hexdigest(body)
  # d41d8cd9... is the MD5 of the empty string, which fingerprints nothing.
  if md5 == 'd41d8cd98f00b204e9800998ecf8427e'
    puts "[-] Error: MD5 hash is hash of a blank page" if $VERBOSE
  else
    res << {:name => 'MD5 hash', :md5 => md5}
  end

  res
end

# Start the generator. There is no __FILE__ guard, so this runs whenever the
# file is executed or loaded.
main
