/*
  # ***** BEGIN LICENSE BLOCK *****
  # Version: MPL 1.1/GPL 2.0/LGPL 2.1
  #
  # The contents of this file are subject to the Mozilla Public License Version
  # 1.1 (the "License"); you may not use this file except in compliance with
  # the License. You may obtain a copy of the License at
  # http://www.mozilla.org/MPL/
  #
  # Software distributed under the License is distributed on an "AS IS" basis,
  # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  # for the specific language governing rights and limitations under the
  # License.
  #
  # The Original Code is Microsummarizer.
  #
  # The Initial Developer of the Original Code is Mozilla.
  # Portions created by the Initial Developer are Copyright (C) 2006
  # the Initial Developer. All Rights Reserved.
  #
  # Contributor(s):
  #  Myk Melez <myk@mozilla.org> (Original Author)
  #  Simon Bünzli <zeniko@gmail.com>
  #  Asaf Romano <mano@mozilla.com>
  #  Dan Mills <thunder@mozilla.com>
  #  Ryan Flint <rflint@dslr.net>
  #
  # Alternatively, the contents of this file may be used under the terms of
  # either the GNU General Public License Version 2 or later (the "GPL"), or
  # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  # in which case the provisions of the GPL or the LGPL are applicable instead
  # of those above. If you wish to allow use of your version of this file only
  # under the terms of either the GPL or the LGPL, and not to allow others to
  # use your version of this file under the terms of the MPL, indicate your
  # decision by deleting the provisions above and replace them with the notice
  # and other provisions required by the GPL or the LGPL. If you do not delete
  # the provisions above, a recipient may use your version of this file under
  # the terms of any one of the MPL, the GPL or the LGPL.
  #
  # ***** END LICENSE BLOCK *****
*/

/*
 * The original file is located here:
 * http://mxr.mozilla.org/mozilla/source/browser/components/microsummaries/src/nsMicrosummaryService.js?raw=1
 *
 */

/**
 * GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
 * *
 * Copyright (C) 2011, 2012 Loic J. Duros
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see  <http://www.gnu.org/licenses/>.
 *
 */

/**
 * html_parser
 *
 * Takes in an http response (string), loads it into a secured iframe
 * so that it can be manipulated as a DOM object. It then returns a
 * modified string to be passed along as a replacement of the original
 * response.
 *
 */

var {Cc, Ci, Cu} = require("chrome");

var jsChecker = require("js_checker/js_checker.js");
var domHandlerModule = require("html_script_finder/dom_handler");

var removedScripts = require("script_entries/removed_scripts").removedScripts;

Cu.import("resource://gre/modules/XPCOMUtils.jsm"); 

// Look up the most recently used browser window once, when this module is
// loaded. The parser below appends its hidden iframe to this window's
// document and uses its btoa() to build data URIs.
var windowMediator = Cc['@mozilla.org/appshell/window-mediator;1'].
    getService(Ci.nsIWindowMediator);
var window = windowMediator.getMostRecentWindow("navigator:browser");

// Passed as the maximum size to the storage stream's init() call in
// utils.serializeToStream.
// NOTE(review): 2147483647 is 2^31 - 1 (the largest *signed* 32-bit value);
// the largest unsigned 32-bit value would be 4294967295. Confirm which one
// is intended before relying on the name.
const PR_UINT32_MAX = 2147483647;

/**
 * CCIN
 * Shorthand for XPCOM instantiation: looks up the component class
 * registered under a contract ID and creates a new instance of it
 * for the requested interface.
 *
 * @param cName string Contract ID, used as a key into Cc.
 * @param ifaceName string Interface name, used as a key into Ci.
 * @return The value returned by the component class' createInstance().
 */
function CCIN(cName, ifaceName) {
    var componentClass = Cc[cName];
    var requestedInterface = Ci[ifaceName];

    return componentClass.createInstance(requestedInterface);
}

/**
 *
 * utils
 *
 * Helpers used by the HTML parser: UTF-8 encoding and decoding of
 * strings, generation of the data URI that is loaded into the
 * sandboxed iframe, and serialization of the parsed DOM back into a
 * string or a storage stream.
 *
 */
var utils = {

    /**
     * encode_utf8
     * Converts a string into its UTF-8 byte representation, where each
     * character of the result stands for one byte.
     *
     * @param s string The string to encode.
     * @return string The UTF-8 encoded byte string.
     */
    encode_utf8: function (s) {
        return unescape(encodeURIComponent(s));
    },

    /**
     * decode_utf8
     * Inverse of encode_utf8: interprets each character of the input
     * as one byte of a UTF-8 sequence.
     *
     * @param s string The UTF-8 byte string to decode.
     * @return string The decoded string.
     * @throws URIError when s is not a valid UTF-8 byte sequence.
     */
    decode_utf8: function (s) {
        return decodeURIComponent(escape(s));
    },

    /**
     * generateDataURI
     * Builds a base64 encoded text/html data URI from an html string.
     *
     * @param data string The html source. It is handed to window.btoa
     * as is, so it is expected to be a byte string (btoa rejects
     * characters above 0xFF).
     * @param charset string Optional character set. When empty or
     * missing, no charset parameter is added to the URI.
     * @param url string Accepted for interface compatibility; not used.
     * @return string The data URI.
     */
    generateDataURI: function (data, charset, url) {
        var encoded = window.btoa(data);
        var prefix = "data:text/html";

        // Loose comparisons on purpose: null and undefined are both
        // treated as "no charset", exactly like the empty string.
        if (charset != '' && charset != undefined) {
            prefix += ";charset=" + charset;
        }

        return prefix + ";base64," + encoded;
    },

    /**
     *
     * reconstituteDoctype
     * Takes a document.doctype node and rebuilds a serialized doctype
     * from its properties.
     *
     * When the node is missing, or when either its publicId or its
     * systemId is empty, the generic "<!DOCTYPE HTML>" is returned.
     *
     * @param doctypeNode The doctype node (may be null or undefined).
     * @return string The doctype string.
     */
    reconstituteDoctype: function (doctypeNode) {
        if (doctypeNode == undefined ||
            doctypeNode.publicId == "" ||
            doctypeNode.systemId == "") {
            return "<!DOCTYPE HTML>";
        }

        return "<!DOCTYPE " + doctypeNode.name +
            ' PUBLIC "' + doctypeNode.publicId +
            '" "' + doctypeNode.systemId + '">';
    },

    /**
     * reconstituteHtmlString
     * Takes the whole <html> node, deep clones it, places the clone in
     * a div and reads the innerHTML of that div. The doctype is
     * prepended and everything is returned as a string.
     *
     * When the document's character set is utf-8, the result is
     * converted to a UTF-8 byte string with encode_utf8.
     *
     * @param aDocument obj The DOM to be serialized.
     * @return string The serialized document.
     */
    reconstituteHtmlString: function (aDocument) {
        // Every name is declared in this single var statement so that
        // nothing leaks into the global scope.
        var wrapperDiv = aDocument.createElement('div'),
            doctype = utils.reconstituteDoctype(aDocument.doctype),
            htmlNode = aDocument.getElementsByTagName('html')[0],
            cloneHtml = htmlNode.cloneNode(true),
            newData;

        wrapperDiv.appendChild(cloneHtml);

        newData = doctype + '\n' + wrapperDiv.innerHTML;

        if (aDocument.characterSet.toLowerCase() === 'utf-8') {
            newData = utils.encode_utf8(newData);
        }
        return newData;
    },

    /**
     * serializeToStream
     * Serializes an HTML DOM into a storage stream. The string built
     * by reconstituteHtmlString is written to the stream; the XML
     * serializer component is only used as a backup when that fails.
     *
     * In both cases the hidden iframe is detached from its parent
     * once serialization is over, and the storage stream is returned.
     *
     * @param dom obj Reference to the dom object.
     * @param that obj Reference to the object returned by htmlParser.
     * This gives access to the iframe (that.Iframe).
     * @return The storage stream holding the serialized document.
     * @throws when the XPCOM components could not be created, or when
     * the backup serializer fails as well.
     */
    serializeToStream: function (dom, that) {
        var storageStream,
            binaryOutputStream,
            serializer,
            newData;

        try {
            storageStream = CCIN("@mozilla.org/storagestream;1", "nsIStorageStream");
            binaryOutputStream = CCIN("@mozilla.org/binaryoutputstream;1",
                                      "nsIBinaryOutputStream");
            // Created through CCIN like the other components, rather
            // than through the raw Components object.
            serializer = CCIN("@mozilla.org/xmlextras/xmlserializer;1",
                              "nsIDOMSerializer");
        } catch (e) {
            console.log('issue with serializer', e);
        }

        try {
            storageStream.init(8192, PR_UINT32_MAX, null);
            binaryOutputStream.setOutputStream(storageStream.getOutputStream(0));

            newData = utils.reconstituteHtmlString(dom);
            binaryOutputStream.writeBytes(newData, newData.length);
        } catch (e) {
            console.log('issue with serializeToStream, using xmlserializer', e);
            // The serializer writes into binaryOutputStream and returns
            // nothing, so the stream itself is returned further down.
            serializer.serializeToStream(dom, binaryOutputStream, null);
        } finally {
            // Detach the hidden iframe whichever path was taken, so it
            // does not pile up in the browser window.
            if (that && that.Iframe && that.Iframe.parentNode) {
                that.Iframe.parentNode.removeChild(that.Iframe);
            }
        }

        return storageStream;
    }
}

exports.htmlParser = function () {

    return {
	charset: null,
	htmlText: null,
	pageURL: null,
	fragment: null,

	responseStatus: null,

	/**
	 * Parse a string of HTML text.
	 * 
	 * @param   htmlText
	 *          a string containing the HTML content
	 *
	 */
	Parse: function (htmlText, charset, url, fragment, responseStatus, parseResult) {
	    // Find a window to stick our hidden iframe into.
	    try {
		// try to decode utf8 to prevent
		// special char bug, if it fails,
		// set it as is.
		this.htmlText = utils.decode_utf8(htmlText);
	    } catch (e) {
		this.htmlText = htmlText;
		//console.log('problem with decode', e);
	    }
	    if (!window) {
		this.HandleError(event);
		return;
	    }
	    this.charset = charset;
	    this.pageURL = url;
	    this.fragment = fragment;

	    this.responseStatus = responseStatus;

	    var document = window.document;
	    var rootElement = document.documentElement;
	    
	    // Create an iframe, make it hidden, and secure it against untrusted content.
	    this.Iframe = document.createElement('iframe');
	    this.Iframe.setAttribute("collapsed", true);
	    this.Iframe.setAttribute("type", "content");
	    
	    // Insert the iframe into the window, creating the doc shell.
	    rootElement.appendChild(this.Iframe);

	    var webNav = this.Iframe.docShell.QueryInterface(Ci.nsIWebNavigation);
	    webNav.stop(Ci.nsIWebNavigation.STOP_NETWORK);

	    // security.
	    this.Iframe.docShell.allowJavascript = false;
	    this.Iframe.docShell.allowAuth = false;
	    this.Iframe.docShell.allowPlugins = false;
	    this.Iframe.docShell.allowMetaRedirects = false;
	    this.Iframe.docShell.allowSubframes = false;
	    this.Iframe.docShell.allowImages = false;
	    
	    var parseHandler = {
		Self: this,
		handleEvent: function (event) {
		    event.target.removeEventListener("DOMContentLoaded", this, false);
		    try     { 
			var result = this.Self.HandleParse(event, function (stream) {
			parseResult(stream);
		    }); }
		    finally { this.Self = null }
		    
		    return result;
		}
	    };
	    var async = this.Iframe.addEventListener("DOMContentLoaded", parseHandler, true);
	    
	    this.Iframe.contentDocument.location = utils.generateDataURI(htmlText, charset, url);
	    
	},

	/**
	 * Handle a load event for the iframe-based parser.
	 * 
	 * @param   event
	 *          the event object representing the load event
	 *
	 */
	HandleParse: function (event, callback) {

	    var jsCheckString, domHandler, currentIframe, result, 
	        commentedOut, serialScript, newData, len, newDOM;
	    var that = this, handler;
	    currentIframe = event.originalTarget;

	    // version 2.2.4 bug fix. the line below fixes a bug in which the body
	    // tag contents isn't properly passed from the data URI to the iframe.
	    // re-embedding the whole HTML string (including doctype and <html> tag, 
	    // but it is stripped as part of the parsing) seems to fix it.
	    try {
		currentIframe.getElementsByTagName('html')[0].innerHTML = this.htmlText;
		handler = domHandlerModule.domHandler(currentIframe, this.pageURL, this.fragment, this.responseStatus, function (dom) {
		    callback(utils.serializeToStream(dom, that));
                    dom = null;
                    handler = null;
		    currentIframe = null;
		});
	    } catch (e) {
		handler = null;
		currentIframe = null;
		console.log('issue with domHandler,', e, e.lineNumber);		
	    }
	}
    }
};