﻿/**
 * Copyright (c) 2006, Opera Software ASA
 * All rights reserved.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of Opera Software ASA nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY OPERA SOFTWARE ASA AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL OPERA SOFTWARE ASA AND CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * Scraper class
 *
 * This class performs XMLHttpRequests and uses a callback function to scrape some content
 *
 *	@author Mathieu HENRI, Opera Software ASA
 *  @author Magnus Kristiansen, Opera Software ASA
 *	@version	0.9
 */

/**
 *	Creates a scraper: an object that requests a document with XMLHttpRequest,
 *	passes the response to a scraper function and hands the scraped result to
 *	a callback function. Scraped results are cached per data id.
 *	@constructor
 */
function Scraper() {

	/**	@private	the URL of the document to scrape	*/
	var	URL				= '';
	/**	@private	the username required to access the document	*/
	var	username		= null;
	/**	@private	the password required to access the document	*/
	var	password		= null;
	/**	@private	the scraper function that will be called back to scrape the document	*/
	var	scraperFunction	= null;
	/**	@private	the callback function that will be called back after scraping	*/
	var	returnFunction	= null;
	/**	@private	the scraped data, keyed by data id; each entry is { timestamp, data }	*/
	var	data			= {};


	/**
	 *	writes a diagnostic message to Opera's error console when it exists,
	 *	otherwise to the generic console when there is one. Looking the globals
	 *	up through typeof keeps the scraper from throwing a ReferenceError in
	 *	environments that do not provide `opera`.
	 *	@private
	 *	@param	{String}	message				the message to report
	 */
	function log( message )
	{
		if ( typeof opera != 'undefined' && opera && opera.postError ) {
			opera.postError( message );
		} else if ( typeof console != 'undefined' && console && console.log ) {
			console.log( message );
		}
	}


	/**
	 *	sets the URL and authentication parameters
	 *	@param	{String}	newURL				URL of the document to scrape.
	 *	@param	{String}	newUsername			[OPTIONAL] the username required to access the document. NULL by default.
	 *	@param	{String}	newPassword			[OPTIONAL] the password required to access the document. NULL by default.
	 *	@return	a flag indicating if the setting was successful ( if newURL is actually a string )
	 *	@type	boolean
	 */
	this.setURLAndAuthentication = function( newURL, newUsername, newPassword )
	{
		if ( typeof newURL != 'string' ) {
			return false;
		}

		// anything that is not a string resets the credential to "none"
		username = typeof newUsername == 'string' ? newUsername : null;
		password = typeof newPassword == 'string' ? newPassword : null;

		URL = newURL;

		return true;
	};


	/**
	 *	sets the scraper function
	 *	@param	{Function}	newScraperFunction	the handle of the scraper function. It will be called with an 'xml' and 'txt' arguments.
	 *	@return	a flag indicating if the setting was successful ( if newScraperFunction is actually a function )
	 *	@type	boolean
	 */
	this.setScraperFunction = function( newScraperFunction )
	{
		if ( typeof newScraperFunction != 'function' ) {
			return false;
		}

		scraperFunction = newScraperFunction;

		return true;
	};

	/**
	 *	sets the callback function
	 *	@param	{Function}	newReturnFunction	the handle of the callback function. It will be called with the return value of the scraper function.
	 *	@return	a flag indicating if the setting was successful ( if newReturnFunction is actually a function )
	 *	@type	boolean
	 */
	this.setReturnFunction = function( newReturnFunction )
	{
		if ( typeof newReturnFunction != 'function' ) {
			return false;
		}

		returnFunction = newReturnFunction;

		return true;
	};

	/**
	 *	triggers the request of a document and scraping of some data
	 *
	 *	The cache is keyed by dataId only ( not by URL ), so distinct documents
	 *	need distinct ids.
	 *
	 *	@param	{String}	dataId				the id of the data to scrape
	 *	@param	{Number}	dataMaxAge			[OPTIONAL] maximum age ( in minutes ) of the data below which the cache will be used. 1 minute by default; values that are not numbers or are below 1 are treated as 1.
	 *	@param	{Function}	newScraperFunc		[OPTIONAL] temporary override of scraperFunction. Ignored if it is not a function.
	 *	@param	{Function}	newReturnFunc		[OPTIONAL] temporary override of returnFunction. Ignored if it is not a function.
	 *	@return	FALSE if dataId, the URL, the scraper function or the callback function is missing,
	 *			the cached data if it is fresh enough ( the callback function is NOT called in that case ),
	 *			UNDEFINED if a request was started. The callback function then receives whatever
	 *			the scraper function returns, or NULL if the request or the scraping failed.
	 *	@see #setURLAndAuthentication
	 *	@see #setScraperFunction
	 *	@see #setReturnFunction
	 */
	this.scrapeData = function( dataId, dataMaxAge, newScraperFunc, newReturnFunc ) {

		var localScraperFunc = typeof newScraperFunc == 'function' ? newScraperFunc : scraperFunction;
		var localReturnFunc = typeof newReturnFunc == 'function' ? newReturnFunc : returnFunction;

		// several problems may apply at once; the last one detected is the one reported
		var error = null;
		if ( dataId == null ) { error = 'dataId missing'; }
		if ( URL == '' ) { error = 'no URL defined'; }
		if ( ! localScraperFunc ) { error = 'no scraper defined'; }
		if ( ! localReturnFunc ) { error = 'no callback defined'; }

		if ( error ) {
			log( 'Scraper.scrapeData: ' + error );
			return false;
		}

		// written as !( x >= 1 ) so that NaN ( e.g. from a non-numeric argument )
		// also falls back to the default instead of silently disabling the cache
		var maxAgeMinutes = Number( dataMaxAge );
		if ( !( maxAgeMinutes >= 1 ) ) {
			maxAgeMinutes = 1;
		}
		var maxAgeMs = maxAgeMinutes * 60 * 1000;

		var cached = data[dataId];
		if ( cached && cached.timestamp + maxAgeMs > new Date().getTime() ) {
			log( 'Using cached data for request ' + dataId );
			return cached.data;
		}

		var	XHR = new XMLHttpRequest();

		// credentials are only sent when both of them are set
		if ( username && password ) {
			XHR.open( 'get', URL, true, username, password );
		} else {
			XHR.open( 'get', URL, true );
		}

		XHR.onreadystatechange = function() {
			// 4 == request completed
			if ( XHR.readyState != 4 ) { return; }

			// status 0 is accepted too -- presumably for documents that are not
			// served over HTTP ( local / widget resources ); TODO confirm
			if ( XHR.status != 200 && XHR.status != 304 && XHR.status != 0 ) {
				localReturnFunc( null );
				return;
			}

			var xml = XHR.responseXML;
			var text = XHR.responseText;
			var timestamp = new Date().getTime();
			var scraped;

			// a failure while parsing or scraping must still reach the caller,
			// otherwise the callback would never be invoked for this request
			try {
				if ( !xml || !xml.documentElement ) {
					log( 'No responseXML from ' + URL + '\nFalling back to DOMParser' );
					xml = new DOMParser().parseFromString( text, 'text/html' );
				}
				scraped = localScraperFunc( xml, text );
			} catch ( scrapeError ) {
				log( 'Scraper.scrapeData: scraping failed for request ' + dataId + ': ' + scrapeError );
				localReturnFunc( null );
				return;
			}

			data[dataId] = {
				timestamp: timestamp,
				data: scraped
			};

			localReturnFunc( scraped );
		};

		XHR.send( null );

	};

}