/**
 *    '$RCSfile: DocumentDataPackageParser.java,v $'
 *
 *     '$Author: leinfelder $'
 *       '$Date: 2009/03/27 20:21:34 $'
 *   '$Revision: 1.4 $'
 *
 *  For Details: http://kepler.ecoinformatics.org
 *
 * Copyright (c) 2003 The Regents of the University of California.
 * All rights reserved.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
 * OF SUCH DAMAGE.
 *
 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
 * OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
 * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 */

package org.ecoinformatics.datamanager.parser.document;

import java.io.InputStream;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Vector;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.xpath.CachedXPathAPI;
import org.ecoinformatics.datamanager.parser.Attribute;
import org.ecoinformatics.datamanager.parser.AttributeList;
import org.ecoinformatics.datamanager.parser.DataPackage;
import org.ecoinformatics.datamanager.parser.Domain;
import org.ecoinformatics.datamanager.parser.Entity;
import org.ecoinformatics.datamanager.parser.NumericDomain;
import org.ecoinformatics.datamanager.parser.TextDomain;
import org.ecoinformatics.datamanager.parser.generic.DataPackageParserInterface;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

import edu.ucsb.nceas.utilities.OrderedMap;
import edu.ucsb.nceas.utilities.XMLUtilities;

/**
 * This is plugin Parser parses any XML file to 
 * retrieve the specified xPath elements as a Map record 
 * 
 * Note that the document
 * needs to have a 'packageId' attribute as part of the root node
 *  
 * 
 * @author leinfelder
 */
public class DocumentDataPackageParser implements DataPackageParserInterface
{
    /*
     * Class fields
     */

    private static Log log = LogFactory.getLog(DocumentDataPackageParser.class);
    
    /*
     * Instance fields
     */
    
    // previously these were constants, now member variables with defaults
    protected String packageIdPath = null;
	protected String packageId = null;
    private DocumentDataPackage dataPackage = null;
    private Map attributeXPathMap = null;
    private Map record = null;
    private Document doc = null;
    
    /**
     * Default constructor - no custom xpath parameter
     */
    public DocumentDataPackageParser() {
    	this.packageIdPath = "//*/@packageId";
    	this.record = new HashMap();
    }
    
    public void setAttributeXPathMap(Map xpaths) {
    	this.attributeXPathMap = xpaths;
    }
    
    private Vector getRecordRow() {
    	Vector row = new Vector();
    	row.addAll(record.values());
    	
    	return row;
    }
    
    /**
     * Constructor that accepts a packageId
     * Allows packageId supplied by external caller
     * @param packageId specifying the desired id for the resulting data package
     */
    public DocumentDataPackageParser(String packageId) {
    	this();
    	//set the param
		this.packageId = packageId;
    }

    
    /* (non-Javadoc)
	 * @see org.ecoinformatics.datamanager.parser.generic.GenericDatasetParserInterface#parse(org.xml.sax.InputSource)
	 */
    public void parse(InputSource source) throws Exception
    {
        DocumentBuilder builder = 
        	DocumentBuilderFactory.newInstance().newDocumentBuilder();
        doc = builder.parse(source);
        parseDocument();
    }

    
    /* (non-Javadoc)
	 * @see org.ecoinformatics.datamanager.parser.generic.GenericDatasetParserInterface#parse(java.io.InputStream)
	 */
    public void parse(InputStream is) throws Exception
    {
        DocumentBuilder builder = 
        	DocumentBuilderFactory.newInstance().newDocumentBuilder();
        doc = builder.parse(is);
        parseDocument();
    }
    
    /**
     * Parses the document. Uses the attributeMap to determine the
     * attributes included in the returned Map record  
     * 
     * @param doc  the Document object to be parsed
     */
    private void parseDocument() throws Exception {
    	//look up the packageId if not set
    	if (packageId == null) {
	    	CachedXPathAPI xpathapi = new CachedXPathAPI();
	    	try {
	    		// process packageid
	    		Node packageIdNode = xpathapi.selectSingleNode(doc, packageIdPath);
	    		if (packageIdNode != null) {
	    			packageId = packageIdNode.getNodeValue();
	    		}
	        } 
	        catch (Exception e) {
	            throw new Exception(
	            		"Error extracting packageId from root of document.");
	        }
    	}
        dataPackage = new DocumentDataPackage(packageId);
    }
    
    public void generateEntity() throws Exception {
    	
    	if (this.attributeXPathMap == null) {
        	throw new Exception(
        			"Must specify attribute xPaths for document->record parsing.");
        }
    	
    	//now get the flattened document as a map
        this.record = document2Map(doc, this.attributeXPathMap);
        
        //convert the map to an entity
        Entity entity = map2Entity(this.record, dataPackage.getPackageId());
        
        //add the entity to the datapackage
        this.dataPackage.clearEntityList();
        this.dataPackage.add(entity);
        
        //set the row record data
        this.dataPackage.setRecordRow(getRecordRow());
    }
    
    /* (non-Javadoc)
	 * @see org.ecoinformatics.datamanager.parser.generic.GenericDatasetParserInterface#getDataPackage()
	 */
    public DataPackage getDataPackage()
    {
    	return dataPackage;
    }
    
	public static Entity map2Entity(Map record, String entityId) {
		
		AttributeList attributeList = new AttributeList();
		
		Iterator iter = record.keySet().iterator();
		while (iter.hasNext()) {
			String id = (String) iter.next();
			String name = id;
			Object value = record.get(id);
			Domain domain = new TextDomain(); 
			//TODO handle more specific numeric types?
			if (value instanceof Number) { 
				domain = new NumericDomain("real", null, null);
			}
			Attribute a = new Attribute(id, name, domain);
			attributeList.add(a);
		}
		
		Entity entity = 
			new Entity(
					entityId,
					entityId, // + " name", 
					entityId, // + " description",
					attributeList);
		
		//set some other crucial info for generating the tables and sql
		entity.setPackageId(entity.getId());
		entity.setEntityIdentifier(entity.getId());
		
		return entity;
	}
	
	public static Map document2Map(Document doc, Map attributeXPaths) {
		Map record = new OrderedMap();
		
		try {
			
			//go through the list of attribute labels (key to xpath values)
			Iterator xPathIter = attributeXPaths.keySet().iterator();
			while (xPathIter.hasNext()) {
				String attributeLabel = (String) xPathIter.next();
				String attributeXPath = (String) attributeXPaths.get(attributeLabel);
				
				//handle NodeList, not just single Node
				NodeList attributeNodeList = XMLUtilities.getNodeListWithXPath(doc.getDocumentElement(), attributeXPath);
				
				//include placeholders for those non existent attributes but include null values
				if (attributeNodeList == null) {
					log.debug("no nodes found for xPath: " + attributeXPath);
					record.put(attributeLabel, null);
					log.debug("added null placeholder for attribute: " + attributeLabel);
					continue;
				}
				//get the value[s] for the attribute
				for (int i=0; i < attributeNodeList.getLength(); i++) {
					
					//get the node
					Node attributeNode = attributeNodeList.item(i);
					
					//get the text value of the node
					//TODO should we use DOM level 3 and assume java 1.5?
					String nodeTextContent = null; //attributeNode.getTextContent();
					nodeTextContent = attributeNode.getTextContent();
//					if (attributeNode.getFirstChild() != null && attributeNode.getFirstChild().getNodeType() == Node.TEXT_NODE) {
//						nodeTextContent = attributeNode.getFirstChild().getNodeValue();
//					}
					
					//add the attribute to the Map, taking care to handle multiples
					String columnLabel = attributeLabel;
					if (record.containsKey(columnLabel)) {
						if (i == 1) {
							//get the first original label without the number and reassign it with "_1"
							Object firstValue = record.get(columnLabel);
							//record.remove(columnLabel);
							record.put(columnLabel + "_" + i, firstValue);
						}
						columnLabel = columnLabel + "_" + (i+1);
					}
					record.put(columnLabel, nodeTextContent);
					
					log.debug("added flat attribute: " + columnLabel + "=" + nodeTextContent);
					
				}				

			}
		}
		catch (Exception e) {
			log.error("could not flatten attributes in document: " + e.getMessage());
			e.printStackTrace();
		}
		
		return record;
	}
 
    
}