/**
 *    '$RCSfile: TextComplexFormatDataReader.java,v $'
 *
 *     '$Author: costa $'
 *       '$Date: 2006-11-06 21:18:34 $'
 *   '$Revision: 1.2 $'
 *
 *  For Details: http://kepler.ecoinformatics.org
 *
 * Copyright (c) 2003 The Regents of the University of California.
 * All rights reserved.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
 * OF SUCH DAMAGE.
 *
 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
 * OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
 * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 */
package org.ecoinformatics.datamanager.database;

import java.io.InputStream;
import java.util.Vector;

//import org.apache.commons.logging.Log;
//import org.apache.commons.logging.LogFactory;
//import org.ecoinformatics.util.DelimitedReader;
import org.ecoinformatics.datamanager.parser.Attribute;
import org.ecoinformatics.datamanager.parser.Entity;
import org.ecoinformatics.datamanager.parser.TextComplexDataFormat;
import org.ecoinformatics.datamanager.parser.TextDelimitedDataFormat;
import org.ecoinformatics.datamanager.parser.TextWidthFixedDataFormat;

/**
 * @author tao
 * 
 * This class reads a data input stream and splits it into row vectors based
 * on the given TextComplexDataFormat array. It has one public reading method,
 * getOneRowDataVector. After the end of the stream is reached, an empty
 * vector is returned, so the method can be called in a while loop until
 * an empty vector is hit. During the iteration, every row of data in the
 * stream is pulled out.
 */
public class TextComplexFormatDataReader extends TextDataReader
{
   /*
    * Class fields
    */

   /** Value used to pad a row that ended before every attribute was read. */
   public static final String DEFAULTVALUE = "";


   /*
    * Instance fields
    */
   private InputStream dataStream = null;
   private Entity entity = null;
   private boolean stripHeader = true;
   // The values below are derived from the entity by getParameterFromEntity().
   private int numberOfAttributes = 0;
   private TextComplexDataFormat[] formats = null;
   private String physicalLineDelimiter = null;
   private int    numberOfHeaderLines   = 0;
   private int physicalLineDelimiterLength = 0;
   // Number of header lines skipped so far. It is kept across calls to
   // getOneRowDataVector() so the header is stripped only once per reader.
   private int headLineNumberCount = 0;


   /*
    * Constructors
    */

   /**
    * Constructor with the default stripHeader value - true.
    *
    * @param dataStream  the data input stream
    * @param entity      the entity metadata to describe the data stream
    * @throws Exception  if an argument is null or the entity metadata is
    *                    incomplete or inconsistent
    */
   public TextComplexFormatDataReader(InputStream dataStream, Entity entity)
           throws Exception
   {
      this(dataStream, entity, true);
   }


   /**
    * Constructor with an assigned stripHeader value.
    *
    * @param dataStream  the data input stream
    * @param entity      the entity metadata to describe the data stream
    * @param stripHeader whether the header lines declared in the entity are
    *                    skipped when the input stream is read
    * @throws Exception  if an argument is null or the entity metadata is
    *                    incomplete or inconsistent
    */
   public TextComplexFormatDataReader(InputStream dataStream,
                                      Entity entity,
                                      boolean stripHeader)
           throws Exception
   {
       if (dataStream == null || entity == null)
       {
           throw new Exception("Data inputstream or entity metadata is null");
       }
       this.dataStream      = dataStream;
       this.entity          = entity;
       this.stripHeader     = stripHeader;
       getParameterFromEntity();
   }


   /*
    * Reads the parameters the reader needs from the entity: the number of
    * attributes, the number of header lines, the physical line delimiter
    * and the per-attribute format array.
    */
   private void getParameterFromEntity() throws Exception
   {
       Attribute[] attributeList = entity.getAttributes();

       if (attributeList == null)
       {
           throw new Exception("Attribute in entity metadata is null");
       }
       numberOfAttributes = attributeList.length;

       numberOfHeaderLines = entity.getNumHeaderLines();

       // -1 is treated as "no header lines".
       if (numberOfHeaderLines == -1)
       {
           numberOfHeaderLines = 0;
       }

       // The physical line delimiter comes from the physicalLineDelimiter
       // element; if there is none, fall back to the record delimiter.
       physicalLineDelimiter = entity.getPhysicalLineDelimiter();

       if (physicalLineDelimiter == null)
       {
           physicalLineDelimiter = entity.getRecordDelimiter();
       }

       physicalLineDelimiter =
                   DelimitedReader.unescapeDelimiter(physicalLineDelimiter);

       // Fail with a clear message instead of a NullPointerException below.
       if (physicalLineDelimiter == null)
       {
           throw new Exception(
               "Neither a physical line delimiter nor a record delimiter " +
               "is available in the entity metadata");
       }
       physicalLineDelimiterLength = physicalLineDelimiter.length();

       formats = entity.getDataFormatArray();

       if (formats == null)
       {
           throw new Exception("Complext format is null in metadata entity");
       }

       if (formats.length != numberOfAttributes)
       {
           throw new Exception("Complex formats should have same number as attribute number");
       }
   }


   /**
    * Reads one row from the input stream and returns it as a vector of
    * String field values, each trimmed of surrounding whitespace. When the
    * end of the stream has been reached an empty vector is returned, so the
    * method can be called in a while loop until an empty vector comes back.
    * A row that ends before every attribute was read is padded with
    * DEFAULTVALUE up to the number of attributes.
    *
    * @return Vector of String values, empty when no more data is available
    * @throws Exception if a format entry is null, a delimited format has no
    *                   field delimiter, or reading the stream fails
    */
   public Vector getOneRowDataVector() throws Exception
   {
     Vector oneRowDataVector = new Vector();

     // Without a stream or without attributes there is nothing to read;
     // the latter also avoids indexing into an empty format array.
     if (dataStream == null || numberOfAttributes == 0)
     {
         return oneRowDataVector;
     }

     // Sliding window over the most recent characters, used to spot the
     // physical line delimiter.
     StringBuffer lineDelimiterWindow = new StringBuffer();
     StringBuffer fieldValueBuffer = new StringBuffer();
     int columnCount  =  1;  // 1-based column of the current character
     int attributeCount = 0; // index of the attribute being read
     boolean startNewAttribute = true;
     boolean isWidthFix        = true;
     int width   = -1;
     int widthCount = 0;
     boolean startWidthCount = false;
     int startColumnNumberFromFormat = -1;
     String fieldDelimiter = null;

     int singleCharacter = dataStream.read();

     while (singleCharacter != -1)
     {
       char character = (char) singleCharacter;

       if (stripHeader && numberOfHeaderLines > 0 &&
               headLineNumberCount < numberOfHeaderLines)
       {
           // Still inside the header: discard characters and count lines.
           if (endsPhysicalLine(lineDelimiterWindow, character))
           {
               headLineNumberCount++;
           }
       }
       else
       {
           // Data after the header.
           fieldValueBuffer.append(character);

           // Load the format information when a new attribute begins.
           if (startNewAttribute)
           {
              startNewAttribute = false;
              TextComplexDataFormat format = formats[attributeCount];
              if (format == null)
              {
                 throw new Exception(
                                "The text format is null for an attribute");
              }
              else if (format instanceof TextWidthFixedDataFormat)
              {
                 TextWidthFixedDataFormat widthFormat =
                                          (TextWidthFixedDataFormat) format;
                 width = widthFormat.getFieldWidth();
                 startColumnNumberFromFormat =
                                          widthFormat.getFieldStartColumn();
                 isWidthFix = true;
                 startWidthCount = false;
              }
              else if (format instanceof TextDelimitedDataFormat)
              {
                 TextDelimitedDataFormat delimitedFormat =
                                          (TextDelimitedDataFormat) format;
                 fieldDelimiter = delimitedFormat.getFieldDelimiter();
                 if (fieldDelimiter == null)
                 {
                    throw new Exception(
                        "The field delimiter is null for a delimited attribute");
                 }
                 isWidthFix = false;
              }
           }

           if (isWidthFix)
           {
              if (startColumnNumberFromFormat != -1 &&
                  startColumnNumberFromFormat == columnCount)
              {
                  // The metadata gives a start column and we reached it:
                  // the field value begins with this character.
                  fieldValueBuffer.setLength(0);
                  fieldValueBuffer.append(character);
                  startWidthCount = true;
              }
              else if (startColumnNumberFromFormat == -1)
              {
                  // No start column: the field starts right away.
                  startWidthCount = true;
              }

              if (startWidthCount)
              {
                  widthCount++;
              }

              // The field is complete once its full width has been read.
              if (widthCount == width)
              {
                  oneRowDataVector.add(fieldValueBuffer.toString().trim());
                  widthCount = 0;
                  startWidthCount = false;
                  fieldValueBuffer.setLength(0);
                  startNewAttribute = true;
                  attributeCount++;
              }
           }
           else
           {
               // Delimited field: complete when the buffer ends with the
               // field delimiter. Only the tail is compared, so the whole
               // buffer is not copied for every character.
               int valueLength =
                       fieldValueBuffer.length() - fieldDelimiter.length();
               if (valueLength >= 0 &&
                   fieldValueBuffer.substring(valueLength)
                                   .equals(fieldDelimiter))
               {
                   String value = fieldValueBuffer.substring(0, valueLength);
                   oneRowDataVector.add(value.trim());
                   fieldValueBuffer.setLength(0);
                   startNewAttribute = true;
                   attributeCount++;
               }
           }

           columnCount++;

           // Restart column numbering after a physical line delimiter.
           if (endsPhysicalLine(lineDelimiterWindow, character))
           {
               columnCount = 1;
           }

           // All attributes of the row have been read.
           if (attributeCount == numberOfAttributes)
           {
               break;
           }
       }

       singleCharacter = dataStream.read();
     }

     // The stream ended in the middle of a field (for example the last line
     // has no trailing delimiter). Keep the characters read so far instead
     // of dropping them; blank remainders such as trailing line breaks are
     // ignored so that they do not produce a spurious row.
     if (singleCharacter == -1 && attributeCount < numberOfAttributes)
     {
         boolean insideField = !isWidthFix || widthCount > 0;
         String remainder = fieldValueBuffer.toString().trim();
         if (insideField && remainder.length() > 0)
         {
             oneRowDataVector.add(remainder);
         }
     }

     // If the row is not empty but has fewer values than attributes, pad
     // it with the default value up to the number of attributes.
     if (!oneRowDataVector.isEmpty())
     {
         for (int i = oneRowDataVector.size(); i < numberOfAttributes; i++)
         {
             oneRowDataVector.add(DEFAULTVALUE);
         }
     }

     return oneRowDataVector;
   }


   /*
    * Appends the character to the sliding window and reports whether the
    * window now equals the physical line delimiter. The window never grows
    * beyond the delimiter length, so a multi-character delimiter is found
    * wherever it occurs in the stream. The window is cleared after a match.
    */
   private boolean endsPhysicalLine(StringBuffer window, char character)
   {
       // An empty delimiter can never be matched.
       if (physicalLineDelimiterLength == 0)
       {
           return false;
       }

       window.append(character);
       if (window.length() > physicalLineDelimiterLength)
       {
           window.deleteCharAt(0);
       }

       if (window.toString().equals(physicalLineDelimiter))
       {
           window.setLength(0);
           return true;
       }
       return false;
   }


    /**
     * @return Returns the dataStream field.
     */
    public InputStream getDataStream()
    {
        return dataStream;
    }


    /**
     * Sets the dataStream field to a given input stream. The count of header
     * lines already skipped is not reset by this method. If the stream is
     * null, getOneRowDataVector returns an empty vector.
     *
     * @param dataStream The InputStream value to set.
     */
    public void setDataStream(InputStream dataStream)
    {
        this.dataStream = dataStream;
    }


    /**
     * Gets the value of the entity field.
     *
     * @return Returns the entity field.
     */
    public Entity getEntity()
    {
        return entity;
    }


    /**
     * Sets the value of the entity field to the specified Entity object.
     * Only the field is replaced: the parameters read from the entity at
     * construction time (formats, delimiters, attribute and header line
     * counts) are not recomputed.
     *
     * @param entity  The Entity value to set.
     */
    public void setEntity(Entity entity)
    {
        this.entity = entity;
    }

}