# Copyright: 2006 Regents of the University of California,
# Santa Barbara Coastal LTER
# http://sbcdata.lternet.edu/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# 02111-1307  USA

$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
require "date"
require "rexml/document"
require "data_table.rb"

# == What is it
# The goal of this object is to encapsulate a dom representation of
# an EML(http://knb.ecoinformatics.org/software/eml) document and provide
# quick helper methods to access commonly needed attributes. These methods
# will return a more "ruby friendly" representation of this metadata.
# 
# At their core Eml objects contain a REXML::Document in the instance variable @doc.
# Until this object is feature-complete, this dom document can be used when this
# object is returned from this module's Metacat client.
# 
# ==   Examples
# ===  Get temporal coverage
#
#   metacat = Metacat.new('http://data.piscoweb.org/catalog/metacat')
#   eml_object = metacat.find(:docid => 'HMS001_020ADCP019R00_20060612.50.1')
#   geographic_coverage = eml_object.geographic_coverage
#   => [{ "latitude"=>-121.8996,
#         "longitude"=>36.6214,
#         "id"=>"HMS001",
#         "description"=>
#         "Hopkins Marine Station: HMS001: This inner-shelf mooring is located offshore 
#         of the city of Monterey, California, USA, near Hopkins Marine Station.  The 
#         mooring is located in an overall water depth of 020 meters (referenced to Mean 
#         Sea Level, MSL).  The altitudeMinimum and altitudeMaximum tags in this initial 
#         coverage section refer to the ADCP measurement range and are also referenced to 
#         MSL.  They do not represent the overall water depth.  Note the nominal range of 
#         the ADCP may extend from near-bottom (a depth expressed as a negative altitude) 
#         to slightly above MSL (a height expressed as a positive altitude)."}]
#
# ===  Get associated data table(DataTable) and write it to disk
#   eml_object.data_tables.each do |data_table|
#     file = File.new("./store/#{data_table.id}")
#     # data_table is an object, with method read
#     data_table.read do |buffer|
#       file.write(buffer)
#     end
#     file.close()
#   end
class Eml
  attr_reader :docid, :doc
  
  # Accepts an EML REXML::Document
  def initialize(metadata)
    if(metadata.class != REXML::Document || metadata.root.name != 'eml')
      raise ArgumentError, 'Must initialize with REXML::Document representation of EML metadata'
    else
      @doc = metadata
      @docid = @doc.root.attributes['packageId']
    end
  end
  
  def to_s
    @doc.to_s
  end
    
  def data_tables
    tables = []
    @doc.root.elements.each("dataset/dataTable") { |element|  
      tables.push(DataTable.new(element, self))
    }
    return tables
  end

  def largest_data_table
    if(data_tables.length == 1)
      return data_tables[0]
    else
      size = 0
      largest = nil
      data_tables.each do |data_table|
        if(data_table.size > size)
          size = data_table.size
          largest = data_table
        end
      end
      largest
    end
  end
  
  # Pulls a date range from the temporalCoverage element
  #
  # Note : EML supports multiple date ranges to account for gaps
  # this code just lumps them into one
  # Also, it does not support cases of singleDateTime
  def temporal_coverage
    beginDates = endDates = Array.new()
    path = "dataset/coverage/temporalCoverage/rangeOfDates"
    @doc.root.elements.each(path){ |range|  
      beginDates.push(
        Date.strptime(range.elements["beginDate"].elements[1].text)
      )
      endDates.push(
        Date.strptime(range.elements["endDate"].elements[1].text)
      )
    }
    return beginDates.min, endDates.max
  end
  
  def geographic_coverage
    sites = Array.new
    coverage.elements.each('geographicCoverage') do |g|
      site = {
        'id'          =>  g.attributes['id'],
        'description' =>  g.elements['geographicDescription'].text,
        'latitude'    =>  g.elements['boundingCoordinates/westBoundingCoordinate'].text.to_f,
        'longitude'   =>  g.elements['boundingCoordinates/northBoundingCoordinate'].text.to_f
      }
      sites << site
    end
    return sites
  end
  
  def coverage
    @doc.root.elements["dataset/coverage"]
  end
  
  def title
    @doc.root.elements["dataset/title"].text
  end
  
  def short_name
    @doc.root.elements["dataset/shortName"].text
  end
end