# Copyright: 2006 Regents of the University of California,
# Santa Barbara Coastal LTER
# http://sbcdata.lternet.edu/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# 02111-1307  USA

$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
require "rexml/document"
require "net/http"
require "uri"
require "eml.rb"

# Changing buffer size to greatly improve performance 
class Net::BufferedIO
  # Refills the read buffer with up to 32 KiB per call (the comment below
  # records that the value was raised from 1024).
  #
  # NOTE: reopening Net::BufferedIO replaces rbuf_fill for every Net::HTTP
  # user in the process, not just for Metacat requests.
  #
  # The read is bounded by @read_timeout through Timeout.timeout; the bare
  # Kernel-level +timeout+ helper used previously is deprecated in favour of
  # it. The Timeout module is already loaded as a dependency of net/http.
  def rbuf_fill
    Timeout.timeout(@read_timeout) do
      # changed from 1024
      @rbuf << @io.sysread(32768)
    end
  end
end

# = Metacat Client Library
# == What is it
# A client for the Metacat data catalog. For a description of Metacat, see
# http://knb.ecoinformatics.org/software/metacat
# For now, this client does not implement all features of the API. Rather, 
# it focuses on querying and returning Eml metadata objects from either 
# pathqueries or docids. Should you find yourself using methods other than find()
# very often, you may be veering from the original intent.
# 
# ==   Examples
# ===  Read metadata for a public document
#
#   require 'lib/metacat.rb'
#   metacat = Metacat.new('http://data.piscoweb.org/catalog/metacat')
#   eml = metacat.find(:docid => 'pisco.10.4')
#   puts eml.docid
#   => 'pisco.10.4'
#
# === Log into Metacat and read Eml metadata. Then logout
#
#   username = 'uid=cburt,o=PISCO,dc=ecoinformatics,dc=org'
#   password = '*****'
#   Metacat.new('http://data.piscoweb.org/catalog/metacat', 'username' => username, 'password' => password) do |metacat|
#     eml = metacat.find(:docid => 'pisco.10.3')
#     start, stop = eml.temporal_coverage
#     puts "start: #{start}, end: #{stop}"
#   end
#
# === Search for oceanographic data
#
#   metacat = Metacat.new('http://data.piscoweb.org/catalog/metacat')
#   pathquery = '...' # see example at http://knb.ecoinformatics.org/software/metacat/metacatquery.html
#   docs = metacat.find(:squery => pathquery)
#   docs.each { |eml| puts eml.docid }
#
# === Find and write a data_table to local disk
#   Metacat.new('http://data.piscoweb.org/catalog/metacat', 'username' => username, 'password' => password) do |metacat|
#     file = File.new('tmp', 'w+')
#     # using a block you can avoid loading the whole file into memory!
#     metacat.read('data_table.1.1') do |fragment|
#       file.write(fragment)
#     end
#     file.close
#   end 
class Metacat

  # Creates a client for the Metacat servlet at +path_to_metacat+.
  #
  # +options+ may contain 'username' and 'password' (String or Symbol keys).
  # When both are supplied the client logs in immediately.
  #
  # If a block is given the client is yielded to it and, when a login
  # succeeded, logged out again afterwards -- also when the block raises.
  #
  # Example:
  #   Metacat.new(url, 'username' => user, 'password' => pass) do |metacat|
  #     metacat.find(:docid => 'pisco.10.3')
  #   end
  def initialize(path_to_metacat, options = {}, &block)
    @uri = URI.parse(path_to_metacat)
    @cookie = false
    @logged_in = false

    username = options['username'] || options[:username]
    password = options['password'] || options[:password]
    login(username, password) if username && password

    return unless block_given?

    begin
      yield self
    ensure
      # Do not leave the server-side session open if the block fails.
      logout if @logged_in
    end
  end

  # Check if the metacat instance has a session cookie
  def logged_in?
    @cookie ? true : false
  end

  # Returns either an array of Eml documents if :squery is passed or
  # a single Eml document (or nil) if passed :docid. This function _will_ _not_
  # return a data table, only Eml objects.
  #
  # With :squery, hits that cannot be read as xml or that are not Eml are
  # left out of the array. With :docid, an ArgumentError is raised when the
  # document exists but is not Eml. Passing both keys raises ArgumentError;
  # passing neither returns nil.
  #
  # If you need to retrieve a data table or other document, use read()
  #
  # Examples:
  #   metacat.find(:docid => 'cbs_10.1')
  #   metacat.find(:squery => xml_path_query)
  #
  def find(args)
    if args[:docid] && args[:squery]
      raise ArgumentError, "Too many parameters. Choose :docid or :squery"
    elsif args[:docid]
      result = read(args[:docid], 'only_eml' => true)
      return nil if result.nil?

      try_eml(result) || raise(ArgumentError, "#{args[:docid]} does not refer to eml metadata. To read other documents use read.")
    elsif args[:squery]
      resultset = REXML::Document.new(squery(args[:squery]))
      documents = []
      resultset.elements.each("/resultset/document") do |document|
        # The docid is taken from the first child element of each hit.
        docid = document.elements[1].text
        result = read(docid, 'only_eml' => true)
        # read returns nil for missing or non-xml documents; skip those the
        # same way the :docid branch does instead of handing nil to Eml.
        next if result.nil?

        eml = try_eml(result)
        documents << eml if eml
      end
      documents
    end
  end

  # Logs into metacat using ldap authentication. Usernames are complex, such as
  # 'uid=cburt,o=PISCO,dc=ecoinformatics,dc=org'
  #
  # On success the session cookie from the response is stored and true is
  # returned.
  #
  # Raises MetacatPermissionDenied when the xml reply is not a <login>
  # document, and MetacatResponseError when the reply is not text/xml.
  #
  # Example
  #   metacat.login('uid=cburt,o=PISCO,dc=ecoinformatics,dc=org', '******')
  #   => true
  def login(username, password)
    response = metacat_get({
      'action'    =>  'login',
      'qformat'   =>  'xml',
      'username'  =>  username,
      'password'  =>  password
    })
    raise MetacatResponseError unless response.content_type == 'text/xml'

    doc = REXML::Document.new(response.read_body)
    unless doc.root.name == 'login'
      message = doc.root.elements['message']
      raise MetacatPermissionDenied, "login error: #{message && message.text}"
    end

    # Header access goes through the response itself; the extra
    # Net::HTTPResponse#response hop used before is obsolete.
    @cookie = response['set-cookie']
    @logged_in = true
  end

  # Ends the metacat session and forgets the session cookie. Returns true.
  #
  # Raises RuntimeError when the xml reply is not a <logout> document (the
  # cookie is kept in that case), and MetacatResponseError when the reply is
  # not text/xml.
  def logout
    response = metacat_get({
      'action'    =>  'logout',
      'qformat'   =>  'xml'
    })
    raise MetacatResponseError unless response.content_type == 'text/xml'

    doc = REXML::Document.new(response.read_body)
    # This must be a comparison: an assignment here would treat every xml
    # reply, including errors, as a successful logout.
    raise "Failed to logout: #{doc.root.text}" unless doc.root.name == 'logout'

    @cookie = false
    @logged_in = false
    true
  end

  # Reads a specified document from metacat. If xml is found, a REXML::Document will be returned
  #
  # Returns nil when metacat reports that the document does not exist, or
  # when the reply is not xml and 'only_eml' => true was passed in +options+.
  # Raises MetacatPermissionDenied when the error reply mentions
  # 'permission', and RuntimeError for any other error reply.
  #
  # When reading text data tables, it should be noted that loading the entire large file can
  # consume an enormous amount of memory. To avoid this, read can be passed a &block. The block
  # will receive fragments of the file as it comes in.
  #
  # NOTE(review): for non-xml documents the method's return value is whatever
  # metacat_get returns, which is the Net::HTTPResponse (with its body
  # already read when no block was given) rather than the body String --
  # confirm whether callers expect `.body` here.
  #
  # Examples:
  # Reading an EML document
  #   metacat.read('eml_doc.1.1')
  #   => <REXML::Document >
  #
  # Writing a data table to disk
  #   file = File.new('tmp', 'w+')
  #   metacat.read('data_table.1.1') do |fragment|
  #     file.write(fragment)
  #   end
  #   file.close
  #
  # Reading an entire data table into memory
  #   data_table = metacat.read('data_table.1.1')
  def read(docid, options = {}, &block) # :yields: xml or data_table fragment
    data = {
      'action'  =>  'read',
      'qformat' =>  'xml',
      'docid'   =>  docid
    }
    metacat_get(data) do |response|
      if response.content_type == 'text/xml'
        doc = REXML::Document.new(response.read_body)
        # `return` inside this block leaves read itself.
        return doc unless doc.root.name == 'error'

        message = doc.root.text.to_s
        raise MetacatPermissionDenied, message if message.match('permission')
        # Nothing found, return nil
        return nil if message.match('does not exist')

        raise "Unrecognized response from metacat: #{message}"
      elsif options['only_eml'] == true
        # probably a data table, which the caller asked to ignore
        return nil
      elsif block_given?
        response.read_body { |buffer| yield buffer }
      else
        response.read_body
      end
    end
  end

  # Uses the metacat pathquery search and returns the xml response as a string.
  # Raises RuntimeError when the reply is not text/xml.
  # For query format information, see
  # http://knb.ecoinformatics.org/software/metacat/metacatquery.html
  def squery(squery)
    response = metacat_get({
      'action'  =>  'squery',
      'qformat' =>  'xml',
      'query'   =>  squery
    })
    raise "Metacat returned unexpected Content Type" unless response.content_type == 'text/xml'

    response.read_body
  end

  private

  # Wraps +doc+ in an Eml object; returns false when Eml.new rejects it with
  # an ArgumentError.
  def try_eml(doc)
    Eml.new(doc)
  rescue ArgumentError
    false
  end

  # POSTs +data+ to the metacat path. With a block the response is yielded
  # while the connection is still open.
  def metacat_post(data, &block)
    Net::HTTP.start(@uri.host, @uri.port) do |http|
      if block_given?
        http.request_post(@uri.path, data, headers) { |r| yield(r) }
      else
        http.post(@uri.path, data, headers)
      end
    end
  end

  # GETs the metacat path with +data+ as the query string. With a block the
  # response is yielded while the connection is still open (so the body can
  # be streamed); without one the response is returned with its body read.
  def metacat_get(data, &block)
    path = @uri.path + query_string(data)
    Net::HTTP.start(@uri.host, @uri.port) do |http|
      if block_given?
        http.request_get(path, headers) { |r| yield(r) }
      else
        http.get(path, headers)
      end
    end
  end

  # Builds a '?k=v&k=v' query string from +hash+. Keys and values are
  # form-encoded, so reserved characters such as '&', '=' and ',' (common in
  # LDAP usernames and pathqueries) cannot break the parameter list.
  def query_string(hash)
    "?#{URI.encode_www_form(hash)}"
  end

  # Request headers: the session cookie when one is stored, otherwise none.
  def headers
    @cookie ? { 'Cookie' => @cookie } : {}
  end

end

# Raised by Metacat#login when the server does not answer with a <login>
# document, and by Metacat#read when the error reply mentions 'permission'.
class MetacatPermissionDenied < RuntimeError; end

# Raised by Metacat#login and Metacat#logout when the server reply is not
# of content type text/xml.
class MetacatResponseError < RuntimeError; end