# Copyright: 2006 Regents of the University of California,
#            Santa Barbara Coastal LTER
#            http://sbcdata.lternet.edu/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# 02111-1307  USA

$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
require "rexml/document"
require "net/http"
require "timeout"
require "uri"
require "eml.rb"

# Changing buffer size to greatly improve performance.
#
# This reopens Net::BufferedIO and replaces its rbuf_fill, so every Net::HTTP
# connection in the process is affected, not only the ones made by Metacat.
# NOTE(review): newer Net::HTTP versions appear to ship a larger read buffer
# and a non-blocking rbuf_fill of their own -- confirm this override is still
# wanted on the Ruby version you deploy.
class Net::BufferedIO
  # Appends up to 32768 bytes read from @io to @rbuf, giving up after
  # @read_timeout seconds.
  def rbuf_fill
    # Timeout.timeout is called explicitly; the bare top-level `timeout`
    # helper is deprecated and absent from current Rubies.
    Timeout.timeout(@read_timeout) {
      # changed from 1024
      @rbuf << @io.sysread(32768)
    }
  end
end

# = Metacat Client Library
# == What is it
# A client for the Metacat data catalog. For a description of Metacat, see
# http://knb.ecoinformatics.org/software/metacat
# For now, this client does not implement all features of the API. Rather,
# it focuses on querying and returning Eml metadata objects from either
# pathqueries or docid's. Should you find yourself using methods other than find()
# very often, you may be veering from the original intent.
#
# == Examples
# === Read metadata for a public document
#
#   require 'lib/metacat.rb'
#   metacat = Metacat.new('http://data.piscoweb.org/catalog/metacat')
#   eml = metacat.find(:docid => 'pisco.10.4')
#   puts eml.docid # => 'pisco.10.4'
#
# === Log into Metacat and read Eml metadata.
# Then logout
#
#   username = 'uid=cburt,o=PISCO,dc=ecoinformatics,dc=org'
#   password = '*****'
#   credentials = { 'username' => username, 'password' => password }
#   Metacat.new('http://data.piscoweb.org/catalog/metacat', credentials) do |metacat|
#     eml = metacat.find(:docid => 'pisco.10.3')
#     start_date, end_date = eml.temporal_coverage
#     puts "start: #{start_date}, end: #{end_date}"
#   end
#
# === Search for oceanographic data
#
#   metacat = Metacat.new('http://data.piscoweb.org/catalog/metacat')
#   pathquery = '...' # see example at http://knb.ecoinformatics.org/software/metacat/metacatquery.html
#   docs = metacat.find(:squery => pathquery)
#   docs.each { |eml| puts eml.docid }
#
# === Find and write a data_table to local disk
#
#   Metacat.new('http://data.piscoweb.org/catalog/metacat', credentials) do |metacat|
#     file = File.new('tmp', 'w+')
#     # using a block you can avoid loading the whole file into memory!
#     metacat.read('data_table.1.1') do |fragment|
#       file.write(fragment)
#     end
#     file.close
#   end
class Metacat
  # Creates a client for the Metacat servlet located at +path_to_metacat+.
  #
  # +options+ may contain 'username' and 'password' (String or Symbol keys).
  # When both are present the client logs in immediately.
  #
  # If a block is given, the client is yielded to it and, when a login
  # happened, logged out again afterwards -- also when the block raises.
  def initialize(path_to_metacat, options = {}, &block)
    @uri = URI.parse(path_to_metacat)
    @cookie = false
    @logged_in = false

    username = options['username'] || options[:username]
    password = options['password'] || options[:password]
    login(username, password) if username && password

    return unless block_given?

    begin
      yield self
    ensure
      logout if @logged_in
    end
  end

  # Check if the metacat instance has a session cookie
  def logged_in?
    @cookie ? true : false
  end

  # Returns either an array of Eml documents if :squery is passed or
  # a single Eml document (or nil) if passed :docid. This function _will_ _not_
  # return a data table, only Eml objects. Search hits that cannot be read as
  # Eml are left out of the array. Returns nil when neither key is given.
  #
  # If you need to retrieve a data table or other document, use read()
  #
  # Raises ArgumentError when both :docid and :squery are given, or when
  # :docid refers to an xml document that is not Eml.
  #
  # Examples:
  #   metacat.find(:docid => 'cbs_10.1')
  #   metacat.find(:squery => xml_path_query)
  #
  def find(args)
    if args[:docid] && args[:squery]
      raise ArgumentError, "Too many parameters. Choose :docid or :squery"
    elsif args[:docid]
      find_by_docid(args[:docid])
    elsif args[:squery]
      find_by_squery(args[:squery])
    end
  end

  # Logs into metacat using ldap authentication. Usernames are complex, such as
  # 'uid=cburt,o=PISCO,dc=ecoinformatics,dc=org'
  #
  # Raises MetacatPermissionDenied exception on fail and MetacatResponseError
  # when the reply is not xml.
  #
  # NOTE: the credentials are sent as part of the GET query string.
  #
  # Example
  #   metacat.login('uid=cburt,o=PISCO,dc=ecoinformatics,dc=org', '******')
  #   => true
  def login(username, password)
    response = metacat_get({ 'action'   => 'login',
                             'qformat'  => 'xml',
                             'username' => username,
                             'password' => password })
    raise MetacatResponseError unless response.content_type == 'text/xml'

    doc = REXML::Document.new(response.read_body)
    unless doc.root.name == 'login'
      # Fall back to the root text when the reply carries no <message> element.
      message = doc.root.elements['message']
      detail = message ? message.text : doc.root.text
      raise MetacatPermissionDenied, "login error: #{detail}"
    end

    @cookie = response['set-cookie']
    @logged_in = true
  end

  # Ends the current session and forgets the session cookie.
  #
  # Returns true on success. Raises a RuntimeError when metacat does not
  # confirm the logout and MetacatResponseError when the reply is not xml.
  def logout
    response = metacat_get({ 'action' => 'logout', 'qformat' => 'xml' })
    raise MetacatResponseError unless response.content_type == 'text/xml'

    doc = REXML::Document.new(response.read_body)
    # Comparison, not assignment: only a <logout> root confirms the logout.
    raise "Failed to logout: #{doc.root.text}" unless doc.root.name == 'logout'

    @cookie = false
    @logged_in = false
    true
  end

  # Reads a specified document from metacat. If xml is found, a REXML::Document will be returned
  #
  # When reading text data tables, it should be noted that loading the entire large file can
  # consume an enormous amount of memory. To avoid this, read can be passed a &block. The block
  # will receive fragments of the file as it comes in.
  #
  # Returns
  # * a REXML::Document for an xml reply,
  # * nil when metacat reports that the document does not exist,
  # * nil for a non-xml reply when 'only_eml' => true is passed,
  # * the body as a String for a non-xml reply read without a block,
  # * whatever the underlying request returns when a block consumed the body.
  #
  # Raises MetacatPermissionDenied when metacat reports a permission error and
  # a RuntimeError for any other error reply.
  #
  # Examples:
  # Reading an EML document
  #   metacat.read('eml_doc.1.1')
  #   => #<REXML::Document ...>
  #
  # Writing a data table to disk
  #   file = File.new('tmp', 'w+')
  #   metacat.read('data_table.1.1') do |fragment|
  #     file.write(fragment)
  #   end
  #   file.close
  #
  # Reading an entire data table into memory
  #   data_table = metacat.read('data_table.1.1')
  def read(docid, options = {}, &block) # :yields: xml or data_table fragment
    data = { 'action' => 'read', 'qformat' => 'xml', 'docid' => docid }

    metacat_get(data) do |response|
      if response.content_type == 'text/xml'
        return interpret_read_xml(REXML::Document.new(response.read_body))
      end

      # Anything else is probably a data table.
      return nil if options['only_eml'] == true

      if block_given?
        response.read_body { |buffer| yield buffer }
      else
        # Explicit return: the value of this block is discarded by the
        # request, so without it the caller would get the response object.
        return response.read_body
      end
    end
  end

  # Uses the metacat pathquery search and returns the xml response as a string.
  # For query format information, see
  # http://knb.ecoinformatics.org/software/metacat/metacatquery.html
  #
  # Raises MetacatResponseError (a RuntimeError) when the reply is not xml.
  def squery(squery)
    response = metacat_get({ 'action' => 'squery', 'qformat' => 'xml', 'query' => squery })
    unless response.content_type == 'text/xml'
      raise MetacatResponseError, "Metacat returned unexpected Content Type"
    end

    response.read_body
  end

  private

  # Reads +docid+ and wraps it in an Eml object; nil when nothing was read.
  def find_by_docid(docid)
    result = read(docid, 'only_eml' => true)
    return nil if result.nil?

    try_eml(result) ||
      raise(ArgumentError, "#{docid} does not refer to eml metadata. To read other documents use read.")
  end

  # Runs the pathquery and returns the hits that could be read as Eml.
  def find_by_squery(pathquery)
    resultset = REXML::Document.new(squery(pathquery))
    documents = []
    resultset.elements.each("/resultset/document") do |document|
      # The docid is taken from the first child element of each hit.
      docid = document.elements[1].text
      xml = read(docid, 'only_eml' => true)
      next if xml.nil? # not readable as xml, so it cannot be Eml

      eml = try_eml(xml)
      documents << eml if eml
    end
    documents
  end

  # Maps the xml reply of a read action to a return value or an exception.
  def interpret_read_xml(doc)
    root = doc.root
    return doc unless root.name == 'error'

    message = root.text.to_s
    if message.include?('permission')
      raise MetacatPermissionDenied, message
    elsif message.include?('does not exist')
      nil # Nothing found
    else
      raise "Unrecognized response from metacat: #{message}"
    end
  end

  # Builds an Eml object from +doc+; false when Eml rejects it with ArgumentError.
  def try_eml(doc)
    Eml.new(doc)
  rescue ArgumentError
    false
  end

  # Opens a connection to the metacat host, using TLS for https uris.
  def start_http(&block)
    Net::HTTP.start(@uri.host, @uri.port, :use_ssl => (@uri.scheme == 'https'), &block)
  end

  # POSTs +data+ to the servlet. With a block the response is yielded before
  # its body is read.
  def metacat_post(data, &block)
    start_http do |http|
      if block_given?
        http.request_post(@uri.path, data, headers) { |response| yield(response) }
      else
        http.post(@uri.path, data, headers)
      end
    end
  end

  # GETs the servlet with +data+ as the query string. With a block the
  # response is yielded before its body is read.
  def metacat_get(data, &block)
    path = @uri.path + query_string(data)
    start_http do |http|
      if block_given?
        http.request_get(path, headers) { |response| yield(response) }
      else
        http.get(path, headers)
      end
    end
  end

  # Turns +hash+ into a form-encoded query string with a leading '?'.
  def query_string(hash)
    '?' + URI.encode_www_form(hash)
  end

  # Request headers carrying the session cookie; nil when there is none.
  def headers
    @cookie ? { 'Cookie' => @cookie } : nil
  end
end

# Raised when metacat refuses a login or denies access to a document.
class MetacatPermissionDenied < RuntimeError
end

# Raised when metacat answers with an unexpected content type.
class MetacatResponseError < RuntimeError
end