/** * '$RCSfile: DownloadHandler.java,v $' * * '$Author: leinfelder $' * '$Date: 2008-08-08 21:40:51 $' * '$Revision: 1.28 $' * * For Details: http://kepler.ecoinformatics.org * * Copyright (c) 2003 The Regents of the University of California. * All rights reserved. * * Permission is hereby granted, without written agreement and without * license or royalty fees, to use, copy, modify, and distribute this * software and its documentation for any purpose, provided that the * above copyright notice and the following two paragraphs appear in * all copies of this software. * * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN * IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. * * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY * OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, * UPDATES, ENHANCEMENTS, OR MODIFICATIONS. */ package org.ecoinformatics.datamanager.download; import java.io.BufferedOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.util.Hashtable; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.ecoinformatics.datamanager.database.DatabaseLoader; import org.ecoinformatics.datamanager.parser.Entity; import org.ecoinformatics.datamanager.quality.QualityCheck; import org.ecoinformatics.datamanager.quality.QualityReport; import org.ecoinformatics.datamanager.quality.QualityCheck.Status; import org.ecoinformatics.ecogrid.authenticatedqueryservice.AuthenticatedQueryServiceGetToStreamClient; import org.ecoinformatics.ecogrid.queryservice.QueryServiceGetToStreamClient; /** * This class will read a input stream from remote entity for given URL and * write data into given local storage systems. This is the main class of download * component. The class implements Runnable interface, so the download process * will be run in another thread. * * @author tao */ public class DownloadHandler implements Runnable { /* * Class fields */ public static Log log = LogFactory.getLog(DownloadHandler.class); // Used in quality reporting protected final static String ONLINE_URLS_EXCEPTION_MESSAGE = "Error reading from the data source."; /* * Constants */ private static final String SRBUSERNAME = "testuser.sdsc"; private static final String SRBPASSWD = "TESTUSER"; private static final int SLEEPTIME = 100; private static final int MAXLOOPNUMBER = 20000; protected static Hashtable handlerList = new Hashtable(); private static String SRBENDPOINT = "http://srbbrick8.sdsc.edu:8080/SRBImpl/services/SRBQueryService"; private static String SRBMACHINE = "srb-mcat.sdsc.edu"; /* * Instance fields */ //protected DownloadHandler handler = null; //private String identifier = null; private String url = null; private DataStorageInterface[] dataStorageClassList = null; private String[] errorMessages = null; protected boolean completed = false; protected boolean success = false; protected boolean busy = false; private Exception exception = null; protected String ecogridEndPoint = "http://ecogrid.ecoinformatics.org/knb/services/QueryService"; protected Entity entity = null; protected String sessionId = null; /* * Constructors */ /** * This version of the constructor stores the entity object for which the download is * being performed. This is to support quality reporting, where information about the * associated entity is needed as part of the quality information being reported on. * * @param entity the Entity object for which this DownloadHandler is downloading data * @param url the url (or identifier) of entity need be downloaded */ protected DownloadHandler(Entity entity, String url, EcogridEndPointInterface endPoint) { this(url, endPoint); this.entity = entity; } /** * Constructor of this class * @param url the url (or identifier) of entity need be downloaded */ protected DownloadHandler(String url, EcogridEndPointInterface endPoint) { this.url = url; if (endPoint != null) { ecogridEndPoint = endPoint.getMetacatEcogridEndPoint(); SRBENDPOINT = endPoint.getSRBEcogridEndPoint(); SRBMACHINE = endPoint.getSRBMachineName(); //do we have authenticated version? if (endPoint instanceof AuthenticatedEcogridEndPointInterface) { sessionId = ((AuthenticatedEcogridEndPointInterface)endPoint).getSessionId(); //can we actually use it? if (sessionId != null) { ecogridEndPoint = ((AuthenticatedEcogridEndPointInterface)endPoint).getMetacatAuthenticatedEcogridEndPoint(); } } } //loadOptions(); //this.identifier = identifier; //this.dataStorageClassList = dataStorageClassList; } /* * Class methods */ /** * Gets a downloadHandler with specified url from the hash. * Return null if no handler found for this source. * * @param source the source URL to which the returned download handler is * associated. The source URL is the key, the download * handler object is the associated value. * @return the DownloadHandler value associated with the source, or null * if DownloadHandler object is associated with this source. */ protected static synchronized DownloadHandler getHandlerFromHash( String source) { DownloadHandler handler = null; if (source != null) { handler = handlerList.get(source); // assign download handler to one in List } return handler; } /** * Gets an instance of the DownloadHandler Object for this URL. * * @param url The url (or identifier) of entity to be downloaded * @param endPoint the object which provides ecogrid endpoint information * @return DownloadHandler object associated with this URL * */ public static DownloadHandler getInstance(String url, EcogridEndPointInterface endPoint) { DownloadHandler handler = getHandlerFromHash(url); if (handler == null) { log.debug("Constructing DownloadHandler for URL: " + url); handler = new DownloadHandler(url, endPoint); } return handler; } /** * Gets an instance of the DownloadHandler Object for this URL. This version * of the method passes the associated entity object as a parameter, and * also calls the constructor that accepts the entity object. This is * to support quality reporting. When creating a qualityCheck object relating * to downloading, the DownloadHandler will need some information from the * entity such as the packageId and entity name. * * @param entity The entity object for which the download is being performed * @param url The url (or identifier) of entity to be downloaded * @param endPoint the object which provides ecogrid endpoint information * @return DownloadHandler object associated with this URL * */ public static DownloadHandler getInstance(Entity entity, String url, EcogridEndPointInterface endPoint) { DownloadHandler handler = getHandlerFromHash(url); if (handler == null) { log.debug("Constructing DownloadHandler for URL: " + url); handler = new DownloadHandler(entity, url, endPoint); } return handler; } /** * Sets the DownloadHandler object into the hash table. This will be called * at the start of download process. So we can keep track which handler is * doing the download job now. Since it will access a static variable * handlerList in different thread, it should be static and synchronized * * @param downloadHandler the DownloadHandler object to be stored in the * hash * */ private static synchronized void putDownloadHandlerIntoHash( DownloadHandler downloadHandler) { if (downloadHandler != null) { String source = downloadHandler.getUrl(); if (source != null) { //System.out.println("add the source "+source); handlerList.put(source, downloadHandler); } } } /** * Removes the downloadHandler obj from the hash table. This method will be * called at the end of download process. Since it will access a static * variable handlerList in different thread, it should be static and * synchronized; * * @param downloadHandler the DownloadHandler object to be removed * from the hash */ private static synchronized void removeDownloadHandlerFromHash( DownloadHandler downloadHandler) { if (downloadHandler != null) { String source = downloadHandler.getUrl(); if (source != null) { //System.out.println("remove the source "+source); handlerList.remove(source); } } } /* * Instance methods */ /** * This method will download data for the given url in a new thread. * It implements from Runnable Interface. */ public void run() { DownloadHandler handler = getHandlerFromHash(url); if (handler != null) { /* * A handler which points to the same URL is busy in downloading * process, so do nothing, just wait for the handler to finish the * download. */ int index = 0; while (handler.isBusy() && index < MAXLOOPNUMBER) { try { Thread.sleep(SLEEPTIME); } catch(Exception e) { break; } index++; } success = handler.isSuccess(); //System.out.println("after setting success "+success); busy =false; completed = true; //System.out.println(" don't need download"); return; } else { // if no handler which points same url, put the handler into hash table for tracking //System.out.println("need download"); putDownloadHandlerIntoHash(this); } busy = true; completed = false; //System.out.println("start get source"+url); try { success = getContentFromSource(url); } catch(Exception e) { log.error("Error in DownloadHandler run method" + e.getMessage()); } //System.out.println("after get source"+url); // waiting DataStorageInterface to finished serialize( some DataStorageInterface will // span another thread waitingStorageInterfaceSerialize(); if (dataStorageClassList != null) { int length = dataStorageClassList.length; for (int i=0; i"; } /** * Get data from ecogrid server base on given end point and identifier. * This method will handle the distribution url for ecogrid or srb protocol * This method will be called by getContentFromSource(). * * @param endPoint the end point of ecogrid service * @param identifier the entity identifier in ecogrid service */ protected boolean getContentFromEcoGridSource(String endPoint, String identifier) { // create a ecogrid client object and get the full record from the // client //System.out.println("=================the endpoint is "+endPoint); //System.out.println("=================the identifier is "+identifier); boolean successFlag = false; if (endPoint != null && identifier != null) { //log.debug("Get " + identifier + " from " + endPoint); try { //fatory //log.debug("This is instance pattern"); //log.debug("Get from EcoGrid: " + identifier); NeededOutputStream [] outputStreamList = getOutputStreamList(); if (outputStreamList != null) { boolean oneLoopSuccess = true; for (int i=0; i -1) { // write to each outputstreams for (int i = 0; i < outputStreamList.length; i++) { stream = outputStreamList[i]; if (stream != null && stream.getNeeded()) { os = stream.getOutputStream(); os.write(b, 0, bytesRead); } } // get the next bytes bytesRead = inputStream.read(b, 0, 1024); kilobytes++; /*if (kilobytes < 1000) { System.err.printf("."); if (kilobytes % 100 == 0) { System.err.printf("\n"); } } else if (kilobytes % 1000 == 0) { System.err.printf("\n Kilobytes read: %d", kilobytes); }*/ } //System.err.printf("\n"); // done writing to the streams for (int i = 0; i < outputStreamList.length; i++) { stream = outputStreamList[i]; if (stream != null && stream.getNeeded()) { os.flush(); os.close(); } } successFlag = true; //String error = null; //finishSerialize(error); log.info(String.format(" Total Kilobytes Read: %d\n", kilobytes)); log.debug("DownloadHandler.finishSerialize()"); } else { successFlag = false; } return successFlag; } catch (IOException e) { successFlag = false; e.printStackTrace(); throw(e); } catch (Exception ee) { successFlag = false; return successFlag; } } }