/**
* '$RCSfile: EMLParser.java,v $'
* Copyright: 1997-2002 Regents of the University of California,
* University of New Mexico, and
* Arizona State University
* Sponsors: National Center for Ecological Analysis and Synthesis and
* Partnership for Interdisciplinary Studies of Coastal Oceans,
* University of California Santa Barbara
* Long-Term Ecological Research Network Office,
* University of New Mexico
* Center for Environmental Studies, Arizona State University
* Other funding: National Science Foundation (see README for details)
* The David and Lucile Packard Foundation
* For Details: http://knb.ecoinformatics.org/
*
* '$Author: walbridge $'
* '$Date: 2008-11-05 21:08:45 $'
* '$Revision: 1.16 $'
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.ecoinformatics.eml;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.net.URL;
import java.util.Hashtable;
import java.util.Stack;
import java.util.StringTokenizer;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import org.apache.xpath.XPathAPI;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import edu.ucsb.nceas.utilities.config.ConfigXML;
/**
* This is a SAX parser to validate EML packages. This parser will validate
* an EML package with references based on the following rules:
*
* -
* If a user wants to reuse content to indicate the repetition of an object,
* a reference must be used. You cannot have two identical ids in a document.
*
* -
* "Local scope" is defined as identifiers unique only to a single instance
* document (if a document does not have a system or if scope is set to 'local'
* then all ids are defined as distinct content).
*
* -
* System scope is defined as identifiers unique to an entire data management
* system (if two documents share a system string, then any IDs in those two
* documents that are identical refer to the same object).
*
* -
* If an element references another element, it must not have an ID.
*
* -
* All EML packages must have the 'eml' module as the root.
*
* -
* The system and scope attributes are always optional, except at the
* 'eml' module, where the scope attribute is fixed as 'system'. The scope
* attribute defaults to 'local' for all other modules.
*
*
*/
public class EMLParser
{
// NOTE(review): not read or written anywhere in the visible part of this file
private String parserName;
// parser configuration; loaded by every constructor and queried in parseConfig()
private ConfigXML config;
// key definitions; the array is sized in parseConfig() from the "//key" nodes of the config
private Key[] keys;
// keyref definitions; the array is sized in parseConfig() from the "//keyref" nodes of the config
private Keyref[] keyrefs;
// presumably the ids found while parsing keys - usage is not visible here, TODO confirm
private Hashtable idHash = new Hashtable();
// presumably the ids referenced by keyrefs - usage is not visible here, TODO confirm
private Hashtable idrefHash = new Hashtable();
// the eml file passed to the File based constructors; the String constructor does not set it
private File xml;
/**
 * Parses an eml file using the default configuration, which is loaded from
 * the "/config.xml" classpath resource.
 *
 * @param xml the eml file to parse
 * @throws EMLParserException if the default config resource is missing or
 *         cannot be loaded
 */
public EMLParser(File xml)
{
  this.xml = xml;
  URL configFile = getClass().getResource("/config.xml");
  // getResource() returns null when the resource is absent; report that
  // explicitly instead of surfacing it as "Config file not found: null"
  if(configFile == null)
  {
    throw new EMLParserException(
      "Config file not found: /config.xml is not on the classpath");
  }
  InputStream configStream = null;
  try
  {
    configStream = configFile.openStream();
    config = new ConfigXML(configStream);
  }
  catch(Exception e)
  {
    throw new EMLParserException("Config file not found: " + e.getMessage());
  }
  finally
  {
    // release the stream opened above, whether or not loading succeeded
    if(configStream != null)
    {
      try
      {
        configStream.close();
      }
      catch(IOException ignored)
      {
        // a failure to close must not hide the outcome of loading the config
      }
    }
  }
  parseConfig();
  parseKeys();
  parseKeyrefs();
}
/**
 * Parses an eml file using an alternate configuration file.
 *
 * @param xml the eml file to parse
 * @param configFile the config file to load instead of the default one
 * @throws EMLParserException if the config file cannot be loaded
 */
public EMLParser(File xml, File configFile)
  throws EMLParserException
{
  try
  {
    String configPath = configFile.getAbsolutePath();
    this.config = new ConfigXML(configPath);
  }
  catch(Exception loadFailure)
  {
    String reason = loadFailure.getMessage();
    throw new EMLParserException("Config file not found: " + reason);
  }
  this.xml = xml;
  parseConfig();
  parseKeys();
  parseKeyrefs();
}
/**
 * Parses an eml document supplied as a string, using the default
 * configuration loaded from the "/config.xml" classpath resource.
 *
 * @param xmlString the text of the eml document; must not be null or empty
 * @throws EMLParserException if xmlString is null or empty, or if the
 *         default config resource is missing or cannot be loaded
 * @throws IOException presumably raised while the string based key and
 *         keyref parsing reads the document (those methods are not visible
 *         here - confirm)
 */
public EMLParser(String xmlString)
  throws EMLParserException, IOException
{
  if (xmlString == null || xmlString.equals(""))
  {
    // the check rejects both null and "", so the message names both
    throw new EMLParserException("The string to be parsed is null or empty");
  }
  URL configFile = getClass().getResource("/config.xml");
  // getResource() returns null when the resource is absent; report that
  // explicitly instead of surfacing it as "Config file not found: null"
  if(configFile == null)
  {
    throw new EMLParserException(
      "Config file not found: /config.xml is not on the classpath");
  }
  InputStream configStream = null;
  try
  {
    configStream = configFile.openStream();
    config = new ConfigXML(configStream);
  }
  catch(Exception e)
  {
    throw new EMLParserException("Config file not found: " + e.getMessage());
  }
  finally
  {
    // release the stream opened above, whether or not loading succeeded
    if(configStream != null)
    {
      try
      {
        configStream.close();
      }
      catch(IOException ignored)
      {
        // a failure to close must not hide the outcome of loading the config
      }
    }
  }
  parseConfig();
  // the string variants are used because no File is available in this path
  parseKeys(xmlString);
  parseKeyrefs(xmlString);
}
/**
* make sure all ids are unique and hash the keys
*
* NOTE(review): the text of this method appears to be damaged. The loop
* header below stops in the middle of its condition, and the statements
* after it use names that are not declared in this method (s, node,
* builder) and call append() on xml, which is declared as a File field.
* They also return a parsed document although this method is declared void.
* Those statements presumably belong to a different method that builds a
* document fragment (see the message in the catch block). TODO: restore the
* missing text from version control before changing anything in this span.
*/
private void parseKeys()
{
for(int i=0; i");
s.push(node);
}
// drain the stack, appending one piece of text per stored node name
// (most recently pushed name first)
while(!s.empty())
{
String node = (String)s.pop();
xml.append("").append(node).append(">");
}
// parse the accumulated text as an xml document and hand it back
return builder.parse(new InputSource(new StringReader(xml.toString())));
}
catch(Exception e)
{
// every failure is reported as an EMLParserException carrying only the
// message text of the original exception
throw new EMLParserException("Error building document fragment: " +
e.getMessage());
}
}
/**
 * Placeholder for key resolution. The body is currently empty, so calling
 * this method has no effect.
 */
private void resolveKeys()
{
  // no-op: nothing is resolved here
}
/**
 * Gets the nodes selected by an xpath expression in an xml document that is
 * read from an input stream.
 *
 * @param is the stream to read the xml document from
 * @param xpath the xpath expression to evaluate against the document
 * @return the nodes selected by the expression
 * @throws Exception if the InputSource based overload fails
 */
public static NodeList getPathContent(InputStream is, String xpath)
  throws Exception
{
  return getPathContent(new InputSource(is), xpath);
}
/**
 * Gets the nodes selected by an xpath expression in an xml document that is
 * read from a StringReader.
 *
 * @param read the reader to read the xml document from
 * @param xpath the xpath expression to evaluate against the document
 * @return the nodes selected by the expression
 * @throws Exception if the document cannot be parsed or the expression
 *         cannot be evaluated
 */
public static NodeList getPathContent(StringReader read, String xpath)
  throws Exception
{
  // delegate to the InputSource overload, as the InputStream overload does,
  // instead of repeating its parsing and selection steps here
  return getPathContent(new InputSource(read), xpath);
}
/**
 * Parses the given input into a DOM document and returns the nodes selected
 * by an xpath expression.
 *
 * @param in the source of the xml document
 * @param xpath the xpath expression to evaluate against the document
 * @return the nodes selected by the expression
 * @throws Exception if the document cannot be parsed or the expression
 *         cannot be evaluated
 */
private static NodeList getPathContent(InputSource in, String xpath)
  throws Exception
{
  DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
  // namespace processing is switched off; presumably so the configured xpath
  // expressions can use element names as written in the document - confirm
  dfactory.setNamespaceAware(false);
  Document doc = dfactory.newDocumentBuilder().parse(in);
  // No Transformer is created any more: the old one was configured but never
  // used, so it only cost time and added a way for this call to fail.
  return XPathAPI.selectNodeList(doc, xpath);
}
/**
* Reads the key and keyref definitions from the config and sizes the keys
* and keyrefs arrays to match the number of "//key" and "//keyref" nodes.
*
* NOTE(review): the text of this method appears to be damaged. The loop
* header after the "get the keys" comment stops in the middle of its
* condition and runs straight into command line handling code that uses an
* args array. That code is presumably the body of the program entry point,
* whose header is not visible. TODO: restore the missing text from version
* control before changing anything in this span.
*/
private void parseConfig()
{
try
{ //parse the keys and keyrefs out of the config file
NodeList keyNL = config.getPathContent("//key");
keys = new Key[keyNL.getLength()];
NodeList keyrefNL = config.getPathContent("//keyref");
keyrefs = new Keyref[keyrefNL.getLength()];
//get the keys
for(int i=0; i 0 && !args[0].equals("-q"))
{
// banner; the damaged condition above ends with a test that the first
// argument is not "-q"
System.out.println("EML Parser version 1.0");
System.out.println("Note that this parser DOES NOT VALIDATE your eml file ");
System.out.println("agains the schema. It only validates the ids and ");
System.out.println("references. To validate your eml file against the ");
System.out.println("schema, use SAXValidate or another xml parser.");
System.out.println("Usage: java org.ecoinformatics.eml.EMLParser [-q] [] ");
System.out.println("-----------------------------------------------------------------------");
}
// more than three arguments is reported, but processing still continues
// with both file names left empty
if(args.length > 3)
{
System.out.println("Invalid number of arguments.");
}
String configfile = "";
String emlfile = "";
// three arguments: the first one is not examined here, the second is the
// config file and the third is the eml file
if(args.length == 3)
{
configfile = args[1];
emlfile = args[2];
System.out.println("emlfile: " + emlfile + " configfile: " + configfile);
}
// two arguments: either "-q" plus the eml file, or config file plus eml file
else if(args.length == 2)
{
if(args[0].equals("-q"))
{
emlfile = args[1];
}
else
{
configfile = args[0];
emlfile = args[1];
}
}
// one argument: the eml file, parsed with the default config
else if(args.length == 1)
{
emlfile = args[0];
}
// no arguments: print the usage text and exit with status 0
else if(args.length == 0)
{
System.out.println("Usage: java org.ecoinformatics.eml.EMLParser [-q] [] ");
System.out.println(" -q = quiet mode, little or no output");
System.out.println(" = use an alternate config file. The default is lib/config.xml");
System.out.println(" = the EML file to parse");
System.exit(0);
}
try
{
if(configfile.equals(""))
{
// default config: parse once through the File constructor ...
EMLParser parser = new EMLParser(new File(emlfile));
// ... then read the whole file into a string in 4096 character chunks and
// parse it again through the String constructor (presumably to exercise
// both code paths - confirm)
// NOTE(review): xmldoc is never closed
FileReader xmldoc = new FileReader(emlfile);
char [] ch = new char [4096];
StringWriter writer = new StringWriter();
int readNum = xmldoc.read(ch);
while (readNum != -1)
{
writer.write(ch, 0, readNum);
readNum = xmldoc.read(ch);
}
String str = writer.toString();
EMLParser readerParser = new EMLParser(str);
}
else
{
// alternate config: parse through the File plus config file constructor
EMLParser parser = new EMLParser(new File(emlfile), new File(configfile));
}
// reached only when none of the constructors above threw
System.out.println(emlfile + " has valid ids and references.");
}
catch(Exception e)
{
// any failure is reported by its message only
System.out.println("Error: " + e.getMessage());
}
}
/**
 * A key definition: a name plus the two xpath expressions that locate the
 * key values.
 */
private class Key
{
  /** name of the key */
  protected String name;
  /** xpath expression for the selector */
  protected String selector;
  /** xpath expression for the field in the selector */
  protected String field;

  /**
   * @param name name of the key
   * @param selector xpath expression for the selector
   * @param field xpath expression for the field in the selector
   */
  Key(String name, String selector, String field)
  {
    this.field = field;
    this.selector = selector;
    this.name = name;
  }

  /**
   * @return the three values of this key, each preceded by its label
   */
  public String toString()
  {
    StringBuffer text = new StringBuffer();
    text.append("name: ").append(name);
    text.append(" selector: ").append(selector);
    text.append(" field: ").append(field);
    return text.toString();
  }
}
/**
 * A keyref definition: a name, the name of the key it refers to, and the
 * selector and field that locate the referring values.
 */
private class Keyref
{
  /** name of the keyref */
  protected String name;
  /** the key that this keyref refers to */
  protected String refer;
  /** the selector for the keyref */
  protected String selector;
  /** the field in the selector */
  protected String field;

  /**
   * @param name name of the keyref
   * @param refer the key that this keyref refers to
   * @param selector the selector for the keyref
   * @param field the field in the selector
   */
  Keyref(String name, String refer, String selector, String field)
  {
    this.field = field;
    this.selector = selector;
    this.refer = refer;
    this.name = name;
  }

  /**
   * @return the four values of this keyref, each preceded by its label
   */
  public String toString()
  {
    StringBuffer text = new StringBuffer();
    text.append("name: ").append(name);
    text.append(" refer: ").append(refer);
    text.append(" selector: ").append(selector);
    text.append(" field: ").append(field);
    return text.toString();
  }
}
}