/**
* '$RCSfile: EMLParser.java,v $'
* Copyright: 1997-2002 Regents of the University of California,
* University of New Mexico, and
* Arizona State University
* Sponsors: National Center for Ecological Analysis and Synthesis and
* Partnership for Interdisciplinary Studies of Coastal Oceans,
* University of California Santa Barbara
* Long-Term Ecological Research Network Office,
* University of New Mexico
* Center for Environmental Studies, Arizona State University
* Other funding: National Science Foundation (see README for details)
* The David and Lucile Packard Foundation
* For Details: http://knb.ecoinformatics.org/
*
* '$Author: walbridge $'
* '$Date: 2008-11-05 21:08:45 $'
* '$Revision: 1.16 $'
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.ecoinformatics.eml;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.net.URL;
import java.util.Hashtable;
import java.util.Stack;
import java.util.StringTokenizer;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import org.apache.xpath.XPathAPI;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import edu.ucsb.nceas.utilities.config.ConfigXML;
/**
* This is a parser to validate EML packages. It is deprecated as of EML
* 2.2.0, for which users should use EMLValidator.validate() instead.
* This parser will validate all EML 2.2.0 and later documents by delegating
* to EMLValidator.validate(). For earlier EML versions, it uses the older
* validation logic in EMLParser, which does not fully implement all of the
* EML validation rules. This old behavior was kept in the interest of
* backwards compatibility, and any new documents should be validated using
* EMLValidator.validate().
* This EMLParser implementation tries to validate an EML package with references
* based on the following rules (but has bugs, see above):
*
* -
* If a user wants to reuse content to indicate the repetition of an object,
* a reference must be used. You cannot have two identical ids in a document.
*
* -
* "Local scope" is defined as identifiers unique only to a single instance
* document (if a document does not have a system or if scope is set to 'local'
* then all ids are defined as distinct content).
*
* -
* System scope is defined as identifiers unique to an entire data management
* system (if two documents share a system string, then any IDs in those two
* documents that are identical refer to the same object).
*
* -
* If an element references another element, it must not have an ID.
*
* -
* All EML packages must have the 'eml' module as the root.
*
* -
* The system and scope attribute are always optional except for at the
* 'eml' module where the scope attribute is fixed as 'system'. The scope
* attribute defaults to 'local' for all other modules.
*
*
*/
public class EMLParser {
// NOTE(review): not referenced in the visible part of this file - confirm it is still needed
private String parserName;
// parser configuration; set by the constructors on the legacy (pre-2.2.0) path
// and queried by parseConfig() for its //key and //keyref entries
private ConfigXML config;
// one slot per //key entry found in the config (sized in parseConfig())
private Key[] keys;
// one slot per //keyref entry found in the config (sized in parseConfig())
private Keyref[] keyrefs;
// NOTE(review): presumably the ids collected while parsing keys - confirm against parseKeys()
private Hashtable idHash = new Hashtable();
// NOTE(review): presumably the references collected while parsing keyrefs - confirm against parseKeyrefs()
private Hashtable idrefHash = new Hashtable();
// the EML file handed to the File-based constructor; not set by the String constructor
private File xml;
/**
 * Parses an eml file using the default configuration.
 * This is shorthand for the two-argument constructor called with no
 * alternate config file.
 *
 * @param xml the eml file to parse
 * @deprecated use EMLValidator.validate() instead
 */
@Deprecated
public EMLParser(File xml) {
    // a null config file makes the two-argument constructor fall back to
    // the default configuration
    this(xml, (File) null);
}
/**
 * Parses an eml file with an alternate configuration.
 * <p>
 * The document version is checked first. A document for which
 * {@link #isRecentVersion(Reader)} returns true is validated with
 * EMLValidator, and the config file is not consulted. Any other document
 * goes through the legacy path: the configuration is loaded, then
 * parseConfig(), parseKeys() and parseKeyrefs() are run.
 *
 * @param xml the eml file to parse
 * @param configFile the alternate config file to use, or null to use the
 *        default configuration
 * @throws EMLParserException if the file cannot be read, the alternate
 *         config cannot be loaded, or EMLValidator reports the document
 *         as invalid
 * @deprecated use EMLValidator.validate() instead
 */
@Deprecated
public EMLParser(File xml, File configFile) throws EMLParserException {
    this.xml = xml;
    try {
        boolean recent;
        // try-with-resources releases the file handle even when version
        // detection fails before isRecentVersion() reaches its own close().
        try (FileReader reader = new FileReader(xml)) {
            recent = isRecentVersion(reader);
        }
        if (recent) {
            EMLValidator validator = new EMLValidator(xml);
            boolean isValid = validator.validate();
            if (!isValid) {
                throw new EMLParserException(String.join("\n", validator.getErrors()));
            }
        } else {
            if (configFile == null) {
                config = getDefaultConfig();
            } else {
                try {
                    config = new ConfigXML(configFile.getAbsolutePath());
                } catch (Exception e) {
                    throw new EMLParserException("Config file not found: " + e.getMessage());
                }
            }
            parseConfig();
            parseKeys();
            parseKeyrefs();
        }
    } catch (IOException e) {
        // FileNotFoundException is an IOException, so a missing file is
        // reported here as well.
        throw new EMLParserException(e.getMessage());
    }
}
/**
 * Parses an eml document supplied as a String, using the default
 * configuration for documents that take the legacy path.
 *
 * @param xmlString the xml to parse
 * @throws EMLParserException if the string is null or empty, or if
 *         EMLValidator reports the document as invalid
 * @throws IOException if the document version cannot be read
 * @deprecated use EMLValidator.validate() instead
 */
@Deprecated
public EMLParser(String xmlString) throws EMLParserException, IOException {
    if (xmlString == null || xmlString.isEmpty()) {
        throw new EMLParserException("The EML string to be parsed is null or empty.");
    }

    // Older documents: legacy id/reference checks driven by the default config.
    if (!isRecentVersion(new StringReader(xmlString))) {
        config = getDefaultConfig();
        parseConfig();
        parseKeys(xmlString);
        parseKeyrefs(xmlString);
        return;
    }

    // Recent documents: hand the whole job to EMLValidator.
    EMLValidator delegate = new EMLValidator(xmlString);
    if (!delegate.validate()) {
        throw new EMLParserException(String.join("\n", delegate.getErrors()));
    }
}
/**
 * Locate and return the default configuration for this parser.
 * The configuration is read from the classpath resource "/config.xml".
 *
 * @return the default ConfigXML
 * @throws EMLParserException if the resource is missing from the classpath
 *         or cannot be loaded into a ConfigXML
 */
private ConfigXML getDefaultConfig() throws EMLParserException {
    URL configFile = getClass().getResource("/config.xml");
    // getResource() returns null for a missing resource; report that
    // directly instead of surfacing it as a NullPointerException whose
    // message is just "null".
    if (configFile == null) {
        throw new EMLParserException(
            "Config file not found: /config.xml is not on the classpath");
    }
    try {
        return new ConfigXML(configFile.openStream());
    } catch (Exception e) {
        throw new EMLParserException("Config file not found: " + e.getMessage());
    }
}
/**
 * Check if the provided EML document is a recent version, that is
 * version 2.2.0 or later (the comparison is inclusive).
 * <p>
 * The version is taken from the text following the first hyphen of the
 * namespace returned by EMLParserServlet.findNamespace(). The reader is
 * always closed by this method, whether or not the check succeeds.
 *
 * @param xml the EML text to be checked as a Reader; closed on return
 * @return boolean true if it is a recent version, false otherwise
 * @throws IOException if the document cannot be read, or if no version can
 *         be extracted from its namespace
 */
public boolean isRecentVersion(Reader xml) throws IOException {
    String namespace;
    // Binding the reader as a resource closes it even when
    // findNamespace() throws.
    try (Reader source = xml) {
        namespace = EMLParserServlet.findNamespace(source);
    }
    if (namespace == null) {
        throw new IOException(
            "Unable to determine the EML version: no namespace found in the document.");
    }
    String[] parts = namespace.split("\\-");
    if (parts.length < 2) {
        // No hyphen means there is no version component to compare.
        throw new IOException(
            "Unable to determine the EML version from namespace: " + namespace);
    }
    SemVersion docVersion = new SemVersion(parts[1]);
    SemVersion cutoffVersion = new SemVersion("2.2.0");
    return docVersion.compareTo(cutoffVersion) >= 0;
}
/**
* make sure all ids are unique and hash the keys
*/
private void parseKeys() {
for(int i=0; i");
s.push(node);
}
while(!s.empty()) {
String node = (String)s.pop();
xml.append("").append(node).append(">");
}
return builder.parse(new InputSource(new StringReader(xml.toString())));
} catch(Exception e) {
throw new EMLParserException("Error building document fragment: " +
e.getMessage());
}
}
/**
* Placeholder with an empty body: calling it has no effect.
*/
private void resolveKeys() {
}
/**
 * Gets the content of a path in an xml document (from an input stream).
 *
 * @param is stream supplying the xml document
 * @param xpath the XPath expression to evaluate
 * @return the nodes selected by the expression
 * @throws Exception if the InputSource-based overload fails
 */
public static NodeList getPathContent(InputStream is, String xpath)
        throws Exception {
    return getPathContent(new InputSource(is), xpath);
}
/**
 * Gets the content of a path in an xml document (from a StringReader).
 *
 * @param read reader supplying the xml document
 * @param xpath the XPath expression to evaluate
 * @return the nodes selected by the expression
 * @throws Exception if the InputSource-based overload fails
 */
public static NodeList getPathContent(StringReader read, String xpath)
        throws Exception {
    return getPathContent(new InputSource(read), xpath);
}
/**
 * Parses the given source into a DOM document and evaluates an XPath
 * expression against it.
 *
 * @param in the xml document to parse
 * @param xpath the XPath expression to evaluate
 * @return the nodes selected by the expression
 * @throws Exception if the document cannot be parsed or the expression
 *         cannot be evaluated
 */
private static NodeList getPathContent(InputSource in, String xpath)
        throws Exception {
    DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
    // The document is parsed without namespace awareness.
    dfactory.setNamespaceAware(false);
    // NOTE(review): DTD and external-entity handling are left at the
    // factory defaults; consider hardening the factory if documents can
    // come from untrusted sources.
    Document doc = dfactory.newDocumentBuilder().parse(in);
    // Use the simple XPath API to select the matching nodes. No serializer
    // is needed: the nodes are returned as-is.
    return XPathAPI.selectNodeList(doc, xpath);
}
private void parseConfig() {
try {
//parse the keys and keyrefs out of the config file
NodeList keyNL = config.getPathContent("//key");
keys = new Key[keyNL.getLength()];
NodeList keyrefNL = config.getPathContent("//keyref");
keyrefs = new Keyref[keyrefNL.getLength()];
//get the keys
for(int i=0; i 0 && !args[0].equals("-q")) {
System.out.println("EML Parser version 1.0");
System.out.println("Note that this parser DOES NOT VALIDATE your eml file ");
System.out.println("agains the schema. It only validates the ids and ");
System.out.println("references. To validate your eml file against the ");
System.out.println("schema, use SAXValidate or another xml parser.");
System.out.println("Usage: java org.ecoinformatics.eml.EMLParser [-q] [] ");
System.out.println("-----------------------------------------------------------------------");
}
if(args.length > 3) {
System.out.println("Invalid number of arguments.");
}
String configfile = "";
String emlfile = "";
if(args.length == 3) {
configfile = args[1];
emlfile = args[2];
System.out.println("emlfile: " + emlfile + " configfile: " + configfile);
} else if(args.length == 2) {
if(args[0].equals("-q")) {
emlfile = args[1];
} else {
configfile = args[0];
emlfile = args[1];
}
} else if(args.length == 1) {
emlfile = args[0];
} else if(args.length == 0) {
System.out.println("Usage: java org.ecoinformatics.eml.EMLParser [-q] [] ");
System.out.println(" -q = quiet mode, little or no output");
System.out.println(" = use an alternate config file. The default is lib/config.xml");
System.out.println(" = the EML file to parse");
System.exit(0);
}
try {
if(configfile.equals("")) {
EMLParser parser = new EMLParser(new File(emlfile));
FileReader xmldoc = new FileReader(emlfile);
char [] ch = new char [4096];
StringWriter writer = new StringWriter();
int readNum = xmldoc.read(ch);
while (readNum != -1) {
writer.write(ch, 0, readNum);
readNum = xmldoc.read(ch);
}
String str = writer.toString();
EMLParser readerParser = new EMLParser(str);
} else {
EMLParser parser = new EMLParser(new File(emlfile), new File(configfile));
}
System.out.println(emlfile + " has valid ids and references.");
} catch(Exception e) {
System.out.println("Error: " + e.getMessage());
}
}
/**
 * Holds one key definition: its name together with the selector and
 * field xpath expressions.
 */
private class Key {
    protected String name;     // name of the key
    protected String selector; // xpath expression for the selector
    protected String field;    // xpath expression for the field in the selector

    Key(String name, String selector, String field) {
        this.field = field;
        this.selector = selector;
        this.name = name;
    }

    /**
     * Renders the key as "name: ... selector: ... field: ...".
     */
    @Override
    public String toString() {
        return new StringBuilder("name: ")
            .append(name)
            .append(" selector: ")
            .append(selector)
            .append(" field: ")
            .append(field)
            .toString();
    }
}
/**
 * Holds one keyref definition: its name, the key it refers to, and the
 * selector and field expressions.
 */
private class Keyref {
    protected String name;     // name of the keyref
    protected String refer;    // the key that we are referring to
    protected String selector; // the selector for the keyref
    protected String field;    // the field in the selector

    Keyref(String name, String refer, String selector, String field) {
        this.field = field;
        this.selector = selector;
        this.refer = refer;
        this.name = name;
    }

    /**
     * Renders the keyref as "name: ... refer: ... selector: ... field: ...".
     */
    @Override
    public String toString() {
        return new StringBuilder("name: ")
            .append(name)
            .append(" refer: ")
            .append(refer)
            .append(" selector: ")
            .append(selector)
            .append(" field: ")
            .append(field)
            .toString();
    }
}
}