/**
* '$RCSfile: XMLUtilities.java,v $'
* Copyright: 2002 Regents of the University of California
* Authors: @authors@
* Release: @release@
*
* '$Author: leinfelder $'
* '$Date: 2008-10-02 15:59:09 $'
* '$Revision: 1.17 $'
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the above
* copyright notice and the following two paragraphs appear in all copies
* of this software.
*
* IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
* FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
* THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
* PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
* CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
* ENHANCEMENTS, OR MODIFICATIONS.
*/
package edu.ucsb.nceas.utilities;
import edu.ucsb.nceas.utilities.OrderedMap;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.Iterator;
import java.util.Map;
import java.util.Stack;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.TransformerException;
import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;
import org.apache.xpath.XPathAPI;
import org.apache.xpath.objects.XObject;
import org.w3c.dom.Attr;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.ContentHandler;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
//import org.apache.log4j.Logger;
public class XMLUtilities {
// Encoding name that the print() overloads and the getDOMTreeAs...() methods
// pass along when the caller does not supply one. Despite the "FORMAT" in its
// name, the value is used as a character encoding.
private static String DEFAULT_OUTPUT_FORMAT = "UTF-8";
// Presumably the separator between location steps of an xpath string.
private static final String XPATH_SEPARATOR = "/";
// Marker that identifies an attribute step in an xpath,
// e.g. /root/elem1/elem1a/@my_attrib
private static final String ATTRIB_XPATH_SYMBOL = "@";
// Presumably the delimiters of an xpath predicate, e.g. the "[1]" in /a/b[1]
private static final String PREDICATE_OPEN_SYMBOL = "[";
private static final String PREDICATE_CLOSE_SYMBOL = "]";
// private static Logger log = Logger.getLogger(XMLUtilities.class.getName());
/**
 * Loads the classpath-relative XML resource with the given name, parses it
 * into a DOM tree and hands back the document (root) element of that tree.
 * NOTE - NONE OF THESE METHODS ARE THREAD_SAFE.
 *
 * @param cpRelativeFilename CLASSPATH-RELATIVE name of the XML text file to
 *                           be read and parsed
 *
 * @return the root node of the DOM tree parsed from the input file
 *
 * @throws IOException if the file cannot be opened or processed
 */
public static Node getXMLAsDOMTreeRootNode(String cpRelativeFilename)
                                                          throws IOException {
  // Parse first, then pick the document element out of the result.
  Document parsedDocument = getXMLAsDOMDocument(cpRelativeFilename);
  Node documentElement = parsedDocument.getDocumentElement();
  return documentElement;
}
/**
 * Reads XML text from the supplied Reader, parses it into a DOM tree and
 * hands back the document (root) element of that tree.
 * NOTE - NONE OF THESE METHODS ARE THREAD_SAFE.
 *
 * @param xmlReader a <code>java.io.Reader</code> from which XML text can be
 *                  read and parsed
 *
 * @return the root node of the DOM tree parsed from the Reader
 *
 * @throws IOException if the Reader cannot be read or its content processed
 */
public static Node getXMLReaderAsDOMTreeRootNode(Reader xmlReader)
                                                          throws IOException {
  // Parse first, then pick the document element out of the result.
  Document parsedDocument = getXMLReaderAsDOMDocument(xmlReader);
  Node documentElement = parsedDocument.getDocumentElement();
  return documentElement;
}
/**
 * Opens the classpath-relative XML resource with the given name and parses
 * it into a DOM Document.
 * NOTE - NONE OF THESE METHODS ARE THREAD_SAFE.
 *
 * @param cpRelativeFilename CLASSPATH-RELATIVE name of the XML text file to
 *                           be read and parsed
 *
 * @return the Document corresponding to the XML parsed from the input file
 *
 * @throws IOException if the file cannot be opened or processed. When the
 *                     resource cannot be opened, a FileNotFoundException is
 *                     thrown whose cause is the original exception.
 */
public static Document getXMLAsDOMDocument(String cpRelativeFilename)
                                                          throws IOException {
  InputStreamReader isReader;
  try {
    isReader = IOUtil.getResourceAsInputStreamReader(cpRelativeFilename);
  } catch (Exception e) {
    FileNotFoundException fnfe = new FileNotFoundException("File \""
        + cpRelativeFilename + "\" doesn't exist or cannot be read. "
        + "Original exception was: " + e);
    // FileNotFoundException has no (message, cause) constructor, so attach
    // the original failure explicitly to keep its stack trace available.
    fnfe.initCause(e);
    throw fnfe;
  }
  // A null reader is rejected by getXMLReaderAsDOMDocument with an
  // IOException, so no extra check is needed here.
  return getXMLReaderAsDOMDocument(isReader);
}
/**
 * Reads XML text from the supplied Reader and parses it into a DOM Document.
 * The Reader is always closed before this method returns, whether or not
 * parsing succeeded.
 * NOTE - NONE OF THESE METHODS ARE THREAD_SAFE.
 *
 * @param xmlReader a <code>java.io.Reader</code> from which XML text can be
 *                  read and parsed
 *
 * @return the DOM Document containing the XML parsed from the input Reader
 *
 * @throws IOException if the Reader is null, cannot be read, or its content
 *                     cannot be parsed. Parser failures (SAXException,
 *                     ParserConfigurationException, IOException) are wrapped
 *                     and attached as the cause.
 */
public static Document getXMLReaderAsDOMDocument(Reader xmlReader)
                                                          throws IOException {
  if (xmlReader == null) {
    throw new IOException("getXMLReaderAsDOMDocument received a null Reader");
  }
  try {
    return createDomParser().parse(new InputSource(xmlReader));
  } catch (SAXException e) {
    IOException wrapped = new IOException("getXMLReaderAsDOMDocument: "
        + "nested SAXException parsing Reader: " + e);
    wrapped.initCause(e);
    throw wrapped;
  } catch (IOException ie) {
    IOException wrapped = new IOException("getXMLReaderAsDOMDocument: "
        + "IOException parsing Reader: " + ie);
    wrapped.initCause(ie);
    throw wrapped;
  } catch (ParserConfigurationException pe) {
    IOException wrapped = new IOException("getXMLReaderAsDOMDocument: "
        + "nested ParserConfigurationException calling createDomParser(): "
        + pe);
    wrapped.initCause(pe);
    throw wrapped;
  } finally {
    // Best effort: a failure to close must not mask the parse result or the
    // parse exception, so it is only reported.
    try {
      xmlReader.close();
    } catch (IOException closeFailure) {
      closeFailure.printStackTrace();
    }
  }
}
// Shared work stack holding the names of xpath steps that still have to be
// created. It is drained below via popNextNodeString(); presumably it is
// filled by getLastExistingNodeInXPath() (not visible here - confirm).
// Being static and shared, it is one reason these methods are not
// thread-safe.
private static Stack nodesToCreate = new Stack();
/**
 * NOTE - NONE OF THESE METHODS ARE THREAD_SAFE.
 * This method takes an XPATH expression and follows it through a DOM tree,
 * creating element nodes along the way as needed, if they don't already
 * exist. According to the original documentation, at the end of the XPATH it
 * creates a TEXT_NODE populated with the String provided, or replaces the
 * value of the text node if one already exists.
 *
 * NOTE(review): the body of this method is corrupted in this copy of the
 * file. The statement starting with "for (int i=0; i" is cut off in the
 * middle of its condition, and the code after it refers to a variable named
 * newNode that is not declared anywhere in this method. It looks as if the
 * end of this method and the beginning of a following method (one that
 * inserts a caller-supplied Node) were lost. Restore the missing text from
 * version control before changing or relying on this code.
 *
 * @param rootNode the root node of a DOM subtree
 *
 * @param xpath A <code>String</code> representation of an XPATH
 *              expression which defines the unique location of the
 *              required new node in the DOM tree. If this XPATH
 *              expression does not define a unique node, a
 *              DOMException is thrown
 *
 * @param textValue the text value to be inserted in the TEXT_NODE at the
 *              end of this xpath
 *
 * @throws org.w3c.dom.DOMException if this XPATH
 *              expression does not define a unique node, or if a step
 *              with a null name would have to be created
 */
public static void addTextNodeToDOMTree(Node rootNode, String xpath,
String textValue) throws DOMException, TransformerException {
// log.debug("XMLUtilities.addTextNodeToDOMTree(); xpath="+xpath
// +"\nrootnode = "+rootNode);
Node lastRealNode = getLastExistingNodeInXPath(rootNode, xpath);
//lastRealNode is now the last node in the xpath that actually exists
String nextNodeName = null;
Document doc = rootNode.getOwnerDocument();
// create one element per remaining xpath step, each nested in the previous
while (!nodesToCreate.isEmpty()) {
nextNodeName = popNextNodeString(nodesToCreate);
// log.debug("in while loop; -> nextNodeName = "+nextNodeName);
if (nextNodeName==null) {
DOMException de2 = new DOMException(DOMException.SYNTAX_ERR,
"tried to create a node with null name!"
+"\n parent = "+lastRealNode.getNodeName());
de2.fillInStackTrace();
throw de2;
}
Element newElement = doc.createElement(stripXPathIndex(nextNodeName));
// log.debug("in while loop; -> newElement created = "+newElement);
lastRealNode.appendChild(newElement);
// log.debug("in while loop; -> DONE lastRealNode.appendChild(newElement)");
lastRealNode = newElement;
}
//check to see if last real node has any children already...
NodeList nl = lastRealNode.getChildNodes();
if (nl!=null && nl.getLength()>0) {
// if so, and if one of these is a text element, change it to new value
// NOTE: if there's more than one text node, only the first one gets
// changed!
Node[] childArray = getNodeListAsNodeArray(nl);
// NOTE(review): the next line is truncated (see the note in the method
// documentation); everything from here on appears to belong to the tail of
// a different method that works with a "newNode" argument.
for (int i=0; i nextNodeName = "+nextNodeName);
if (nextNodeName==null) {
DOMException de2 = new DOMException(DOMException.SYNTAX_ERR,
"tried to create a node with null name!"
+"\n parent = "+lastRealNode.getNodeName());
de2.fillInStackTrace();
throw de2;
}
Element newElement = doc.createElement(stripXPathIndex(nextNodeName));
// log.debug("in while loop; -> newElement created = "+newElement);
lastRealNode.appendChild(newElement);
// log.debug("in while loop; -> DONE lastRealNode.appendChild(newElement)");
lastRealNode = newElement;
}
//check to see if last real node has any children already...
NodeList nl = lastRealNode.getChildNodes();
if (nl!=null && nl.getLength()>0) {
// if so, replace the first existing child with the new node
// NOTE: if there's more than one node, only the first one gets
// changed!
Node[] childArray = getNodeListAsNodeArray(nl);
childArray[0].getParentNode().replaceChild(newNode, childArray[0]);
} else {
//otherwise, just add a new node
lastRealNode.appendChild(newNode);
}
}
/**
 * Follows an XPATH expression through a DOM tree, creating any element nodes
 * on the way that do not exist yet, and finally sets the attribute named by
 * the last step of the xpath to the supplied value. If the attribute already
 * exists, its value is overwritten.
 * NOTE - NONE OF THESE METHODS ARE THREAD_SAFE.
 *
 * @param rootNode the root node of a DOM subtree
 *
 * @param xpath A <code>String</code> representation of an XPATH expression
 *              which defines the unique location of the required attribute
 *              in the DOM tree. NOTE - the xpath must define an attribute,
 *              using the "@" notation - for example, if the attribute name
 *              is my_attrib, then the xpath would look like this:
 *              /root/elem1/elem1a/@my_attrib
 *
 * @param attribValue the text value to be stored in the ATTRIBUTE_NODE at
 *              the end of this xpath
 *
 * @throws org.w3c.dom.DOMException if the xpath contains no "@", if it
 *              resolves to an existing node that is not an attribute, if a
 *              step with a null name would have to be created, or if the
 *              attribute can be neither created nor updated
 *
 * @throws TransformerException declared for problems evaluating the xpath
 */
public static void addAttributeNodeToDOMTree(Node rootNode, String xpath,
            String attribValue) throws DOMException, TransformerException {

  // Guard: without an "@" the xpath cannot describe an attribute at all.
  if (xpath.indexOf(ATTRIB_XPATH_SYMBOL) == -1) {
    throw new DOMException(DOMException.SYNTAX_ERR,
                  "call to addAttributeNodeToDOMTree() with an "
                  +"xpath that does not contain an attribute "
                  +" (no @ symbol found in xpath: "
                  +xpath+" )");
  }

  // Deepest node on the xpath that is already present in the tree.
  Node currentNode = getLastExistingNodeInXPath(rootNode, xpath);
  final Document ownerDoc = rootNode.getOwnerDocument();
  String pendingName = null;
  boolean updateExisting = false;

  // Nothing left to create and a node was found: the whole xpath exists
  // already, so the node found must be the attribute itself.
  if (nodesToCreate.isEmpty() && currentNode != null) {
    if (currentNode.getNodeType() != Node.ATTRIBUTE_NODE) {
      throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR,
                  "xpath defines a node that is *NOT* an attribute node"
                  +"\n xpath = "+xpath
                  +";\n nodeName = "+currentNode.getNodeName()
                  +";\n nodeType = "+currentNode.getNodeType());
    }
    updateExisting = true;
    pendingName = currentNode.getNodeName();
  }

  // Build the missing element steps until the attribute step is reached.
  while (!nodesToCreate.isEmpty()) {
    pendingName = popNextNodeString(nodesToCreate);
    if (pendingName == null) {
      throw new DOMException(DOMException.SYNTAX_ERR,
                  "tried to create a node with null name!"
                  +"\n parent = "+currentNode.getNodeName());
    }
    if (pendingName.startsWith(ATTRIB_XPATH_SYMBOL)) {
      // reached the attribute step - it is handled after the loop
      break;
    }
    Element created = ownerDoc.createElement(stripXPathIndex(pendingName));
    currentNode.appendChild(created);
    currentNode = created;
  }

  // Existing attribute: overwrite its value and finish.
  if (updateExisting) {
    ((Attr) currentNode).setValue(attribValue);
    return;
  }

  // Anything other than an "@name" step at this point cannot be handled.
  if (pendingName == null || !pendingName.startsWith(ATTRIB_XPATH_SYMBOL)) {
    throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR,
                  "addAttributeNodeToDOMTree() was unable to "
                  +"create or update attribute at this xpath: "
                  +xpath+" )");
  }

  // New attribute: strip the leading "@" to obtain the attribute name.
  String attribName = pendingName.substring(1);
  ((Element) currentNode).setAttribute(attribName, attribValue);
}
/**
 * Removes every child from the given <code>Node</code>.
 * NOTE: the removal takes place on the node that is passed in, i.e. on the
 * original DOM. If this isn't what you want, deep-clone your node first and
 * pass the clone to this method!
 *
 * @param node the root node of the DOM subtree that will have all its
 *             children removed; a null node, or a node without children,
 *             is left untouched
 */
public static void removeAllChildren(Node node) {
  if (node == null) {
    return;
  }
  // Children are detached from the end towards the front; asking the node
  // for its current last child each time avoids any index bookkeeping while
  // the child list shrinks.
  Node lastChild = node.getLastChild();
  while (lastChild != null) {
    node.removeChild(lastChild);
    lastChild = node.getLastChild();
  }
}
// No longer used by removeAllPredicates(), which now works on a local
// buffer. Kept because code elsewhere in this class may still refer to it -
// TODO confirm and remove if unused.
private static StringBuffer strippedXPathBuff = new StringBuffer();

/**
 * Removes all predicates (everything between matching square brackets,
 * brackets included) from the given <code>String</code> xpath. Nested
 * predicates are removed together with the predicate that contains them.
 * eg:
 *   input:  /eml:eml/dataset[1]/project[1]/personnel[2]/role[1]
 *   output: /eml:eml/dataset/project/personnel/role
 *
 *   input:  /a[b[1]='x']/c
 *   output: /a/c
 *
 * A predicate that is opened but never closed swallows the rest of the
 * xpath. A closing bracket without a matching opening bracket is kept as-is.
 * Brackets inside quoted string literals are not treated specially.
 *
 * @param xpath the <code>String</code> xpath that will have its predicates
 *              removed
 *
 * @return the string with predicates removed, or null if original xpath null
 */
public static String removeAllPredicates(String xpath) {
  if (xpath == null) {
    return null;
  }
  if (xpath.indexOf('[') < 0) {
    // nothing to strip - hand back the very same string
    return xpath;
  }
  // Local buffer: no shared state, so concurrent calls cannot interfere.
  StringBuffer stripped = new StringBuffer(xpath.length());
  int depth = 0;   // number of currently open '[' brackets
  for (int i = 0; i < xpath.length(); i++) {
    char ch = xpath.charAt(i);
    if (ch == '[') {
      depth++;
    } else if (ch == ']' && depth > 0) {
      depth--;
    } else if (depth == 0) {
      stripped.append(ch);
    }
    // characters at depth > 0 belong to a predicate and are dropped
  }
  return stripped.toString();
}
/**
 * Returns the first text child of the unique node that the XPATH expression
 * points to.
 * NOTE - NONE OF THESE METHODS ARE THREAD_SAFE.
 *
 * ***NOTE*** CDATA sections count as text here, so this method will also
 * return a CDATA_SECTION_NODE if that is the first text-like child found.
 *
 * @param rootNode the root node of a DOM subtree
 *
 * @param xpath A <code>String</code> representation of an XPATH expression
 *              which defines the unique location of the required existing
 *              node in the DOM tree. If this XPATH expression does not
 *              define a unique node, a DOMException is thrown
 *
 * @return the <code>Node</code> (a TEXT_NODE or CDATA_SECTION_NODE) found
 *              under the node defined by this xpath; null if the xpath
 *              matches no node, or the matched node has no such child
 *
 * @throws org.w3c.dom.DOMException if this XPATH expression does not define
 *              a unique node
 *
 * @throws TransformerException if there is a problem executing the XPATH
 *              expression
 */
public static Node getTextNodeWithXPath(Node rootNode, String xpath)
                                  throws DOMException, TransformerException {
  Node owner = getNodeWithXPath(rootNode, xpath);
  if (owner == null) {
    return null;
  }
  // In the DOM, an element's text is held by child nodes of the element.
  // Content may be "mixed" (text plus real sub-elements), so every child is
  // inspected until the first text-like one turns up.
  NodeList children = owner.getChildNodes();
  final int childCount = children.getLength();
  for (int i = 0; i < childCount; i++) {
    Node candidate = children.item(i);
    switch (candidate.getNodeType()) {
      case Node.TEXT_NODE:
      case Node.CDATA_SECTION_NODE:
        return candidate;
      default:
        break;
    }
  }
  // no children at all, or none of them text-like
  return null;
}
/**
 * Returns the unique ATTRIBUTE_NODE that the XPATH expression points to.
 * NOTE - NONE OF THESE METHODS ARE THREAD_SAFE.
 *
 * @param rootNode the root node of a DOM subtree
 *
 * @param xpath A <code>String</code> representation of an XPATH expression
 *              which defines the unique location of the required ATTRIBUTE
 *              node in the DOM tree. If this XPATH expression does not
 *              define a unique node, a DOMException is thrown
 *
 * @return the <code>Node</code> (that is an ATTRIBUTE_NODE) uniquely defined
 *              by this xpath; null if the xpath matches no node
 *
 * @throws org.w3c.dom.DOMException if this XPATH expression does not define
 *              a unique node, or the node it defines is not an attribute
 *
 * @throws TransformerException if there is a problem executing the XPATH
 *              expression
 */
public static Node getAttributeNodeWithXPath(Node rootNode, String xpath)
                                  throws DOMException, TransformerException {
  Node candidate = getNodeWithXPath(rootNode, xpath);
  // "no match" (null) and "matched an attribute" are the two good outcomes
  if (candidate == null || candidate.getNodeType() == Node.ATTRIBUTE_NODE) {
    return candidate;
  }
  throw new DOMException(DOMException.INDEX_SIZE_ERR,
                  "found a node at this xpath: "+xpath
                  +" that is *NOT* an attribute node!");
}
/**
 * Returns the single node that the XPATH expression points to.
 * NOTE - NONE OF THESE METHODS ARE THREAD_SAFE.
 *
 * @param rootNode the root node of a DOM subtree
 *
 * @param xpath A <code>String</code> representation of an XPATH expression
 *              which defines the unique location of the required existing
 *              node in the DOM tree. If this XPATH expression does not
 *              define a unique node, a DOMException is thrown
 *
 * @return the Node that is uniquely defined by this xpath; null if the xpath
 *              does not point to a valid node
 *
 * @throws org.w3c.dom.DOMException if more than one node matches the XPATH
 *              expression
 *
 * @throws TransformerException if there is a problem executing the XPATH
 *              expression
 */
public static Node getNodeWithXPath(Node rootNode, String xpath)
                                  throws DOMException, TransformerException {
  NodeList matches = getNodeListWithXPath(rootNode, xpath);
  if (matches == null) {
    return null;
  }
  final int matchCount = matches.getLength();
  if (matchCount <= 1) {
    return matches.item(0);
  }
  // The XPATH expression must identify exactly one DOM node.
  throw new DOMException(DOMException.INDEX_SIZE_ERR,
                  "Non-unique XPATH expression: "+xpath+"\n ("
                  +matchCount+" nodes match");
}
/**
 * Returns a <code>NodeList</code> of all nodes matching the XPATH expression
 * provided. The expression is trimmed before evaluation, and rootNode serves
 * both as the context node and as the node used for namespace resolution.
 * NOTE - NONE OF THESE METHODS ARE THREAD_SAFE.
 *
 * @param rootNode the root node of a DOM subtree
 *
 * @param xpath A <code>String</code> representation of an XPATH expression
 *              which defines the location of one or more required existing
 *              nodes in the DOM tree
 *
 * @return the <code>NodeList</code> that contains all the nodes matching the
 *              XPATH expression provided; null if the xpath does not point
 *              to any valid nodes
 *
 * @throws TransformerException if xpath or rootNode is null, or if there is
 *              a problem executing the XPATH expression
 */
public static NodeList getNodeListWithXPath(Node rootNode, String xpath)
                                                throws TransformerException {
  if (xpath == null) {
    throw new TransformerException(
                "XMLUtilities.getNodeListWithXPath() received NULL xpath");
  }
  if (rootNode == null) {
    throw new TransformerException(
                "XMLUtilities.getNodeListWithXPath() received NULL rootNode");
  }
  // Evaluation problems surface as TransformerException and simply
  // propagate to the caller.
  NodeList matches = XPathAPI.selectNodeList(rootNode, xpath.trim(), rootNode);

  // A missing or empty result is reported to the caller as null.
  boolean hasMatches = (matches != null) && (matches.getLength() > 0);
  return hasMatches ? matches : null;
}
/**
 * Utility method to copy the entries of a NodeList into an array of Nodes.
 * The array is a snapshot taken at the time of the call: it keeps the same
 * nodes in the same positions even if the tree, and with it the NodeList,
 * is modified afterwards.
 *
 * @param nList the NodeList to be converted to an array
 *
 * @return the <code>Node[]</code> array representation of this NodeList, in
 *              the original order; null if the NodeList is null or empty
 */
public static Node[] getNodeListAsNodeArray(NodeList nList) {
  final int count = (nList == null) ? 0 : nList.getLength();
  if (count == 0) {
    return null;
  }
  Node[] snapshot = new Node[count];
  int index = 0;
  while (index < count) {
    snapshot[index] = nList.item(index);
    index++;
  }
  return snapshot;
}
/**
 * Returns the DOM tree that the passed Node belongs to as a string, without
 * preserving whitespace. This is a shorthand for
 * <code>getDOMTreeAsString(node, false)</code>.
 * NOTE - NONE OF THESE METHODS ARE THREAD_SAFE.
 *
 * @param node the root node of a DOM subtree
 *
 * @return <code>String</code> representation of the DOM tree
 */
public static String getDOMTreeAsString(Node node) {
  final boolean preserveWhitespace = false;
  String serialized = getDOMTreeAsString(node, preserveWhitespace);
  return serialized;
}
/**
 * Serializes the DOM tree that the passed Node belongs to (via print()) and
 * returns the result as a string.
 * NOTE - NONE OF THESE METHODS ARE THREAD_SAFE.
 *
 * The output is collected as characters. It is deliberately not routed
 * through a byte stream, because that would encode and decode the text with
 * the platform default charset and could corrupt characters which that
 * charset cannot represent.
 *
 * If serialization fails with an exception, the exception message and stack
 * trace are appended to the returned text instead of being thrown.
 *
 * @param node the root node of a DOM subtree
 *
 * @param preserveWhitespace - if set to true, will preserve spaces.
 *              NOTES:
 *              - *false* - Setting this to false means that any elements
 *                that contain only whitespace will be printed out as being
 *                *empty*, but the layout of the output will have "nice"
 *                line endings and indentation.
 *              - *true* - Setting it to true can mess up line
 *                endings/formatting of output, but will mean that elements
 *                containing only whitespace will be printed out in their
 *                original form.
 *
 * @return <code>String</code> representation of the DOM tree; null if node
 *              is null
 */
public static String getDOMTreeAsString(Node node, boolean preserveWhitespace) {
  if (node == null) {
    return null;
  }
  StringWriter buffer = new StringWriter();
  PrintWriter printWriter = new PrintWriter(buffer);
  try {
    print(node, printWriter, DEFAULT_OUTPUT_FORMAT, preserveWhitespace);
  } catch (Exception e) {
    String msg = "getDOMTreeAsString() - unexpected Exception: "+e+"\n";
    // log.error(msg);
    printWriter.println(msg);
    e.printStackTrace(printWriter);
  } finally {
    // close() flushes pending output into the buffer and never throws
    printWriter.close();
  }
  return buffer.toString();
}
/**
 * Serializes the DOM tree that the passed Node belongs to (via print()) and
 * returns the result as a <code>java.io.Reader</code>.
 * NOTE - NONE OF THESE METHODS ARE THREAD_SAFE.
 *
 * The output is collected as characters. It is deliberately not routed
 * through a byte stream, because that would encode and decode the text with
 * the platform default charset and could corrupt characters which that
 * charset cannot represent.
 *
 * If serialization fails with an exception, the exception message and stack
 * trace are appended to the text served by the Reader instead of being
 * thrown.
 *
 * @param node the root node of a DOM subtree
 *
 * @param preserveWhitespace - if set to true, will preserve spaces.
 *              NOTES:
 *              - *false* - Setting this to false means that any elements
 *                that contain only whitespace will be printed out as being
 *                *empty*, but the layout of the output will have "nice"
 *                line endings and indentation.
 *              - *true* - Setting it to true can mess up line
 *                endings/formatting of output, but will mean that elements
 *                containing only whitespace will be printed out in their
 *                original form.
 *
 * @return <code>Reader</code> representation of the DOM tree; null if node
 *              is null
 */
public static Reader getDOMTreeAsReader(Node node, boolean preserveWhitespace) {
  if (node == null) {
    return null;
  }
  StringWriter buffer = new StringWriter();
  PrintWriter printWriter = new PrintWriter(buffer);
  try {
    print(node, printWriter, DEFAULT_OUTPUT_FORMAT, preserveWhitespace);
  } catch (Exception e) {
    String msg = "getDOMTreeAsReader() - unexpected Exception: "+e+"\n";
    // log.error(msg);
    printWriter.println(msg);
    e.printStackTrace(printWriter);
  } finally {
    // close() flushes pending output into the buffer and never throws
    printWriter.close();
  }
  return new StringReader(buffer.toString());
}
/**
 * Prints the DOM tree that the passed Node belongs to onto the PrintWriter
 * provided, using the encoding held in the DEFAULT_OUTPUT_FORMAT variable
 * of this class.
 * Does *not* flush or close PrintWriter after use.
 * NOTE - NONE OF THESE METHODS ARE THREAD_SAFE.
 *
 * @param node the root node of a DOM subtree
 *
 * @param printWriter the <code>PrintWriter</code> to which output will be
 *              printed
 */
public static void print(Node node, PrintWriter printWriter) {
  final String encoding = DEFAULT_OUTPUT_FORMAT;
  print(node, printWriter, encoding);
}
/**
 * Prints the DOM tree that the passed Node belongs to onto the PrintWriter
 * provided, using the encoding provided and without preserving whitespace.
 * Does *not* flush or close PrintWriter after use.
 * NOTE - NONE OF THESE METHODS ARE THREAD_SAFE.
 *
 * @param node the root node of a DOM subtree
 *
 * @param printWriter the <code>PrintWriter</code> to which output will be
 *              printed
 *
 * @param encoding the <code>String</code> defining the output format
 *              (e.g. UTF-8 etc)
 */
public static void print(Node node,
                         PrintWriter printWriter, String encoding) {
  final boolean preserveWhitespace = false;
  print(node, printWriter, encoding, preserveWhitespace);
}
/**
 * Serializes the whole document that the passed Node belongs to onto the
 * PrintWriter provided, using the encoding provided. If the Node is itself
 * a Document, that document is serialized.
 * Does *not* flush or close PrintWriter after use.
 * NOTE - NONE OF THESE METHODS ARE THREAD_SAFE.
 *
 * Nothing is printed if node, printWriter or encoding is null, or if the
 * node is not attached to any document. An IOException raised while
 * serializing is not thrown; its stack trace is written to the PrintWriter.
 *
 * @param node a node of the DOM tree to print (or the Document itself)
 *
 * @param printWriter the <code>PrintWriter</code> to which output will be
 *              printed
 *
 * @param encoding the <code>String</code> defining the output format
 *              (e.g. UTF-8 etc). A blank value selects the default encoding
 *              of this class
 *
 * @param preserveWhitespace - if set to true, will preserve spaces.
 *              NOTES:
 *              - *false* - Setting this to false means that any elements
 *                that contain only whitespace will be printed out as being
 *                *empty*, but the layout of the output will have "nice"
 *                line endings and indentation.
 *              - *true* - Setting it to true can mess up line
 *                endings/formatting of output, but will mean that elements
 *                containing only whitespace will be printed out in their
 *                original form.
 */
public static void print(Node node, PrintWriter printWriter,
                         String encoding, boolean preserveWhitespace) {
  if (node == null || printWriter == null || encoding == null) {
    return;
  }
  if (encoding.trim().equals("")) {
    encoding = DEFAULT_OUTPUT_FORMAT;
  }
  // getOwnerDocument() returns null for a Document node, so a Document that
  // is passed in directly has to be used as-is; otherwise it would be
  // silently skipped.
  Document document = (node instanceof Document)
                          ? (Document) node : node.getOwnerDocument();
  if (document == null) {
    return;
  }
  try {
    OutputFormat format = new OutputFormat(document, encoding, true);
    format.setLineSeparator(System.getProperty("line.separator"));
    format.setLineWidth(72);
    format.setIndent(2);
    format.setPreserveSpace(preserveWhitespace);
    XMLSerializer serializer = new XMLSerializer(printWriter, format);
    serializer.serialize(document);
  } catch (IOException e) {
    // log.error("IOException doing print(): "+e);
    e.printStackTrace(printWriter);
  }
}
/**
 * Returns an <code>edu.ucsb.nceas.utilities.OrderedMap</code> containing the
 * entire DOM tree rooted at the rootNode, encoded as key/value pairs, where
 * the "key" is the XPath of the node, and the "value" is its text value.
 * Equivalent to calling the two-argument variant with an empty path prefix.
 *
 * @param rootNode the root node of the DOM tree to be encoded as
 *                 XPath/Value mappings
 *
 * @return an <code>edu.ucsb.nceas.utilities.OrderedMap</code> containing the
 *                 resulting xpath/value pairs in the correct order
 */
public static OrderedMap getDOMTreeAsXPathMap(Node rootNode) {
  final String noPrefix = "";
  OrderedMap xpathMap = getDOMTreeAsXPathMap(rootNode, noPrefix);
  return xpathMap;
}
/**
 * Returns an <code>edu.ucsb.nceas.utilities.OrderedMap</code> containing the
 * entire DOM tree rooted at the rootNode, encoded as key/value pairs, where
 * the "key" is the XPath of the node, and the "value" is its text value. The
 * "keys" start with the path that is provided, followed by "/" and the name
 * of the root node.
 *
 * @param rootNode the root node of the DOM tree to be encoded as
 *                 XPath/Value mappings
 *
 * @param path     the xpath that is prefixed to all the keys in the
 *                 XPath/Value mapping that is returned; null is treated as
 *                 an empty prefix
 *
 * @return an <code>edu.ucsb.nceas.utilities.OrderedMap</code> containing the
 *                 resulting xpath/value pairs in the correct order; null if
 *                 rootNode is null
 */
public static OrderedMap getDOMTreeAsXPathMap(Node rootNode, String path) {
  if (rootNode == null) {
    return null;
  }
  String prefix = (path == null) ? "" : path;
  OrderedMap collected = new OrderedMap();
  String rootXPath = prefix + "/" + rootNode.getNodeName();
  // the three-argument variant fills the map that is handed to it
  getDOMTreeAsXPathMap(rootNode, rootXPath, collected);
  return collected;
}
/**
 * Given a Map of name=value pairs containing xpaths and element/attribute
 * values, and a DOM root Node, this method writes each value into the DOM
 * tree at the place named by its xpath. Nodes that already exist get the
 * new value; nodes that do not exist are created.
 *
 * A key is treated as an attribute xpath when it contains "@" somewhere
 * after its first character and after its last "/"; every other key is
 * treated as the xpath of an element whose text is to be set. Keys that are
 * null or blank are skipped.
 *
 * @param xpathMap the Map containing the name=value pairs comprising xpaths
 *                 and element/attribute values
 *
 * @param rootNode the root Node of the DOM Document to which the values in
 *                 the Map will be added - NOTE that this method has no
 *                 return value; the results are added to this DOM Document
 *
 * @throws DOMException if something goes wrong
 *
 * @throws TransformerException if something goes wrong
 */
public static void getXPathMapAsDOMTree(Map xpathMap, Node rootNode)
                                  throws DOMException, TransformerException {
  if (rootNode == null || xpathMap == null) {
    return;
  }
  Iterator keys = xpathMap.keySet().iterator();
  if (keys == null) {
    return;
  }
  for (; keys.hasNext(); ) {
    String key = (String) keys.next();
    if (key == null || key.trim().equals("")) {
      continue;
    }
    String value = (String) xpathMap.get(key);

    int atPos = key.indexOf(ATTRIB_XPATH_SYMBOL);
    boolean isAttribute = (atPos > 0) && (atPos > key.lastIndexOf("/"));

    if (isAttribute) {
      // attribute: update in place if present, otherwise create it
      Node existingAttrib = getAttributeNodeWithXPath(rootNode, key);
      if (existingAttrib != null) {
        existingAttrib.setNodeValue(value);
      } else {
        addAttributeNodeToDOMTree(rootNode, key, value);
      }
    } else {
      // element text: update in place if present, otherwise create it
      Node existingText = getTextNodeWithXPath(rootNode, key);
      if (existingText != null) {
        existingText.setNodeValue(value);
      } else {
        addTextNodeToDOMTree(rootNode, key, value);
      }
    }
  }
}
/** Normalizes the given string.
* Note that this version explicitly considers
* characters that have codes less than 32 or
* greater than 127. This proved necessary in morpho
* due to the possibility of pasting text from other
* applications (e.g. Word, PDFs) that use these special
* ascii characters. (Xalan seems particularly sensitive to
* unusual white-space characters)
*/
public static String normalize(Object ss) {
String s = "";
s = (String)ss;
StringBuffer str = new StringBuffer();
int len = (s != null) ? s.length() : 0;
for (int i = 0; i < len; i++) {
char ch = s.charAt(i);
switch (ch) {
case '<': {
str.append("<");
break;
}
case '>': {
str.append(">");
break;
}
case '&': {
str.append("&");
break;
}
case '"': {
str.append(""");
break;
}
case '\r':
case '\t':
case '\n': {
if (false) {
str.append("");
str.append(Integer.toString(ch));
str.append(';');
break;
}
// else, default append char
break;
}
default: {
if ((ch<128)&&(ch>31)) {
str.append(ch);
}
else if (ch<32) {
if (ch== 10) {
str.append(ch);
}
if (ch==13) {
str.append(ch);
}
if (ch==9) {
str.append(ch);
}
// otherwise skip
}
else {
str.append("");
str.append(Integer.toString(ch));
str.append(';');
}
}
}
}
String temp = str.toString();
temp = temp.trim();
if (temp.length()<1) temp = " ";
return temp;
} // normalize(String):String
/**
* This method checks to see if an evaluation of the XPath results in
* a String
*
* @param contextNode the context node of a DOM subtree where the
* XPath evaluation starts
*
* @param XPath A String representation of an XPATH
* expression
*/
public static boolean isXPathEvalAString(Node contextNode, String XPath)
    throws TransformerException {
  // Evaluate the expression once and compare the result's type tag with
  // the string class constant.
  return XPathAPI.eval(contextNode, XPath).getType() == XObject.CLASS_STRING;
}
/**
* This method checks to see if an evaluation of the XPath results in
* a boolean
*
* @param contextNode the context node of a DOM subtree where the
* XPath evaluation starts
*
* @param XPath A String representation of an XPATH
* expression
*/
public static boolean isXPathEvalABoolean(Node contextNode, String XPath)
    throws TransformerException {
  // Evaluate the expression once and compare the result's type tag with
  // the boolean class constant.
  return XPathAPI.eval(contextNode, XPath).getType() == XObject.CLASS_BOOLEAN;
}
/**
* This method checks to see if an evaluation of the XPath results in
* a number
*
* @param contextNode the context node of a DOM subtree where the
* XPath evaluation starts
*
* @param XPath A String representation of an XPATH
* expression
*/
public static boolean isXPathEvalANumber(Node contextNode, String XPath)
    throws TransformerException {
  // Evaluate the expression once and compare the result's type tag with
  // the number class constant.
  return XPathAPI.eval(contextNode, XPath).getType() == XObject.CLASS_NUMBER;
}
/**
* This method checks to see if an evaluation of the XPath results in
* a Nodeset
*
* @param contextNode the context node of a DOM subtree where the
* XPath evaluation starts
*
* @param XPath A String representation of an XPATH
* expression
*/
public static boolean isXPathEvalANodeset(Node contextNode, String XPath)
    throws TransformerException {
  // Evaluate the expression once and compare the result's type tag with
  // the nodeset class constant.
  return XPathAPI.eval(contextNode, XPath).getType() == XObject.CLASS_NODESET;
}
/**
* This method checks to see if an evaluation of the XPath results in
* a Null
*
* @param contextNode the context node of a DOM subtree where the
* XPath evaluation starts
*
* @param XPath A String representation of an XPATH
* expression
*/
public static boolean isXPathEvalANull(Node contextNode, String XPath)
    throws TransformerException {
  // Evaluate the expression once and compare the result's type tag with
  // the null class constant.
  return XPathAPI.eval(contextNode, XPath).getType() == XObject.CLASS_NULL;
}
/**
* This method evaluates an XPath expression and reports information about
* the type of the result i.e. is it a boolean, a string, a nodeset, etc.
* Designed primarily for testing since result info is displayed in an error
* dialog.
*
* @param contextNode the context node of a DOM subtree where the
* XPath evaluation starts
*
* @param xpath A String representation of an XPATH
* expression
*/
public static void xPathEvalTypeTest( Node contextNode, String xpath) {
  try {
    final XObject result = XPathAPI.eval(contextNode, xpath);
    // Read the type tag once; exactly one of the branches below can match.
    final int resultType = result.getType();

    if (resultType == XObject.CLASS_BOOLEAN) {
      Log.debug(1,"Boolean: "+result.bool());
    } else if (resultType == XObject.CLASS_STRING) {
      Log.debug(1,"String: "+result.str());
    } else if (resultType == XObject.CLASS_NUMBER) {
      Log.debug(1,"Number: "+result.num());
    } else if (resultType == XObject.CLASS_NODESET) {
      NodeList matches = result.nodelist();
      Log.debug(1,"Nodeset: nodeset length: "+matches.getLength());
    } else if (resultType == XObject.CLASS_NULL) {
      Log.debug(1,"Null: ");
    }
  } catch (Exception problem) {
    // Any failure during evaluation or reporting is logged, never rethrown.
    Log.debug(4,"exception in evalXPathTest --- "+problem.toString());
  }
}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
//
// P R I V A T E C O N V E N I E N C E M E T H O D S
//
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
/**
* Given a starting node and its XPATH, returns the entire DOM tree rooted at
* this node, encoded as key/value pairs in the Map provided, where the
* "key" is the XPath of the node, and the "value" is its text value
* (*does* include CDATA sections).
*
* Procedure is as follows:
* Get attributes, and for each attribute, create a NVP
* with xpath/@attribute=value
* then get this node's children
* get subset that are *not* text nodes; for each, RECURSE
* get subset that *are* text nodes; for each, check if empty (or just
* \n etc.) and if so discard. Otherwise, concatenate them and create a
* NVP with xpath=concat_values
*
* @param startNode the starting node which is the root of the DOM tree to
* be encoded
*
* @param xpath the XPath of this starting node
*
* @param returnNVPMap the Map in which the resulting key/value pairs will
* be stored - NOTE that this method has no return value -
* the results are added to this Map that you provide
*/ //re-use one String instead of creating thousands of new objects:
// Shared empty-string constant; the tree walk below compares whitespace-stripped
// text against it to detect blank text nodes.
private static final String BLANK = "";
// Re-use one StringBuffer instead of creating thousands of new objects: the
// tree walk below clears and refills it for every xpath key it builds.
// NOTE(review): the buffer is static and mutable, so it is shared by every
// caller - presumably only used from one thread at a time; confirm.
private static final StringBuffer buff = new StringBuffer();
//
private static void getDOMTreeAsXPathMap(Node startNode, String xpath,
                                         Map returnNVPMap) {
  // Step 1 - attributes: each one becomes an entry keyed by
  // xpath + separator + attribute symbol + attribute name.
  Node[] attributes =
      XMLUtilities.getNamedNodeMapAsNodeArray(startNode.getAttributes());
  if (attributes != null) {
    for (int a = 0; a < attributes.length; a++) {
      Node attribute = attributes[a];
      if (attribute == null) {
        continue;
      }
      buff.setLength(0);
      buff.append(xpath);
      buff.append(XPATH_SEPARATOR);
      buff.append(ATTRIB_XPATH_SYMBOL);
      buff.append(attribute.getNodeName());
      String attributeValue =
          StringUtil.stripTabsNewLines(attribute.getNodeValue());
      returnNVPMap.put(buff.toString(), attributeValue);
    }
  }

  // Step 2 - children: text and CDATA content is collected for this node,
  // every other child is handled by recursion.
  NodeList childList = startNode.getChildNodes();
  StringBuffer collectedText = new StringBuffer();
  Node[] children = XMLUtilities.getNodeListAsNodeArray(childList);
  if (children != null) {
    // Names of the children seen so far; handed to getNextXPathPredicate
    // together with the current child's index.
    String[] siblingNames = new String[children.length];
    for (int c = 0; c < children.length; c++) {
      Node child = children[c];
      if (child == null) {
        continue;
      }
      short childType = child.getNodeType();
      if (childType == Node.TEXT_NODE
          || childType == Node.CDATA_SECTION_NODE) {
        // Keep the raw text unless it is null or whitespace only.
        String content = child.getNodeValue();
        if (content != null
            && !BLANK.equals(StringUtil.stripAllWhiteSpace(content))) {
          collectedText.append(content);
        }
        continue;
      }
      // Any other node type: build the child's xpath (with predicate) and
      // descend. The path is copied out of the shared buffer before the
      // recursive call reuses it.
      buff.setLength(0);
      buff.append(xpath);
      buff.append(XPATH_SEPARATOR);
      buff.append(child.getNodeName());
      buff.append(getNextXPathPredicate(child.getNodeName(),
                                        siblingNames, c));
      getDOMTreeAsXPathMap(child, buff.toString(), returnNVPMap);
    }
  }

  // Step 3 - the concatenated text, if any, is stored under this node's xpath.
  if (collectedText.length() > 0) {
    returnNVPMap.put(xpath, collectedText.toString());
  }
}
/**
* Utility method to get a NamedNodeMap as an array of Nodes. This is needed
* because we have to pull the values from the NamedNodeMap in reverse order,
* since the call to the item(i) method actually *removes* that item from the
* NamedNodeMap instead of just "peek"ing at it. This in turn reduces the
* length of the NamedNodeMap and re-indexes all the remaining entries...
*
* @param nMap the NamedNodeMap to be converted to an array
*
* @return the Node[] array representation of this NamedNodeMap, in the
* original order
*/
private static Node[] getNamedNodeMapAsNodeArray(NamedNodeMap nMap) {
  // A missing map and an empty map both yield null rather than an empty array.
  if (nMap == null) {
    return null;
  }
  final int count = nMap.getLength();
  if (count == 0) {
    return null;
  }
  Node[] nodes = new Node[count];
  // Items are fetched from the highest index down to 0, but each one lands
  // in its own slot, so the array keeps the map's original order.
  int index = count;
  while (--index >= 0) {
    nodes[index] = nMap.item(index);
  }
  return nodes;
}
//
// utility method for the private getDOMTreeAsXPathMap() method:
// keeps a tally of the XPath keys that have already been used, and if the
// passed key duplicates a previous one, increment the [n] predicate before
// adding this one
//
// @param currentNodeName the String representation of the current node name
//
// @param currentNamesArray the String[] array containing the list of node
// names already encountered in this recursive pass
//
// @param currentNamesArrayIndex the int array index of the name that was
// last added to the currentNamesArray
//
// Scratch buffer declared next to getNextXPathPredicate; none of the lines of
// that method that are still visible below reference it.
private static final StringBuffer predicateBuff = new StringBuffer();
//
// Returns "" when the node name is null or empty, or when the index lies past
// the end of currentNamesArray. Otherwise the name is recorded at that index
// and a predicate counter is started at 0.
private static String getNextXPathPredicate(String currentNodeName,
String[] currentNamesArray, int currentNamesArrayIndex) {
if (currentNodeName==null || currentNodeName.equals("")) return "";
if (currentNamesArrayIndex > currentNamesArray.length - 1) return "";
currentNamesArray[currentNamesArrayIndex] = currentNodeName;
int predicate = 0;
// NOTE(review): the loop header on the next line looks truncated - the text
// between a '<' and a later '>' appears to have been lost. The statements
// after it use xpathSingleNodeName and bracketIndex, which are not declared
// anywhere in this method, so they presumably belong to a different method
// whose beginning is missing. Restore this region from version control
// before relying on it.
for (int i=0; i -1) {
xpathSingleNodeName = xpathSingleNodeName.substring(0,bracketIndex);
}
return xpathSingleNodeName;
}
// given a DOM rooted at the rootNode, and an xpath of the form:
// /root/elem_a/subelem_b/subsubelem_c/lastelem_d
// this method starts at the last element (lastelem_d) and checks to see if
// that element actually exists in the DOM. If not, the last element is put
// onto a stack (nodesToCreate), and the process is repeated for the next
// element up the path (subsubelem_c). If an element in the xpath is found to
// be existing in the DOM, that node is returned. The calling method can then
// use the stack (nodesToCreate) to create the nodes that are not present, as
// children of the last existing node (i.e. the return value from this method)
//
private static Node getLastExistingNodeInXPath(Node rootNode, String xpath)
    throws DOMException, TransformerException {
  // Reject inputs that cannot identify a single node: missing arguments, a
  // blank path, a path without "/", a wildcard, or a path that never
  // mentions the root node's name.
  boolean unusable = rootNode == null
      || xpath == null
      || xpath.trim().equals("")
      || xpath.indexOf("/") < 0
      || xpath.indexOf("*") > -1
      || xpath.indexOf(rootNode.getNodeName()) < 0;
  if (unusable) {
    DOMException de1 = new DOMException(DOMException.SYNTAX_ERR,
        "XPATH expression does not define a unique node; "
        + "\n xpath = " + xpath
        + "\n rootNode = " + rootNode);
    de1.fillInStackTrace();
    throw de1;
  }

  nodesToCreate.clear();

  // Probe the full path first, then ever shorter prefixes. After each probe
  // the last step is pushed onto nodesToCreate - also after the probe that
  // finally succeeds. Probing ends early, without that push, once the
  // remaining path holds no "/".
  Node lastRealNode;
  String existingPath = xpath;
  while (true) {
    lastRealNode = getNodeWithXPath(rootNode, existingPath);
    if (existingPath.indexOf("/") < 0) {
      break;
    }
    existingPath = stepBackUpPath(existingPath, nodesToCreate);
    if (lastRealNode != null) {
      break;
    }
  }

  // The most recent push names the node that was found (it already exists),
  // so it is discarded and only the steps still to be created remain.
  nodesToCreate.pop();

  if (lastRealNode == null) {
    DOMException de3 = new DOMException(DOMException.SYNTAX_ERR,
        "XPATH expression does not contain any existing nodes "
        + "- not even the root; "
        + "\n xpath = " + xpath
        + "\n rootNode name = " + rootNode.getNodeName());
    de3.fillInStackTrace();
    throw de3;
  }
  return lastRealNode;
}
// Given a String representation of an xpath, removes the last path element
// and pushes it to the Stack provided. The element is pushed exactly as it
// appears in the path - any [square bracket] index is NOT stripped here.
// The Stack is changed by reference -
// the edited xpath String is the return value for this method
// Holds the path step most recently pushed by stepBackUpPath.
private static String stringToPush = null;
//
private static String stepBackUpPath(String existingPath, Stack nodesToCreate) {
  // Everything after the final "/" is the step being peeled off the path.
  final int lastSlash = existingPath.lastIndexOf("/");
  stringToPush = existingPath.substring(lastSlash + 1);
  nodesToCreate.push(stringToPush);
  // Whatever precedes that "/" is handed back as the shortened path.
  return existingPath.substring(0, lastSlash);
}
}