package edu.ucsb.nceas.osti_elink; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.StringWriter; import java.io.UnsupportedEncodingException; import java.io.Writer; import java.net.URLEncoder; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.Properties; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.apache.commons.codec.binary.Base64; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.methods.HttpDelete; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.methods.HttpPut; import org.apache.http.client.methods.HttpUriRequest; import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import org.apache.http.util.EntityUtils; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.w3c.dom.Text; import org.w3c.dom.ls.DOMImplementationLS; import org.w3c.dom.ls.LSOutput; import org.w3c.dom.ls.LSSerializer; import org.xml.sax.SAXException; /** * OSTIElinkService provides access to the identifier service maintained by OSTI. 
* Please see the documentation at https://www.osti.gov/elink/241-6api.jsp * Its child classes should override these four methods: * mintIdentifier, getMetadata, getStatus, and setMetadata * They must also implement the abstract methods setHeaders and setGetHeaders. * @author tao * */ public abstract class OSTIElinkService { public static final String DOI = "doi"; public static final String OSTI_ID = "osti_id"; public static final String SAVED = "Saved"; public static final String PENDING = "Pending"; protected static final int GET = 1; protected static final int PUT = 2; protected static final int POST = 3; protected static final int DELETE = 4; private static final int CONNECTIONS_PER_ROUTE = 8; private static final String minimalMetadataFile = "minimal-osti.xml"; private static final String STATUS = "status"; private static final String SUCCESS = "SUCCESS"; private String username = null; private String password = null; protected String baseURL = "https://www.osti.gov/elink/2416api"; protected CloseableHttpClient httpClient = null; protected byte[] encodedAuthStr = null; private Document minimalMetadataDoc = null; private String originalDefaultSiteCode = null; private String currentDefaultSiteCode = "test"; protected Properties properties = null; protected static Log log = LogFactory.getLog(OSTIElinkService.class); /** * Constructor. It stores the credentials, creates the http client through createThreadSafeClient() and * pre-computes the Base64 encoding of the string username:password (ISO-8859-1 bytes) in encodedAuthStr. * @param username the username of the account which can access the OSTI service * @param password the password of the account which can access the OSTI service * @param baseURL the url which specifies the location of the OSTI service. If it is null or blank, * the default base url is kept. 
*/ public OSTIElinkService(String username, String password, String baseURL) { this.username = username; this.password = password; if (baseURL != null && !baseURL.trim().equals("")) { this.baseURL = baseURL; } httpClient = createThreadSafeClient(); String authentication = username + ":" + password; encodedAuthStr = Base64.encodeBase64(authentication.getBytes(Charset.forName("ISO-8859-1"))); } /** * Create a new, unique, opaque identifier by requesting the OSTI elink service. * The minimal metadata built for the site code is sent to the base url with the POST method. * 
@param siteCode a predefined site code which is associated with doi prefixes. If it is null or blank, the default * ess-dive value will be used. * @return the identifier generated by OSTI for this site code, in the form of doi:value * @throws OSTIElinkException if the response is not xml, its status is not SUCCESS, or it has no doi value */ public String mintIdentifier(String siteCode) throws OSTIElinkException { String identifier = null; String minimalMetadata = buildMinimalMetadata(siteCode); log.debug("the minmal metadata is " + minimalMetadata); log.debug("the base url is " + baseURL); /* NOTE(review): new String(reponse) below decodes with the platform default charset, and the visible tail of sendRequest returns null when the response has no entity, which would make new String(reponse) throw a NullPointerException - confirm and handle. */ byte[] reponse = sendRequest(POST, baseURL, minimalMetadata); log.debug("OSTIElinkService.mintIdentifier - the response from the OSTI service is:\n " + new String(reponse)); Document doc = null; try { doc = generateDOM(reponse); } catch (Exception e) { //The response is not a xml string. We return the response as an exception throw new OSTIElinkException("OSTIElinkService.mintIdentifier - Error: " + new String(reponse)); } String status = getElementValue(doc, STATUS); String id = getElementValue(doc, DOI); if (status != null && status.equalsIgnoreCase(SUCCESS) && id != null && !id.trim().equals("")) { identifier = DOI + ":" + id; } else { throw new OSTIElinkException("OSTIElinkService.mintIdentifier - Error: " + new String(reponse)); } log.debug("OSTIElinkService.mintIdentifier - the generated identifier is " + identifier); return identifier; } /** * Get the metadata associated with the given identifier, which should be a doi. An OSTIElinkNotFoundException * will be thrown if the identifier can't be found. The result may contain multiple records. 
* @param doi the identifier for which the metadata should be returned * @return the metadata in the xml format * @throws OSTIElinkException */ public String getMetadata(String doi) throws OSTIElinkException { return getMetadata(doi, DOI); } /** * Get the metadata associated with the osti id. An OSTIElinkNotFoundException * will be thrown if the identifier can't be found. 
* @param ostiId the osti id for which the metadata should be returned * @return the metadata in the xml format * @throws OSTIElinkException */ protected String getMetadataFromOstiId(String ostiId) throws OSTIElinkException { return getMetadata(ostiId, OSTI_ID); } /** * Get the metadata associated with the given identifier. An OSTIElinkNotFoundException * will be thrown if the identifier can't be found. * The identifier is passed through removeDOI, url-encoded as UTF-8 and sent as the query baseURL?type=identifier * with the GET method. A blank response is reported as not found. If the response doesn't contain the * identifier text, it is parsed as xml and the numfound attribute of the records element decides between * an OSTIElinkNotFoundException (value 0) and a generic OSTIElinkException. * @param identifier the identifier for which the metadata should be returned * @param type the type of the identifier, which can be doi or osti_id (the DOI and OSTI_ID constants) * @return the metadata in the xml format * @throws OSTIElinkException if the identifier is null or blank, the url can't be encoded, or the service * response doesn't contain the identifier */ protected String getMetadata(String identifier, String type) throws OSTIElinkException { String metadata = null; if (identifier != null && !identifier.trim().equals("")) { //we need to remove the doi prefix identifier = removeDOI(identifier); String url = null; try { url = baseURL + "?" 
+ type + "=" + URLEncoder.encode(identifier, StandardCharsets.UTF_8.toString()); } catch (UnsupportedEncodingException e) { throw new OSTIElinkException("OSTIElinkService.getMetadata - couldn't encode the query url: " + e.getMessage()); } log.debug("OSTIElinkService.getMetadata - the url sending to the service is " + url); byte[] response = sendRequest(GET, url); metadata = new String(response); log.debug("OSTIElinkService.getMetadata - the reponse for id " + identifier + " is\n " + metadata); if (metadata == null || metadata.trim().equals("")) { throw new OSTIElinkNotFoundException("OSTIElinkService.getMetadata - the reponse is blank. So we can't find the identifier " + identifier + ", which type is " + type); } else if (!metadata.contains(identifier)) { Document doc = null; try { doc = generateDOM(response); } catch (Exception e) { //The response is not a xml document. 
Just throw the exception throw new OSTIElinkException("OSTIElinkService.getMetadata - can't get the metadata for id " + identifier + " since\n " + metadata); } /* NOTE(review): the beginning of getAttributeValue is not visible in this copy; if it can return null (for example when the records element is missing), numFound.equals below would throw a NullPointerException - confirm. */ String numFound = getAttributeValue(doc, "records", "numfound"); if (numFound.equals("0")) { throw new OSTIElinkNotFoundException("OSTIElinkService.getMetadata - OSTI can't find the identifier " + identifier + ", which type is " + type + " since\n " + metadata); } else { throw new OSTIElinkException("OSTIElinkService.getMetadata - can't get the metadata for id " + identifier + " since\n " + metadata); } } } else { throw new OSTIElinkException("OSTIElinkService.getMetadata - the given identifier can't be null or blank."); } return metadata; } /** * Set new metadata to the given doi. * The OSTI Elink service uses the OSTI id, rather than the DOI, to update the metadata of a record. * So first we need to query the OSTI service with the doi to figure out the OSTI id associated with the doi. * Then, we can update the metadata by specifying the OSTI id. * Sometimes the OSTI id is the part of the DOI which remains after removing the prefix. So we can use the prefix * to skip the service query if the prefix is not null and the given DOI starts with the prefix. * @param doi the identifier of the object whose metadata will be replaced * @param doiPrefix a shortcut to determine if we can get the OSTI id (replacing the query) by string comparison. The * safest way is to pass null there (but it costs a query to the service). 
* @param metadataXML the new metadata in xml format * @throws OSTIElinkException if the OSTI id can't be determined, the metadata can't be processed, or the * status element of the response is missing or is not SUCCESS */ public void setMetadata(String doi, String doiPrefix, String metadataXML) throws OSTIElinkException { String ostiId = getOstiId(doi, doiPrefix);// if the doi can't be found, an exception will be thrown. 
String newMetadataXML = addOrReplaceOstiIdToXMLMetadata(ostiId, metadataXML); log.debug("OSTIElinkService.setMetadata - the new xml metadata with the osti id " + ostiId + " for the doi identifier " + doi + " is:\n" + newMetadataXML); byte[] reponse = sendRequest(POST, baseURL, newMetadataXML); log.debug("OSTIElinkService.setMetadata - the response from the OSTI service to set " + "metadata for id " + doi + " is:\n " + new String(reponse)); Document doc = null; String status = null; try { doc = generateDOM(reponse); status = getElementValue(doc, STATUS); } catch (Exception e) { log.error("OSTIElinkService.setMetadata - can't get the status of the repsonse:\n" + new String(reponse) + "since:\n" + e.getLocalizedMessage()); } if (status == null || !status.equalsIgnoreCase(SUCCESS)) { throw new OSTIElinkException("OSTIElinkService.setMetadata - Error: " + new String(reponse)); } } /** * Get the status of a DOI. If there are multiple records for a DOI, the status of * the first one will be returned. The value is read from the status attribute of the first record element * in the metadata returned by getMetadata(doi). * NOTE(review): metadata.getBytes() uses the platform default charset - confirm that this matches the * encoding declared in the xml. 
* @param doi the doi to identify the record * @return the status of the doi * @throws OSTIElinkException */ public String getStatus(String doi) throws OSTIElinkException { String status = null; String metadata = getMetadata(doi); if (metadata == null) { throw new OSTIElinkException("OSTIElinkService.getStatus - the metadata of the DOI " + doi + " can't be found."); } Document doc = generateDOM(metadata.getBytes()); status = getAttributeValue(doc, "record", "status");//get the attribute value of the first element log.debug("OSTIElinkService.getStatus - the status of " + doi + " is " + status); return status; } /** * Add the osti id element to the metadata as the first child of the record element if the metadata doesn't * have one; otherwise, the existing value will be replaced with the new value. When the element has to be * added, the metadata must contain exactly one record element. * @param ostiId the value of the ostiId element will be added or replaced * @param metadataXML the metadata xml which will be modified * @return the xml string presentation of the new metadata document with the given osti id * 
@throws OSTIElinkException */ private String addOrReplaceOstiIdToXMLMetadata(String ostiId, String metadataXML) throws OSTIElinkException { if (metadataXML == null || metadataXML.trim().equals("")) { throw new OSTIElinkException("OSTIElinkService.addOrReplaceOstiIdToXMLMetadata - the metadata part mustn't be null or blank."); } Document doc = null; try { doc = generateDOM(metadataXML.getBytes()); } catch (Exception e) { throw new OSTIElinkException("OSTIElinkService.addOrReplaceOstiIdToXMLMetadata - the metadata part must be a valid xml string. But the string is " + metadataXML + " And it can't be processed because " + e.getMessage()); } NodeList osti_id_nodes = doc.getElementsByTagName(OSTI_ID); if (osti_id_nodes.getLength() == 0) { //it doesn't have an osti id, we need to append one NodeList records = doc.getElementsByTagName("record"); if (records.getLength() !=1 ) { throw new OSTIElinkException("DOIService.addOrReplaceOstiIdToXMLMetadata - the metadata must only one record."); } else { Node record = records.item(0); Node ostiNode = doc.createElement("osti_id"); Text newText = doc.createTextNode(ostiId); ostiNode.appendChild(newText); record.insertBefore(ostiNode, record.getFirstChild()); } } else if (osti_id_nodes.getLength() == 1) { //The osti id already exists, we need to replace it. 
Node osti_id_node = osti_id_nodes.item(0); NodeList children = osti_id_node.getChildNodes(); /* NOTE(review): the text from the following loop header up to the PUT branch of sendRequest appears to be truncated in this copy (the loop header is not valid Java; presumably a span between angle brackets was stripped). The rest of this method, the helpers called elsewhere in this class (getOstiId, removeDOI, createThreadSafeClient, the two-argument sendRequest) and the beginning of the three-argument sendRequest are missing - restore them from version control before editing. */ for (int i=0; i 0) { StringEntity myEntity = new StringEntity(requestBody, "UTF-8"); ((HttpPut) request).setEntity(myEntity); } setHeaders(request); break; case POST: request = new HttpPost(uri); if (requestBody != null && requestBody.length() > 0) { StringEntity myEntity = new StringEntity(requestBody, "UTF-8"); ((HttpPost) request).setEntity(myEntity); } setHeaders(request); break; case DELETE: request = new HttpDelete(uri); setHeaders(request); break; default: throw new OSTIElinkException("Unrecognized HTTP method requested."); } byte[] body = null; try { HttpResponse response = httpClient.execute(request); HttpEntity entity = response.getEntity(); if (entity != null) { body = EntityUtils.toByteArray(entity); } } catch (ClientProtocolException e) { throw new OSTIElinkException(e.getMessage()); } catch (IOException e) { throw new OSTIElinkException(e.getMessage()); } return body; } /** * This method will add the authorization and other headers for the osti service. This is the * default method; in the visible part of sendRequest it is applied to the PUT, POST and DELETE requests. * The setGetHeaders method is its counterpart for the http get method. * Different version implementations will implement this method. * @param request the request will be modified. 
*/ protected abstract void setHeaders(HttpUriRequest request); /** * This method will add the authorization and other headers for the http get method for the * osti service. * Different version implementations will implement this method. * @param request the request will be modified. */ protected abstract void setGetHeaders(HttpUriRequest request); /** * Build a minimal metadata for the given siteCode in order to * mint a DOI from the OSTI Elink service. If the siteCode is null or blank, the default ESS-DIVE code will be used. 
* The template is parsed once from the class path resource minimal-osti.xml and cached in minimalMetadataDoc; * each call then changes the site code of the cached document in place through modifySiteCode and serializes it. * NOTE(review): the null check, the parsing and the serialize call are outside of the synchronized block used by * modifySiteCode, so concurrent callers with different site codes may see each other's value - confirm. * NOTE(review): getResourceAsStream returns null when the resource is missing, and originalDefaultSiteCode * may be null when the template has no site_input_code element - both cases look unhandled here; confirm. * @param siteCode the site code (determining the prefix of the DOI) will be used in the metadata * @return the minimal metadata will be used to mint a DOI * @throws OSTIElinkException if the template can't be read or parsed (the ParserConfigurationException, * SAXException and IOException are wrapped) */ protected String buildMinimalMetadata(String siteCode) throws OSTIElinkException { String metadataStr = null; if (minimalMetadataDoc == null) { try (InputStream is = getClass().getClassLoader().getResourceAsStream(minimalMetadataFile)) { DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder dBuilder; try { dBuilder = dbFactory.newDocumentBuilder(); minimalMetadataDoc = dBuilder.parse(is); originalDefaultSiteCode = getElementValue(minimalMetadataDoc, "site_input_code"); log.debug("DOIService.buildMinimalMetadata - the original site code in the minimal metadata is " + originalDefaultSiteCode); } catch (ParserConfigurationException e) { throw new OSTIElinkException("OSTIElink.buildMinimalMetadata - Error: " + e.getMessage()); } catch (SAXException e) { throw new OSTIElinkException("OSTIElink.buildMinimalMetadata - Error: " + e.getMessage()); } catch (IOException e) { throw new OSTIElinkException("OSTIElink.buildMinimalMetadata - Error: " + e.getMessage()); } } catch (IOException ee) { throw new OSTIElinkException("OSTIElink.buildMinimalMetadata - Error to read the file: " + ee.getMessage()); } } if (siteCode != null && !siteCode.trim().equals("")) { modifySiteCode(siteCode); } else if (!originalDefaultSiteCode.equals(currentDefaultSiteCode)) { //now the user ask the default site code. But the site map value has been updated by another call. //we need to change back to the original code. 
modifySiteCode(originalDefaultSiteCode); } metadataStr = serialize(minimalMetadataDoc); return metadataStr; } /** * Modify the value of the first site_input_code element of the cached minimal metadata document to the * given value. The change is made while holding the monitor of minimalMetadataDoc. * @param siteCode the value will be assigned as the new value * @throws OSTIElinkException */ private void modifySiteCode(String siteCode) throws OSTIElinkException { synchronized (minimalMetadataDoc) { NodeList nodes = minimalMetadataDoc.getElementsByTagName("site_input_code"); if (nodes.getLength() > 0) { //Only change the first one Node node = nodes.item(0); NodeList children = node.getChildNodes(); /* NOTE(review): the text after the following loop header appears to be truncated in this copy, here and once more a few statements later (the loop headers are not valid Java). The end of modifySiteCode, the generateDOM and getElementValue methods and the beginning of getAttributeValue are missing - restore them from version control before editing. */ for (int i=0; i 0) { Node node = nodes.item(0); NodeList children = node.getChildNodes(); for (int i=0; i 0) { Node node = nodes.item(0); Element e = (Element)node; value = e.getAttribute(attributeName).trim(); } } log.debug("OSTIElinkService.getAttributeValue - the value of the attribute " + attributeName + " on the element " + elementName + " is " + value); return value; } /** * Serialize the document object to a string. The DOM Load and Save serializer of the document's own * implementation writes into a StringWriter, with the output encoding set to UTF-8. * @param doc the dom model which need be serialized * @return the string representation of the dom model */ protected static String serialize(Document doc) { DOMImplementationLS domImplementation = (DOMImplementationLS) doc.getImplementation(); LSSerializer lsSerializer = domImplementation.createLSSerializer(); LSOutput lsOutput = domImplementation.createLSOutput(); lsOutput.setEncoding("UTF-8"); Writer stringWriter = new StringWriter(); lsOutput.setCharacterStream(stringWriter); lsSerializer.write(doc, lsOutput); return stringWriter.toString(); } }