/** * This work was created by participants in the DataONE project, and is * jointly copyrighted by participating institutions in DataONE. For * more information on DataONE, see our web site at http://dataone.org. * * Copyright 2022 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package org.dataone.cn.indexer; import java.io.ByteArrayInputStream; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.net.MalformedURLException; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.Vector; import javax.xml.parsers.ParserConfigurationException; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; import org.apache.commons.codec.EncoderException; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.common.SolrException; import org.dataone.cn.indexer.object.ObjectManager; import org.dataone.cn.indexer.parser.BaseXPathDocumentSubprocessor; import org.dataone.cn.indexer.parser.IDocumentDeleteSubprocessor; import 
org.dataone.cn.indexer.parser.IDocumentSubprocessor; import org.dataone.cn.indexer.parser.ISolrField; import org.dataone.cn.indexer.solrhttp.HTTPService; import org.dataone.cn.indexer.solrhttp.SolrDoc; import org.dataone.cn.indexer.solrhttp.SolrElementAdd; import org.dataone.cn.indexer.solrhttp.SolrElementField; import org.dataone.configuration.Settings; import org.dataone.exceptions.MarshallingException; import org.dataone.service.exceptions.InvalidRequest; import org.dataone.service.exceptions.InvalidToken; import org.dataone.service.exceptions.NotAuthorized; import org.dataone.service.exceptions.NotFound; import org.dataone.service.exceptions.NotImplemented; import org.dataone.service.exceptions.ServiceFailure; import org.dataone.service.exceptions.UnsupportedType; import org.dataone.service.types.v1.Identifier; import org.dataone.service.types.v2.SystemMetadata; import org.dataone.service.util.TypeMarshaller; import org.dspace.foresite.OREParserException; import org.springframework.beans.factory.annotation.Autowired; import org.xml.sax.SAXException; /** * A class does insert, update and remove indexes to a SOLR server * @author tao * */ public class SolrIndex { public static final String ID = "id"; private static final String IDQUERY = ID+":*"; private static final String VERSION_CONFLICT = "version conflict"; private static final int VERSION_CONFLICT_MAX_ATTEMPTS = Settings.getConfiguration().getInt( "index.solr.versionConflict.max.attempts", 25); private static final int VERSION_CONFICT_WAITING = Settings.getConfiguration().getInt( "index.solr.versionConflict.waiting.time", 500); //milliseconds private static final List resourceMapFormatIdList = Settings.getConfiguration().getList( "index.resourcemap.namespace"); private static List subprocessors = null; private static List deleteSubprocessors = null; private static SolrClient solrServer = null; private static List copyFields = null;//list of solr copy fields private static HTTPService httpService = null; 
private String solrQueryUri = Settings.getConfiguration().getString("solr.query.uri"); private String solrIndexUri = Settings.getConfiguration().getString("solr.index.uri"); private XMLNamespaceConfig xmlNamespaceConfig = null; private static BaseXPathDocumentSubprocessor systemMetadataProcessor = null; private List sysmetaSolrFields = null; private static Log log = LogFactory.getLog(SolrIndex.class); /** * Constructor * @throws SAXException * @throws IOException */ public SolrIndex(XMLNamespaceConfig xmlNamespaceConfig, BaseXPathDocumentSubprocessor systemMetadataProcessor, HTTPService httpService) throws XPathExpressionException, ParserConfigurationException, IOException, SAXException { this.xmlNamespaceConfig = xmlNamespaceConfig; this.systemMetadataProcessor = systemMetadataProcessor; this.httpService = httpService; init(); } private void init() throws ParserConfigurationException, XPathExpressionException { sysmetaSolrFields = systemMetadataProcessor.getFieldList(); copyFields = httpService.getSolrCopyFields(); if (copyFields != null) { log.info("SolrIndex.init - the size of the copy fields from the solr schema is : " + copyFields.size()); for(String copyField : copyFields) { log.debug("SolrIndex.init - the copy field from the solr schema: " + copyField); } } else { log.warn("SolrIndex.init - the size of the copy fields from the solr schema is 0."); } } /** * Get the list of the Subprocessors in this index. * @return the list of the Subprocessors. */ public List getSubprocessors() { return subprocessors; } /** * Set the list of Subprocessors. * @param subprocessorList the list will be set. 
*/ public void setSubprocessors(List subprocessorList) { for (IDocumentSubprocessor subprocessor : subprocessorList) { if (subprocessor instanceof BaseXPathDocumentSubprocessor) { XPathFactory xpathFactory = XPathFactory.newInstance(); XPath xpath = xpathFactory.newXPath(); xpath.setNamespaceContext(xmlNamespaceConfig); ((BaseXPathDocumentSubprocessor)subprocessor).initExpression(xpath); } } this.subprocessors = subprocessorList; } public List getDeleteSubprocessors() { return deleteSubprocessors; } public void setDeleteSubprocessors( List deleteSubprocessors) { this.deleteSubprocessors = deleteSubprocessors; } /** * Generate the index for the given information * @param id * @param systemMetadata * @param dataStream * @return * @throws IOException * @throws SAXException * @throws ParserConfigurationException * @throws XPathExpressionException * @throws MarshallingException * @throws SolrServerException * @throws EncoderException * @throws UnsupportedType * @throws NotFound * @throws NotImplemented */ private Map process(String id, SystemMetadata systemMetadata, String objectPath, boolean isSysmetaChangeOnly) throws IOException, SAXException, ParserConfigurationException, XPathExpressionException, MarshallingException, EncoderException, SolrServerException, NotImplemented, NotFound, UnsupportedType{ log.debug("SolrIndex.process - trying to generate the solr doc object for the pid "+id); long start = System.currentTimeMillis(); Map docs = new HashMap(); // Load the System Metadata document ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream(); TypeMarshaller.marshalTypeToOutputStream(systemMetadata, systemMetadataOutputStream); ByteArrayInputStream systemMetadataStream = new ByteArrayInputStream(systemMetadataOutputStream.toByteArray()); try { docs = systemMetadataProcessor.processDocument(id, docs, systemMetadataStream); } catch (Exception e) { log.error(e.getMessage(), e); throw new SolrServerException(e.getMessage()); } long end = 
System.currentTimeMillis(); //log.info("SolrIndex.process - the time for processing the system metadata for the pid " + id + " is " + (end-start) + "milliseconds."); // get the format id for this object String formatId = docs.get(id).getFirstFieldValue(SolrElementField.FIELD_OBJECTFORMAT); boolean skipOtherProcessor = false; log.debug("SolrIndex.process - the object format id for the pid "+id+" is "+formatId); if (resourceMapFormatIdList.contains(formatId) && isSysmetaChangeOnly) { //we need to make the solr doc exists (means the resource map was processed SolrDoc existingResourceMapSolrDoc = httpService.getSolrDocumentById(solrQueryUri, id); if (existingResourceMapSolrDoc != null ) { log.info("SolrIndex.process - This is a systemmetadata-change-only event for the resource map " + id + ". So we only use the system metadata subprocessor"); skipOtherProcessor = true; } else { log.info("SolrIndex.process - There is no solr doc for the resource map " + id + ". Even though this is a systemmetadata-change-only event, we can NOT just reindex the systemmeta only."); } } log.debug("SolrIndex.process - the value of skipOtherProcessors is " + skipOtherProcessor + " and the object path is " + objectPath + " for the id " + id); //if the objectPath is null, we should skip the other processes if (!skipOtherProcessor && objectPath != null) { log.debug("SolrIndex.process - Start to use subprocessor list to process " + id); // Determine if subprocessors are available for this ID if (subprocessors != null) { // for each subprocessor loaded from the spring config for (IDocumentSubprocessor subprocessor : subprocessors) { // Does this subprocessor apply? if (subprocessor.canProcess(formatId)) { // if so, then extract the additional information from the // document. try { // docObject = the resource map document or science // metadata document. // note that resource map processing touches all objects // referenced by the resource map. 
//start = System.currentTimeMillis(); FileInputStream dataStream = new FileInputStream(objectPath); //end = System.currentTimeMillis(); //log.info("SolrIndex.process - the time for reading the file input stream " + " for the pid " + id + " is " + (end-start) + "milliseconds."); if (!dataStream.getFD().valid()) { log.error("SolrIndex.process - subprocessor "+ subprocessor.getClass().getName() +" couldn't process since it could not load OBJECT file for ID,Path=" + id + ", " + objectPath); //throw new Exception("Could not load OBJECT for ID " + id ); } else { start = System.currentTimeMillis(); docs = subprocessor.processDocument(id, docs, dataStream); end = System.currentTimeMillis(); log.info("SolrIndex.process - the time for calling processDocument for the subprocessor " + subprocessor.getClass().getName() +" for the pid " + id + " is " + (end-start) + "milliseconds."); log.debug("SolrIndex.process - subprocessor "+ subprocessor.getClass().getName() +" generated solr doc for id "+id); } } catch (Exception e) { e.printStackTrace(); log.error(e.getMessage(), e); throw new SolrServerException(e.getMessage()); } } } } } /*if(docs != null) { SolrDoc solrDoc = docs.get(id); ByteArrayOutputStream baos = new ByteArrayOutputStream(); solrDoc.serialize(baos, "UTF-8"); log.warn("after process the science metadata, the solr doc is \n"+baos.toString()); }*/ // TODO: in the XPathDocumentParser class in d1_cn_index_process module, // merge is only for resource map. We need more work here. 
for (SolrDoc mergeDoc : docs.values()) { if (!mergeDoc.isMerged()) { mergeWithIndexedDocument(mergeDoc); } } /*if(docs != null) { SolrDoc solrDoc = docs.get(id); ByteArrayOutputStream baos = new ByteArrayOutputStream(); solrDoc.serialize(baos, "UTF-8"); log.warn("after merge, the solr doc is \n"+baos.toString()); }*/ //SolrElementAdd addCommand = getAddCommand(new ArrayList(docs.values())); return docs; } /** * Merge updates with existing solr documents * * This method appears to re-set the data package field data into the * document about to be updated in the solr index. Since packaging * information is derived from the package document (resource map), this * information is not present when processing a document contained in a data * package. This method replaces those values from the existing solr index * record for the document being processed. -- sroseboo, 1-18-12 * * @param indexDocument * @return * @throws IOException * @throws EncoderException * @throws XPathExpressionException * @throws SAXException * @throws ParserConfigurationException * @throws SolrServerException * @throws UnsupportedType * @throws NotFound * @throws NotImplemented */ // TODO:combine merge function with resourcemap merge function private SolrDoc mergeWithIndexedDocument(SolrDoc indexDocument) throws IOException, EncoderException, XPathExpressionException, SolrServerException, ParserConfigurationException, SAXException, NotImplemented, NotFound, UnsupportedType { //Retrieve the existing solr document from the solr server for the id. If it doesn't exist, null or empty solr doc will be returned. 
SolrDoc indexedDocument = httpService.getSolrDocumentById(solrQueryUri, indexDocument.getIdentifier()); /*int wait = new Double(Math.random() * 10000).intValue(); System.out.println("++++++++++++++++++++++++++++ the wait time is " + wait); try { Thread.sleep(wait); } catch (Exception e) { }*/ if (indexedDocument == null || indexedDocument.getFieldList().size() <= 0) { return indexDocument; } else { Vector mergeNeededFields = new Vector(); for (SolrElementField field : indexedDocument.getFieldList()) { if ((field.getName().equals(SolrElementField.FIELD_ISDOCUMENTEDBY) || field.getName().equals(SolrElementField.FIELD_DOCUMENTS) || field .getName().equals(SolrElementField.FIELD_RESOURCEMAP)) && !indexDocument.hasFieldWithValue(field.getName(), field.getValue())) { indexDocument.addField(field); } else if (!copyFields.contains(field.getName()) && !indexDocument.hasField(field.getName()) && !isSystemMetadataField(field.getName())) { // we don't merge the system metadata field since they can be removed. // we don't merge the copyFields as well log.debug("SolrIndex.mergeWithIndexedDocument - put the merge-needed existing solr field "+field.getName()+" with value "+field.getValue()+" from the solr server to a vector. We will merge it later."); //indexDocument.addField(field); mergeNeededFields.add(field);//record this name since we can have mutiple name/value for the same name. See https://projects.ecoinformatics.org/ecoinfo/issues/7168 } } if(mergeNeededFields != null) { for(SolrElementField field: mergeNeededFields) { log.debug("SolrIndex.mergeWithIndexedDocument - merge the existing solr field "+field.getName()+" with value "+field.getValue()+" from the solr server to the currently processing document of "+indexDocument.getIdentifier()); indexDocument.addField(field); } } indexDocument.setMerged(true); return indexDocument; } } /* * If the given field name is a system metadata field. 
*/ private boolean isSystemMetadataField(String fieldName) { boolean is = false; if (fieldName != null && !fieldName.trim().equals("") && sysmetaSolrFields != null) { for(ISolrField field : sysmetaSolrFields) { if(field != null && field.getName() != null && field.getName().equals(fieldName)) { log.debug("SolrIndex.isSystemMetadataField - the field name "+fieldName+" matches one record of system metadata field list. It is a system metadata field."); is = true; break; } } } return is; } /** * Check the parameters of the insert or update methods. * @param pid * @param systemMetadata * @param data * @throws SolrServerException */ private void checkParams(Identifier pid, SystemMetadata systemMetadata, String objectPath) throws InvalidRequest { if(pid == null || pid.getValue() == null || pid.getValue().trim().equals("")) { throw new InvalidRequest("0000", "The identifier of the indexed document should not be null or blank."); } if(systemMetadata == null) { throw new InvalidRequest("0000", "The system metadata of the indexed document "+pid.getValue()+ " should not be null."); } /*if(objectPath == null) { throw new SolrServerException("The indexed document itself for pid "+pid.getValue()+" should not be null."); }*/ } /** * Insert the indexes for a document. 
* @param pid the id of this document * @param systemMetadata the system metadata associated with the data object * @param data the path to the object file itself * @throws SolrServerException * @throws MarshallingException * @throws EncoderException * @throws UnsupportedType * @throws NotFound * @throws NotImplemented * @throws InvalidRequest */ private void insert(Identifier pid, SystemMetadata systemMetadata, String objectPath, boolean isSysmetaChangeOnly) throws IOException, SAXException, ParserConfigurationException, InvalidRequest, XPathExpressionException, SolrServerException, MarshallingException, EncoderException, NotImplemented, NotFound, UnsupportedType { checkParams(pid, systemMetadata, objectPath); log.debug("SolrIndex.insert - trying to insert the solrDoc for object "+pid.getValue()); long start = System.currentTimeMillis(); Map docs = process(pid.getValue(), systemMetadata, objectPath, isSysmetaChangeOnly); long end = System.currentTimeMillis(); log.info("SolrIndex.insert - the subprocessor processing time of " + pid.getValue() + " is " + (end-start) + " milliseconds."); //transform the Map to the SolrInputDocument which can be used by the solr server if(docs != null) { start = System.currentTimeMillis(); Set ids = docs.keySet(); for(String id : ids) { if(id != null) { SolrDoc doc = docs.get(id); insertToIndex(doc); log.debug("SolrIndex.insert - inserted the solr-doc object of pid "+id+", which relates to object "+pid.getValue()+", into the solr server."); } } end = System.currentTimeMillis(); log.info("SolrIndex.insert - finished to insert the solrDoc to the solr server for object " + pid.getValue() + " and it took " + (end-start) + " milliseconds."); } else { log.debug("SolrIndex.insert - the genered solrDoc is null. So we will not index the object "+pid.getValue()); } } /* * Insert a SolrDoc to the solr server. 
*/ private void insertToIndex(SolrDoc doc) throws SolrServerException, IOException { Vector docs = new Vector(); docs.add(doc); SolrElementAdd addCommand = new SolrElementAdd(docs); httpService.sendUpdate(solrIndexUri, addCommand, "UTF-8"); } /*private void insertToIndex(SolrDoc doc) throws SolrServerException, IOException { if(doc != null ) { SolrInputDocument solrDoc = new SolrInputDocument(); List list = doc.getFieldList(); if(list != null) { //solrDoc.addField(METACATPIDFIELD, pid); Iterator iterator = list.iterator(); while (iterator.hasNext()) { SolrElementField field = iterator.next(); if(field != null) { String value = field.getValue(); String name = field.getName(); log.trace("SolrIndex.insertToIndex - add name/value pair - "+name+"/"+value); solrDoc.addField(name, value); } } } if(!solrDoc.isEmpty()) { try { UpdateResponse response = solrServer.add(solrDoc); solrServer.commit(); } catch (SolrServerException e) { throw e; } catch (IOException e) { throw e; } //System.out.println("=================the response is:\n"+response.toString()); } } }*/ /** * Update the solr index. This method handles the three scenarios: * 1. Remove an existing doc - if the the system metadata shows the value of the archive is true, * remove the index for the previous version(s) and generate new index for the doc. * 2. Add a new doc - if the system metadata shows the value of the archive is false, generate the * index for the doc. 
* @param pid the identifier of the object; its value is used to load the system metadata
     * @param relativePath passed to the ObjectManager to locate the system metadata and the object file
     * @param isSysmetaChangeOnly forwarded unchanged to insert(...)
     * @throws NotFound
     * @throws ServiceFailure
     * @throws NotImplemented
     * @throws NotAuthorized
     * @throws InvalidToken
     * @throws EncoderException
     * @throws MarshallingException
     * @throws SolrServerException
     * @throws ParserConfigurationException
     * @throws SAXException
     * @throws UnsupportedType
     * @throws XPathExpressionException
     * @throws InterruptedException
     * @throws IOException
     * @throws InvalidRequest
     * @throws IllegalAccessException
     * @throws InstantiationException
     */
    public void update(Identifier pid, String relativePath, boolean isSysmetaChangeOnly) throws InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, NotFound, XPathExpressionException, UnsupportedType, SAXException, ParserConfigurationException, SolrServerException, MarshallingException, EncoderException, InterruptedException, IOException, InvalidRequest, InstantiationException, IllegalAccessException {
        log.debug("SolrIndex.update - trying to update(insert or remove) solr index of object "+pid.getValue());
        String objectPath = null;
        SystemMetadata systemMetadata = ObjectManager.getInstance().getSystemMetadata(pid.getValue(), relativePath);
        objectPath = ObjectManager.getInstance().getFilePath(relativePath, systemMetadata.getFormatId().getValue());
        try {
            insert(pid, systemMetadata, objectPath, isSysmetaChangeOnly);
        } catch (SolrServerException e) {
            // A failure whose message contains the version-conflict marker is retried
            // (when retries are enabled).
            if (e.getMessage().contains(VERSION_CONFLICT) && VERSION_CONFLICT_MAX_ATTEMPTS > 0) {
                log.info("SolrIndex.update - Indexer grabbed an older verion (version conflict) of the solr doc for object " + pid.getValue() + ". It will try " + VERSION_CONFLICT_MAX_ATTEMPTS + " to fix the issues");
                // NOTE(review): the source text is truncated right after "i" on the next line.
                // Everything from the loop condition of update()'s retry loop up to the
                // declaration of "docsToUpdate" is missing: the rest of update(), possibly
                // further methods, and the head of what the log message below identifies as
                // removeDataPackage. Restore this span from version control; do not edit it
                // from this copy.
                for (int i=0; i docsToUpdate = getUpdatedSolrDocsByRemovingResourceMap(pid);
                if (docsToUpdate != null && !docsToUpdate.isEmpty()) {
                    //SolrElementAdd addCommand = new SolrElementAdd(docsToUpdate);
                    //httpService.sendUpdate(solrIndexUri, addCommand);
                    // Each affected document is re-sent to solr one at a time.
                    for(SolrDoc doc : docsToUpdate) {
                        //deleteDocFromIndex(doc.getIdentifier());
                        insertToIndex(doc);
                    }
                }
                // Success: leave the retry loop.
                break;
            } catch (SolrServerException e) {
                // Version conflicts are logged and the loop goes round again; anything else
                // is rethrown.
                if (e.getMessage().contains(VERSION_CONFLICT) && VERSION_CONFLICT_MAX_ATTEMPTS > 0) {
                    log.info("SolrIndex.removeDataPackage - Indexer grabbed an older verion (version conflict) of the solr doc for object" + ". It will try " + (VERSION_CONFLICT_MAX_ATTEMPTS - i )+ " to fix the issues");
                } else {
                    throw e;
                }
            }
        }
    }

    /*
     * Get the list of the solr docs which need to be updated because of the removal of the
     * resource map. Returns null when the resource map id is null or blank; otherwise the
     * documents that reference the resource map are fetched through the HTTP service and
     * handed to removeResourceMapRelationship.
     */
    private List getUpdatedSolrDocsByRemovingResourceMap(String resourceMapId) throws UnsupportedType, NotFound, SolrServerException, ParserConfigurationException, SAXException, MalformedURLException, IOException, XPathExpressionException, EncoderException {
        List updatedSolrDocs = null;
        if (resourceMapId != null && !resourceMapId.trim().equals("")) {
            /*List docsContainResourceMap = httpService.getDocumentsByResourceMap( solrQueryUri, resourceMapId);*/
            List docsContainResourceMap = httpService.getDocumentsByResourceMap(solrQueryUri, resourceMapId);
            updatedSolrDocs = removeResourceMapRelationship(docsContainResourceMap, resourceMapId);
        }
        return updatedSolrDocs;
    }

    /*
     * Strip the given resource map from every document in the list and return the documents
     * that were changed. Which relationship fields are cleaned as well depends on whether a
     * document carries "documents" values, "isDocumentedBy" values, both, or neither.
     * The documents are modified in place. Never returns null.
     */
    private List removeResourceMapRelationship(List docsContainResourceMap, String resourceMapId) throws XPathExpressionException, IOException {
        List totalUpdatedSolrDocs = new ArrayList();
        if (docsContainResourceMap != null && !docsContainResourceMap.isEmpty()) {
            for (SolrDoc doc : docsContainResourceMap) {
                List updatedSolrDocs = new ArrayList();
                List resourceMapIdStrs = doc.getAllFieldValues(SolrElementField.FIELD_RESOURCEMAP);
                List dataIdStrs = doc.getAllFieldValues(SolrElementField.FIELD_DOCUMENTS);
                List metadataIdStrs = doc.getAllFieldValues(SolrElementField.FIELD_ISDOCUMENTEDBY);
                if ((dataIdStrs == null || dataIdStrs.isEmpty()) && (metadataIdStrs == null || metadataIdStrs.isEmpty())) {
                    // only has resourceMap field, doesn't have either documentBy or documents fields.
                    // so we only remove the resource map field.
                    doc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP, resourceMapId);
                    updatedSolrDocs.add(doc);
                } else if ((dataIdStrs != null && !dataIdStrs.isEmpty()) && (metadataIdStrs == null || metadataIdStrs.isEmpty())) {
                    // The solr doc is for a metadata object since the solr doc documents data files
                    updatedSolrDocs = removeAggregatedItems(resourceMapId, doc, resourceMapIdStrs, dataIdStrs, SolrElementField.FIELD_DOCUMENTS);
                } else if ((dataIdStrs == null || dataIdStrs.isEmpty()) && (metadataIdStrs != null && !metadataIdStrs.isEmpty())) {
                    // The solr doc is for a data object since it has documentedBy elements.
                    updatedSolrDocs = removeAggregatedItems(resourceMapId, doc, resourceMapIdStrs, metadataIdStrs, SolrElementField.FIELD_ISDOCUMENTEDBY);
                } else if ((dataIdStrs != null && !dataIdStrs.isEmpty()) && (metadataIdStrs != null && !metadataIdStrs.isEmpty())){
                    // both metadata and data for one object: clean each relationship separately,
                    // then merge the two results.
                    List solrDocsRemovedDocuments = removeAggregatedItems(resourceMapId, doc, resourceMapIdStrs, dataIdStrs, SolrElementField.FIELD_DOCUMENTS);
                    List solrDocsRemovedDocumentBy = removeAggregatedItems(resourceMapId, doc, resourceMapIdStrs, metadataIdStrs, SolrElementField.FIELD_ISDOCUMENTEDBY);
                    updatedSolrDocs = mergeUpdatedSolrDocs(solrDocsRemovedDocumentBy, solrDocsRemovedDocuments);
                }
                // move them to the final result
                if(updatedSolrDocs != null) {
                    for(SolrDoc updatedDoc: updatedSolrDocs) {
                        totalUpdatedSolrDocs.add(updatedDoc);
                    }
                }
            }
        }
        return totalUpdatedSolrDocs;
    }

    /*
     * Process the list of ids of the documentBy/documents in a solr doc.
     * Removes the target resource map from the doc and, depending on how many resource maps
     * the doc belongs to, all or only the matching values of fieldNameRemoved.
     * Returns a list holding the (modified) doc, or an empty list if any of doc,
     * resourceMapIdsInDoc, aggregatedItemsInDoc or fieldNameRemoved is null or the doc has
     * no resource map id at all.
     */
    private List removeAggregatedItems(String targetResourceMapId, SolrDoc doc, List resourceMapIdsInDoc, List aggregatedItemsInDoc, String fieldNameRemoved) {
        List updatedSolrDocs = new ArrayList();
        if (doc != null && resourceMapIdsInDoc != null && aggregatedItemsInDoc != null && fieldNameRemoved != null) {
            if (resourceMapIdsInDoc.size() == 1) {
                // only has one resource map. remove the resource map. also remove the documentBy
                doc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP, targetResourceMapId);
                doc.removeAllFields(fieldNameRemoved);
                updatedSolrDocs.add(doc);
            } else if (resourceMapIdsInDoc.size() > 1) {
                // we have multiple resource maps. We should match them: only the relationship
                // values that belong to the target resource map alone are removed.
                Map ids = matchResourceMapsAndItems(doc.getIdentifier(), targetResourceMapId, resourceMapIdsInDoc, aggregatedItemsInDoc, fieldNameRemoved);
                if (ids != null) {
                    for (String id : ids.keySet()) {
                        doc.removeFieldsWithValue(fieldNameRemoved, id);
                    }
                }
                doc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP, targetResourceMapId);
                updatedSolrDocs.add(doc);
                /*if (aggregatedItemsInDoc.size() > 1) {

                } else {
                    //multiple resource map aggregate same metadata and data. Just remove the resource map
                    doc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP, targetResourceMapId);
                    updatedSolrDocs.add(doc);
                }*/
            }
        }
        return updatedSolrDocs;
    }

    /*
     * Return a map whose keys are the aggregated item ids that are related to targetId only
     * through the target resource map (every value is targetResourceMapId).
     * This looks at the aggregation information from the other side - if the targetId
     * is a metadata object, we look at the data objects which it describes; if
     * the targetId is a data object, we look at the metadata object which documents it.
     * One solr lookup is made per aggregated item; a failed lookup is logged as a warning
     * and the item is skipped. Returns an empty map if fieldName is neither
     * isDocumentedBy nor documents, or if a required argument is null.
     */
    private Map matchResourceMapsAndItems(String targetId, String targetResourceMapId, List originalResourceMaps, List aggregatedItems, String fieldName) {
        Map map = new HashMap();
        if (targetId != null && targetResourceMapId != null && aggregatedItems != null && fieldName != null) {
            // The counterpart document stores the inverse relationship.
            String newFieldName = null;
            if (fieldName.equals(SolrElementField.FIELD_ISDOCUMENTEDBY)) {
                newFieldName = SolrElementField.FIELD_DOCUMENTS;
            } else if (fieldName.equals(SolrElementField.FIELD_DOCUMENTS)) {
                newFieldName = SolrElementField.FIELD_ISDOCUMENTEDBY;
            }
            if (newFieldName != null) {
                for (String item : aggregatedItems) {
                    SolrDoc doc = null;
                    try {
                        doc = httpService.getSolrDocumentById(solrQueryUri, item);
                        // NOTE(review): doc is not null-checked; if the lookup returns null the
                        // resulting NullPointerException ends up in the catch below and is only
                        // logged as a warning - confirm that this is intended.
                        List fieldValues = doc.getAllFieldValues(newFieldName);
                        List resourceMapIds = doc.getAllFieldValues(SolrElementField.FIELD_RESOURCEMAP);
                        if ((fieldValues != null && fieldValues.contains(targetId)) && (resourceMapIds != null && resourceMapIds.contains(targetResourceMapId))) {
                            // okay, we found the target aggregation item id and the resource map id
                            // in this solr doc. However, we need to check whether another resource map
                            // with a different id specifies the same relationship. If so, we should not
                            // remove the documents (or documentBy) element since we need to preserve the
                            // relationship for the remaining resource map.
                            boolean hasDuplicateIds = false;
                            if(originalResourceMaps != null) {
                                for(String id :resourceMapIds) {
                                    if (originalResourceMaps.contains(id) && !id.equals(targetResourceMapId)) {
                                        hasDuplicateIds = true;
                                        break;
                                    }
                                }
                            }
                            if(!hasDuplicateIds) {
                                map.put(item, targetResourceMapId);
                            }
                        }
                    } catch (Exception e) {
                        log.warn("SolrIndex.matchResourceMapsAndItems - can't get the solrdoc for the id " + item + " since " + e.getMessage());
                    }
                }
            }
        }
        return map;
    }

    /*
     * Merge two lists of updated solr docs. removedDocumentBy has the correct information about
     * the documentBy element. removedDocuments has the correct information about the documents
     * element. So we go through the two lists and find the docs having the same identifier.
     * Get the list of the documents values from the one in removedDocuments (1).
     * Remove all values of documents from the one in removedDocumentBy.
     * Then copy the list of documents values from (1) to the one in removedDocumentBy.
     * Matched docs are removed from both input lists; whatever is left over is appended
     * unchanged. If one list is null or empty, the other list itself is returned.
     *
     * NOTE(review): the parameter list below has lost the text between "List" and the
     * parameter names (presumably type arguments) - restore it from version control.
     */
    private List mergeUpdatedSolrDocs(ListremovedDocumentBy, ListremovedDocuments) {
        List mergedDocuments = new ArrayList();
        if(removedDocumentBy == null || removedDocumentBy.isEmpty()) {
            mergedDocuments = removedDocuments;
        } else if (removedDocuments == null || removedDocuments.isEmpty()) {
            mergedDocuments = removedDocumentBy;
        } else {
            // NOTE(review): sizeOfDocs is read once, but removedDocuments shrinks on every
            // match. A later outer iteration would start the inner loop at an index past the
            // end of the list. The caller visible in this file passes lists built by
            // removeAggregatedItems, which adds at most one doc - confirm before reusing this
            // with longer lists.
            int sizeOfDocBy = removedDocumentBy.size();
            int sizeOfDocs = removedDocuments.size();
            // Both loops run backwards so that remove(i)/remove(j) do not shift unvisited entries.
            for(int i=sizeOfDocBy-1; i>= 0; i--) {
                SolrDoc docInRemovedDocBy = removedDocumentBy.get(i);
                for(int j= sizeOfDocs-1; j>=0; j--) {
                    SolrDoc docInRemovedDocs = removedDocuments.get(j);
                    if(docInRemovedDocBy.getIdentifier().equals(docInRemovedDocs.getIdentifier())) {
                        // found the same doc in both lists. let's merge them.
                        // first get all the documents elements from docInRemovedDocs (it has the
                        // correct information about the documents element)
                        List idsInDocuments = docInRemovedDocs.getAllFieldValues(SolrElementField.FIELD_DOCUMENTS);
                        docInRemovedDocBy.removeAllFields(SolrElementField.FIELD_DOCUMENTS);// clear out any documents element in docInRemovedDocBy
                        // add the documents elements from docInRemovedDocs if it has any.
                        // docInRemovedDocBy already has the correct documentBy information; after
                        // this copy it has the correct documents information as well.
                        if(idsInDocuments != null) {
                            for(String id : idsInDocuments) {
                                if(id != null && !id.trim().equals("")) {
                                    docInRemovedDocBy.addField(new SolrElementField(SolrElementField.FIELD_DOCUMENTS, id));
                                }
                            }
                        }
                        // combine the resource map ids of both docs (the code takes the union).
                        List resourceMapIdsInWithDocs = docInRemovedDocs.getAllFieldValues(SolrElementField.FIELD_RESOURCEMAP);
                        List resourceMapIdsInWithDocBy = docInRemovedDocBy.getAllFieldValues(SolrElementField.FIELD_RESOURCEMAP);
                        docInRemovedDocBy.removeAllFields(SolrElementField.FIELD_RESOURCEMAP);
                        Collection resourceMapIds = CollectionUtils.union(resourceMapIdsInWithDocs, resourceMapIdsInWithDocBy);
                        if(resourceMapIds != null) {
                            for(Object idObj : resourceMapIds) {
                                String id = (String)idObj;
                                docInRemovedDocBy.addField(new SolrElementField(SolrElementField.FIELD_RESOURCEMAP, id));
                            }
                        }
                        // we don't need to do anything about the documentBy elements since
                        // docInRemovedDocBy has the correct information.
                        mergedDocuments.add(docInRemovedDocBy);
                        // delete the two documents from the lists
                        removedDocumentBy.remove(i);
                        removedDocuments.remove(j);
                        break;
                    }
                }
            }
            // when we get here, if the two lists are empty, this was a perfect merge. However, if
            // something is left, we just put it in.
            for(SolrDoc doc: removedDocumentBy) {
                mergedDocuments.add(doc);
            }
            for(SolrDoc doc: removedDocuments) {
                mergedDocuments.add(doc);
            }
        }
        return mergedDocuments;
    }

    /*
     * Remove a pid which is part of resource map.
     * The pid's own solr document is fetched and then deleted; afterwards the ids in its
     * "documents" and "isDocumentedBy" fields are processed in retry loops.
     *
     * NOTE(review): indexedDoc is used without a null check after the lookup - confirm that
     * getSolrDocumentById cannot return null for a pid reaching this method.
     * NOTE(review): both retry loops below are truncated in this copy of the source (the text
     * from the loop condition up to the "> 0" of the version-conflict test is missing);
     * restore them from version control.
     */
    private void removeFromDataPackage(String pid) throws XPathExpressionException, IOException, EncoderException, SolrServerException {
        SolrDoc indexedDoc = httpService.getSolrDocumentById(solrQueryUri, pid);
        deleteDocFromIndex(pid);
        List documents = indexedDoc.getAllFieldValues(SolrElementField.FIELD_DOCUMENTS);
        if (documents != null && !documents.isEmpty()) {
            for (String documentsValue : documents) {
                for (int i=0; i 0) {
                            log.info("SolrIndex.removeFromDataPackage - Indexer grabbed an older verion (version conflict) of the solr doc for object " + documentsValue + ". It will try " + (VERSION_CONFLICT_MAX_ATTEMPTS - i )+ " to fix the issues");
                        } else {
                            throw e;
                        }
                    }
                }
            }
        }
        List documentedBy = indexedDoc.getAllFieldValues(SolrElementField.FIELD_ISDOCUMENTEDBY);
        if (documentedBy != null && !documentedBy.isEmpty()) {
            for (String documentedByValue : documentedBy) {
                for (int i=0; i 0) {
                            log.info("SolrIndex.removeFromDataPackage - Indexer grabbed an older verion (version conflict) of the solr doc for object " + documentedByValue + ". It will try " + (VERSION_CONFLICT_MAX_ATTEMPTS - i )+ " to fix the issues");
                        } else {
                            throw e;
                        }
                    }
                }
            }
        }
    }

    /*
     * Remove a pid from the solr index
     * (disabled implementation based on the delete subprocessors, kept for reference)
     */
    /*private synchronized void removeFromIndex(String identifier) throws Exception {
        Map docs = new HashMap();
        for (IDocumentDeleteSubprocessor deleteSubprocessor : deleteSubprocessors) {
            docs.putAll(deleteSubprocessor.processDocForDelete(identifier, docs));
        }
        List docsToUpdate = new ArrayList();
        List idsToIndex = new ArrayList();
        for (String idToUpdate : docs.keySet()) {
            if (docs.get(idToUpdate) != null) {
                docsToUpdate.add(docs.get(idToUpdate));
            } else {
                idsToIndex.add(idToUpdate);
            }
        }
        // update the docs we have
        for (SolrDoc docToUpdate : docsToUpdate) {
            insertToIndex(docToUpdate);
        }
        // delete this one
        deleteDocFromIndex(identifier);
        // index the rest
        //TODO: we need to figure out how to get the file path
        for (String idToIndex : idsToIndex) {
            Identifier pid = new Identifier();
            pid.setValue(idToIndex);
            SystemMetadata sysMeta = DistributedMapsFactory.getSystemMetadata(idToIndex);
            if (SolrDoc.visibleInIndex(sysMeta)) {
                String objectPath = DistributedMapsFactory.getObjectPathMap().get(pid);
                boolean isSysmetaChangeOnlyEvent = false;
                insert(pid, sysMeta, objectPath, isSysmetaChangeOnlyEvent);
            }
        }
    }*/

    /*
     * Ask the HTTP service to delete the solr document of the given pid.
     * A null or blank pid is silently ignored.
     */
    private void deleteDocFromIndex(String pid) throws IOException {
        if (pid != null && !pid.trim().equals("")) {
            try {
                //solrServer.deleteById(pid);
                //solrServer.commit();
                httpService.sendSolrDelete(pid, solrIndexUri);
            //} catch (SolrServerException e) {
                //throw e;
            } catch (IOException e) {
                // Rethrown unchanged; the try/catch is a leftover of the SolrJ-based code above.
                throw e;
            }
        }
    }

    /**
     * Set the http service. The service is stored in a static field, so the change is
     * visible to every instance of this class.
     * @param service the http service to use
     */
    public void setHttpService(HTTPService service) {
        this.httpService = service;
    }

    /**
     * Get the http service
     * @return the http service
     */
    public HTTPService getHttpService() {
        return httpService;
    }
}