/*
* AtomOREParser.java
*
* Copyright (c) 2008, Hewlett-Packard Company and Massachusetts
* Institute of Technology. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the Hewlett-Packard Company nor the name of the
* Massachusetts Institute of Technology nor the names of their
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
package org.dspace.foresite.atom;
import org.dspace.foresite.OREParser;
import org.dspace.foresite.ResourceMap;
import org.dspace.foresite.OREParserException;
import org.dspace.foresite.OREFactory;
import org.dspace.foresite.Aggregation;
import org.dspace.foresite.Agent;
import org.dspace.foresite.OREVocabulary;
import org.dspace.foresite.OREException;
import org.dspace.foresite.AggregatedResource;
import org.dspace.foresite.ReMSerialisation;
import org.dspace.foresite.Proxy;
import org.dspace.foresite.Vocab;
import org.jdom.Element;
import org.jdom.Namespace;
import org.jdom.output.XMLOutputter;
import java.io.InputStream;
import java.io.IOException;
import java.io.ByteArrayOutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.List;
import java.util.Date;
import java.util.Properties;
import com.sun.syndication.io.XmlReader;
import com.sun.syndication.io.FeedException;
import com.sun.syndication.io.WireFeedInput;
import com.sun.syndication.feed.atom.Feed;
import com.sun.syndication.feed.atom.Person;
import com.sun.syndication.feed.atom.Link;
import com.sun.syndication.feed.atom.Category;
import com.sun.syndication.feed.atom.Generator;
import com.sun.syndication.feed.atom.Entry;
import com.sun.syndication.feed.atom.Content;
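/**
* Parser which reads an ATOM feed serialisation of an ORE Resource Map
* and builds the corresponding ResourceMap / Aggregation object model.
*
* @author Richard Jones
*/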
public class AtomOREParser implements OREParser
{
/** JDOM namespace for RDF syntax (prefix "rdf"); used in this class to build the rdf:RDF element that wraps embedded RDF markup. */
public static Namespace rdfNS = Namespace.getNamespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
/**
* Not implemented: both arguments are ignored and null is always returned.
*
* @param is input stream (unused)
* @param uri URI (unused)
* @return always null
* @throws OREParserException declared, but never thrown by this implementation
*/
public ResourceMap parse(InputStream is, URI uri) throws OREParserException
{
// NOTE(review): stub - callers receive null rather than a parsed ResourceMap
return null;
}
/**
 * Parses an ATOM feed serialisation of an ORE Resource Map into the object model.
 *
 * <p>The feed is mapped as follows: atom:id becomes the Aggregation URI (URI-A); the
 * atom:link with rel='self' becomes the Resource Map URI (URI-R); feed-level authors,
 * title, categories and related/alternate links are recorded on the Aggregation;
 * atom:updated, atom:generator and atom:rights are recorded on the Resource Map;
 * every atom:entry is handed to {@code aggregatedResource}; and any foreign markup on
 * the feed is wrapped in an rdf:RDF element and passed to the Aggregation as RDF/XML.</p>
 *
 * @param is stream holding the ATOM document; this method does not explicitly close it
 * @return the Resource Map built from the feed
 * @throws OREParserException if the document cannot be read or parsed, is not an ATOM
 *         feed, has no atom:id, fails validation (no ORE Aggregation category or no
 *         rel='self' link), or contains a malformed URI
 */
// ROME hands back untyped lists, so the typed assignments below are unchecked by necessity
@SuppressWarnings("unchecked")
public ResourceMap parse(InputStream is)
        throws OREParserException
{
    try
    {
        // read in the ATOM document
        XmlReader reader = new XmlReader(is);
        WireFeedInput input = new WireFeedInput();
        Object wireFeed = input.build(reader);
        if (!(wireFeed instanceof Feed))
        {
            // report a non-ATOM document as a parse failure instead of a ClassCastException
            throw new OREParserException("Passed document is not an ATOM feed");
        }
        Feed atom = (Feed) wireFeed;

        // mine the atom feed
        String id = atom.getId();
        if (id == null)
        {
            // without atom:id there is no URI-A; fail cleanly rather than with a NullPointerException
            throw new OREParserException("Passed ATOM document does not contain a URI for the Aggregation; atom:id");
        }
        URI uri_a = new URI(id);
        List<Link> links = atom.getOtherLinks();
        List<Link> altLinks = atom.getAlternateLinks();
        links.addAll(altLinks); // add all the links together for convenience
        List<Person> authors = atom.getAuthors();
        String title = atom.getTitle();
        List<Category> categories = atom.getCategories();
        Date updated = atom.getUpdated();
        Generator generator = atom.getGenerator();
        String rights = atom.getRights();
        List<Element> rdf = (List<Element>) atom.getForeignMarkup();
        List<Entry> entries = atom.getEntries();

        // do the initial validation
        this.validate(categories);

        // declare some very specific required variables
        URI uri_r = this.getURIR(links);

        // start reading in the atom feed to the model
        //////////////////////////////////////////////

        // atom:id :: URI-A
        Aggregation agg = OREFactory.createAggregation(uri_a);

        // atom:link@rel='self' :: URI-R
        ResourceMap rem = agg.createResourceMap(uri_r);

        // atom:author :: URI-A dcterms:creator
        if (authors != null)
        {
            for (Person author : authors)
            {
                String auri = author.getUri();
                Agent creator;
                if (auri != null)
                {
                    creator = OREFactory.createAgent(new URI(auri));
                }
                else
                {
                    creator = OREFactory.createAgent();
                }

                String name = author.getName();
                if (name != null)
                {
                    creator.addName(name);
                }

                String mbox = author.getEmail();
                if (mbox != null)
                {
                    // the mbox is stored as a URI, so make sure it carries the mailto: scheme
                    if (!mbox.startsWith("mailto:"))
                    {
                        mbox = "mailto:" + mbox;
                    }
                    creator.addMbox(new URI(mbox));
                }

                agg.addCreator(creator);
            }
        }

        // atom:title :: Aggregation title
        if (title != null)
        {
            agg.addTitle(title);
        }

        // atom:category :: URI-A rdf:type
        String aggURI = Vocab.ore_Aggregation.uri().toString();
        for (Category category : categories)
        {
            // the @term specifies the rdf:type relation
            String type = category.getTerm();

            // skip categories without a term, and exclude the mandatory Aggregation,
            // as this exists by default in the model
            if (type == null || aggURI.equals(type))
            {
                continue;
            }

            URI typeURI = new URI(type);
            agg.addType(typeURI);

            // FIXME: we need to find a nice simple way of doing this
            // FIXME: actually, don't we just want to build this into the model
            // in all cases?
            // the @scheme specifies the rdfs:isDefinedBy relation of the term
            // Triple triple = OREFactory.createTriple(typeURI, new URI(""));

            // FIXME: we need to find a nice simple way of doing this
            // the @label specifies the rdfs:label relation
            String label = category.getLabel();
            if (label != null)
            {
                // @label is optional; do not create a triple with a null object
                agg.createTriple(Vocab.rdfs_label, label);
            }
        }

        // deal with the various links
        for (Link link : links)
        {
            // atom:link@rel='related' :: URI-A ore:similarTo
            if ("related".equals(link.getRel()))
            {
                agg.addSimilarTo(new URI(link.getHref()));

                // FIXME: we need to find a nice simple way of doing these
                // @type :: URI-similar dc:format
                // @hreflang :: URI-similar dc:language
                // @title :: URI-similar dc:title
            }

            // atom:link@rel='alternate' :: URI-A ore:isDescribedBy
            if ("alternate".equals(link.getRel()))
            {
                ReMSerialisation serial = new ReMSerialisation();
                serial.setURI(new URI(link.getHref()));

                // FIXME: we need to find a nice simple way of doing these
                // @type :: URI-R-other dc:format
                // @hreflang :: URI-R-other dc:language
                // @title :: URI-R-other dc:title
                // FIXME: consider adding a title to the ReMSerialisation class?

                // add the resource map serialisation
                agg.addReMSerialisation(serial);
            }

            // atom:link@rel='license' :: URI-A dcterms:rights URI-rights
            if ("license".equals(link.getRel()))
            {
                // FIXME: the model doesn't currently support Rights as a URI

                // FIXME: we need to find a nice simple way of doing these
                // @type :: URI-R-other dc:format
                // @hreflang :: URI-R-other dc:language
                // @title :: URI-R-other dc:title
            }
        }

        // FIXME: need an easier way to add arbitrary metadata
        // atom:icon :: URI-A foaf:logo URI-icon

        // FIXME: not yet supported by the model
        // atom:contributor :: URI-A dcterms:contributor URI-contributor

        // FIXME: need an easier way to add arbitrary metadata
        // atom:subtitle :: URI-A dc:description

        // atom:updated :: URI-R dcterms:modified
        if (updated != null)
        {
            rem.setModified(updated);
        }

        // atom:generator :: URI-R dcterms:creator
        if (generator != null)
        {
            Agent remCreator;
            String genURL = generator.getUrl();
            if (genURL != null)
            {
                remCreator = OREFactory.createAgent(new URI(genURL));
            }
            else
            {
                remCreator = OREFactory.createAgent();
            }

            String name = generator.getValue();
            if (name != null)
            {
                remCreator.addName(name);
            }

            rem.addCreator(remCreator);
        }

        // atom:rights :: URI-R dc:rights
        if (rights != null)
        {
            rem.setRights(rights);
        }

        // now process the individual atom entries
        //////////////////////////////////////////

        for (Entry entry : entries)
        {
            this.aggregatedResource(entry, agg);
        }

        // Finally extract the goodies from the embedded RDF
        ////////////////////////////////////////////////////

        // rdf:Description :: Add to model directly
        Element root = new Element("RDF", rdfNS);
        if (rdf != null)
        {
            for (Element element : rdf)
            {
                root.addContent(element);
            }
        }
        XMLOutputter out = new XMLOutputter();
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        out.output(root, os);
        // decode with an explicit charset instead of the platform default;
        // assumes XMLOutputter's default output encoding is UTF-8 - confirm against the JDOM version in use
        agg.addRDF(os.toString("UTF-8"), "RDF/XML");

        // NOTE: an earlier approach which mined dcterms:created and ore:isAggregatedBy
        // out of the embedded RDF by hand is preserved in the commented-out backup copy
        // of this method further down the file.

        // finally, some cleaning up
        ////////////////////////////

        // if no date rem created specified, use now
        if (rem.getCreated() == null)
        {
            rem.setCreated(new Date());
        }

        // if no date aggregation created specified, use now
        if (agg.getCreated() == null)
        {
            agg.setCreated(new Date());
        }

        // now we can just return (the model is already assembled)
        //////////////////////////////////////////////////////////

        return rem;
    }
    catch (IOException e)
    {
        throw new OREParserException(e);
    }
    catch (FeedException e)
    {
        throw new OREParserException(e);
    }
    catch (URISyntaxException e)
    {
        throw new OREParserException(e);
    }
    catch (OREException e)
    {
        throw new OREParserException(e);
    }
}
/**
 * Converts a single atom:entry into an Aggregated Resource created from the supplied
 * Aggregation.
 *
 * <p>The entry's rel='alternate' link supplies the resource URI (URI-AR); atom:id, when
 * present, is used to create a Proxy; rel='related' links are recorded as other
 * aggregations of the resource; a rel='via' link sets the proxy lineage; authors become
 * creators; category terms become types; and any foreign markup on the entry is wrapped
 * in an rdf:RDF element and passed to the resource as RDF/XML.</p>
 *
 * @param entry the atom:entry to read
 * @param agg the aggregation from which the aggregated resource is created
 * @throws URISyntaxException if any URI in the entry is malformed
 * @throws OREException if thrown by the underlying model calls
 * @throws OREParserException if the entry has no rel='alternate' link
 * @throws IOException if the embedded RDF cannot be serialised
 */
// ROME hands back untyped lists, so the typed assignments below are unchecked by necessity
@SuppressWarnings("unchecked")
private void aggregatedResource(Entry entry, Aggregation agg)
        throws URISyntaxException, OREException, OREParserException, IOException
{
    // mine the entry
    // atom:id :: URI-P (optional; no proxy is created without it)
    URI uri_p = null;
    String proxyURI = entry.getId();
    if (proxyURI != null)
    {
        uri_p = new URI(proxyURI);
    }

    List<Link> links = entry.getOtherLinks();
    List<Link> altLinks = entry.getAlternateLinks();
    links.addAll(altLinks); // add all the links together for convenience
    List<Person> authors = entry.getAuthors();
    List<Category> categories = entry.getCategories();
    List<Element> rdf = (List<Element>) entry.getForeignMarkup();

    URI uri_ar = this.getURIAR(links);
    AggregatedResource ar = agg.createAggregatedResource(uri_ar);

    Proxy proxy = null;
    if (uri_p != null)
    {
        proxy = ar.createProxy(uri_p);
    }

    for (Link link : links)
    {
        // atom:link@rel='alternate' :: URI-AR
        if ("alternate".equals(link.getRel()))
        {
            // FIXME: need an easy way to add this information
            // @type :: URI-AR dc:format
            // @hreflang :: URI-AR dc:language
            // @title :: URI-AR dc:title
            // @length :: URI-AR dcterms:extent
        }

        // atom:link@rel='related' :: URI-A-other
        if ("related".equals(link.getRel()))
        {
            ar.addAggregation(new URI(link.getHref()));

            // FIXME: need an easy way to add this information
            // @hreflang :: URI-AR dc:language
            // @title :: URI-AR dc:title
        }

        // atom:link@rel='via' :: URI-P-other
        if ("via".equals(link.getRel()))
        {
            if (proxy != null)
            {
                // the lineage is the link target (@href); @rel is only ever the literal "via"
                proxy.setLineage(new URI(link.getHref()));

                // FIXME: need an easy way to add this information
                // @type :: URI-P-other dc:format
                // @hreflang :: URI-P-other dc:language
                // @title :: URI-P-other dc:title
            }
        }
    }

    // atom:author :: URI-AR dcterms:creator
    if (authors != null)
    {
        for (Person author : authors)
        {
            Agent creator;
            String authorURI = author.getUri();
            if (authorURI != null)
            {
                creator = OREFactory.createAgent(new URI(authorURI));
            }
            else
            {
                creator = OREFactory.createAgent();
            }

            String name = author.getName();
            if (name != null)
            {
                creator.addName(name);
            }

            String mbox = author.getEmail();
            if (mbox != null)
            {
                // the mbox is stored as a URI, so make sure it carries the mailto: scheme
                if (!mbox.startsWith("mailto:"))
                {
                    mbox = "mailto:" + mbox;
                }
                creator.addMbox(new URI(mbox));
            }

            ar.addCreator(creator);
        }
    }

    // FIXME: need easier way of adding this information
    // atom:title :: URI-AR dc:title

    // atom:category :: URI-AR rdf:type
    for (Category category : categories)
    {
        String term = category.getTerm();
        if (term != null)
        {
            // a category without a term cannot name a type, so it is skipped
            ar.addType(new URI(term));
        }

        // @scheme :: URI-term rdfs:isDefinedBy
        // @label :: URI-term rdfs:label
    }

    // FIXME: the model does not yet support this
    // atom:contributors :: URI-AR dcterms:contributor

    // FIXME: need easier way of adding this information
    // atom:summary :: URI-AR dcterms:abstract

    // rdf:Description :: Add to model directly
    Element root = new Element("RDF", rdfNS);
    if (rdf != null)
    {
        for (Element element : rdf)
        {
            root.addContent(element);
        }
    }
    XMLOutputter out = new XMLOutputter();
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    out.output(root, os);
    // decode with an explicit charset instead of the platform default;
    // assumes XMLOutputter's default output encoding is UTF-8 - confirm against the JDOM version in use
    ar.addRDF(os.toString("UTF-8"), "RDF/XML");
}
/**
 * Checks that the feed declares itself to be a resource map, i.e. that at least one
 * of the supplied categories has the ORE Aggregation URI as its term.
 *
 * @param categories the feed-level atom:category elements; null is treated as empty
 * @throws OREParserException if no category carries the ORE Aggregation term
 */
private void validate(List<Category> categories)
        throws OREParserException
{
    if (categories != null)
    {
        // the term being looked for is the same for every category, so resolve it once
        String aggURI = OREVocabulary.aggregation.getUri().toString();
        for (Category category : categories)
        {
            if (aggURI.equals(category.getTerm()))
            {
                return;
            }
        }
    }
    throw new OREParserException("Passed ATOM document is not an ORE Resource Map; it is missing a valid atom:category statement");
}
/**
 * Finds the Resource Map URI (URI-R): the href of the first link whose rel is 'self'.
 *
 * @param links the links to search
 * @return the URI of the first rel='self' link
 * @throws OREParserException if there is no rel='self' link, or its href is not a valid URI
 */
private URI getURIR(List<Link> links)
        throws OREParserException
{
    for (Link link : links)
    {
        if (!"self".equals(link.getRel()))
        {
            continue;
        }

        // only the first matching link is considered
        try
        {
            return new URI(link.getHref());
        }
        catch (URISyntaxException e)
        {
            throw new OREParserException("unable to parse link in atom:link[@rel='self']", e);
        }
    }
    throw new OREParserException("Passed ATOM document does not contain a URI for the Resource Map; atom:link[@rel='self']");
}
/**
 * Finds the Aggregated Resource URI (URI-AR): the href of the first link whose rel
 * is 'alternate'.
 *
 * @param links the links to search
 * @return the URI of the first rel='alternate' link
 * @throws OREParserException if there is no rel='alternate' link, or its href is not a valid URI
 */
private URI getURIAR(List<Link> links)
        throws OREParserException
{
    for (Link link : links)
    {
        if (!"alternate".equals(link.getRel()))
        {
            continue;
        }

        // only the first matching link is considered
        try
        {
            return new URI(link.getHref());
        }
        catch (URISyntaxException e)
        {
            throw new OREParserException("unable to parse link in atom:link[@rel='alternate']", e);
        }
    }
    throw new OREParserException("Passed ATOM document does not contain a URI for the Aggregated Resource");
}
/**
* Accepts configuration properties; this implementation has an empty body and ignores them.
*
* @param properties configuration values (unused)
*/
public void configure(Properties properties)
{
// no-op: nothing is read from the supplied properties
}
/* backup copy; has been re-written above
public ResourceMap parse(InputStream is)
throws OREParserException
{
try
{
// read in the ATOM document
XmlReader reader = new XmlReader(is);
WireFeedInput input = new WireFeedInput();
Feed atom = (Feed) input.build(reader);
// mine the atom feed
URI uri_a = new URI(atom.getId());
String title = atom.getTitle();
List authors = atom.getAuthors();
List links = atom.getOtherLinks();
List altLinks = atom.getAlternateLinks();
links.addAll(altLinks); // add all the links together
List categories = atom.getCategories();
List rdf = (List) atom.getForeignMarkup();
Date updated = atom.getUpdated();
Generator generator = atom.getGenerator();
String rights = atom.getRights();
List entries = atom.getEntries();
this.validate(categories);
URI uri_r = this.getURIR(links);
// extract the goodies from the embedded RDF
AggregationRDF ardf = new AggregationRDF();
RemRDF rrdf = new RemRDF();
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
Namespace oreNs = Namespace.getNamespace("ore", "http://www.openarchives.org/ore/terms/");
Namespace rdfNs = Namespace.getNamespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
Namespace dcNs = Namespace.getNamespace("dc", "http://purl.org/dc/terms/");
for (Element element : rdf)
{
String about = element.getAttributeValue("about");
if (about.equals(uri_a.toString()))
{
ardf.setAbout(about);
List children = element.getChildren("isAggregatedBy", oreNs);
List aggregatedBy = new ArrayList();
for (Element child : children)
{
String aggregator = child.getValue().trim();
aggregatedBy.add(aggregator);
}
ardf.setAggregatedBy(aggregatedBy);
List kids = element.getChildren("created", dcNs);
List created = new ArrayList();
for (Element kid : kids)
{
String create = kid.getValue().trim();
Date date = sdf.parse(create);
created.add(date);
}
ardf.setCreated(created);
}
else if (about.equals(uri_r.toString()))
{
rrdf.setAbout(about);
List kids = element.getChildren("created", dcNs);
List created = new ArrayList();
for (Element kid : kids)
{
String create = kid.getValue().trim();
Date date = sdf.parse(create);
created.add(date);
}
rrdf.setCreated(created);
}
}
// construct our resource map
ResourceMap rem = OREFactory.createResourceMap(uri_r);
if (updated != null)
{
rem.setModified(updated);
}
if (rights != null)
{
rem.setRights(rights);
}
rem.setCreated(rrdf.getCreated().get(0));
if (generator != null)
{
Agent remCreator = OREFactory.createAgent();
remCreator.addName(generator.getValue());
String genURL = generator.getUrl();
if (genURL != null)
{
// remCreator.addSeeAlso(new URI(genURL));
}
rem.addCreator(remCreator);
}
// construct the aggregation
Aggregation aggregation = OREFactory.createAggregation(uri_a);
aggregation.addTitle(title);
aggregation.setCreated(ardf.getCreated().get(0));
List rems = new ArrayList();
for (String aggregator : ardf.getAggregatedBy())
{
// FIXME: can we get the mimetype from anywhere?
ReMSerialisation serial = new ReMSerialisation();
serial.setURI(new URI(aggregator));
rems.add(serial);
}
aggregation.setReMSerialisations(rems);
List types = new ArrayList();
for (Category category : categories)
{
String aggURI = OREVocabulary.aggregation.getUri().toString();
if (!aggURI.equals(category.getTerm()))
{
// these specify the aggregation type
String type = category.getTerm();
types.add(type);
}
}
// aggregation.setTypes(types);
List creators = new ArrayList();
for (Person author : authors)
{
Agent creator = OREFactory.createAgent();
creator.addName(author.getName());
creators.add(creator);
}
aggregation.setCreators(creators);
List similars = new ArrayList();
for (Link link : links)
{
String rel = link.getRel();
if ("related".equals(rel))
{
similars.add(new URI(link.getHref()));
}
}
aggregation.setSimilarTo(similars);
// process the entries, each of which is an AggregatedResource
List ars = new ArrayList();
for (Entry entry : entries)
{
// mine the entry
URI uri_ar = new URI(entry.getId());
String arTitle = entry.getTitle();
Date arUpdated = entry.getUpdated();
List arAlternates = entry.getAlternateLinks();
List arCategories = entry.getCategories();
AggregatedResource ar = OREFactory.createAggregatedResource(uri_ar);
ars.add(ar);
}
// construct the model
aggregation.setAggregatedResources(ars);
rem.setAggregation(aggregation);
return rem;
}
catch (IOException e)
{
throw new OREParserException(e);
}
catch (FeedException e)
{
throw new OREParserException(e);
}
catch (URISyntaxException e)
{
throw new OREParserException(e);
}
catch (OREException e)
{
throw new OREParserException(e);
}
catch (ParseException e)
{
throw new OREParserException(e);
}
}*/
///////////////////////////////////////////////////////////////////
// Privately used classes
///////////////////////////////////////////////////////////////////
// FIXME: these should no longer be necessary
/**
 * Simple holder for values read from an rdf:Description about the Aggregation:
 * the rdf:about value, the ore:isAggregatedBy values and the created dates.
 * Only referenced from commented-out code in this file.
 */
private class AggregationRDF
{
    /** value of the about attribute of the description */
    private String about;

    /** values of the isAggregatedBy children, as strings */
    private List<String> isAggregatedBy;

    /** parsed values of the created children */
    private List<Date> created;

    /** @return the about value, or null if none has been set */
    public String getAbout()
    {
        return about;
    }

    /** @param about the about value to hold */
    public void setAbout(String about)
    {
        this.about = about;
    }

    /** @return the isAggregatedBy values, or null if none have been set */
    public List<String> getAggregatedBy()
    {
        return isAggregatedBy;
    }

    /** @param aggregatedBy the isAggregatedBy values to hold (stored by reference) */
    public void setAggregatedBy(List<String> aggregatedBy)
    {
        isAggregatedBy = aggregatedBy;
    }

    /** @return the created dates, or null if none have been set */
    public List<Date> getCreated()
    {
        return created;
    }

    /** @param created the created dates to hold (stored by reference) */
    public void setCreated(List<Date> created)
    {
        this.created = created;
    }
}
/**
 * Simple holder for values read from an rdf:Description about the Resource Map:
 * the rdf:about value and the created dates.
 * Only referenced from commented-out code in this file.
 */
private class RemRDF
{
    /** value of the about attribute of the description */
    private String about;

    /** parsed values of the created children */
    private List<Date> created;

    /** @return the about value, or null if none has been set */
    public String getAbout()
    {
        return about;
    }

    /** @param about the about value to hold */
    public void setAbout(String about)
    {
        this.about = about;
    }

    /** @return the created dates, or null if none have been set */
    public List<Date> getCreated()
    {
        return created;
    }

    /** @param created the created dates to hold (stored by reference) */
    public void setCreated(List<Date> created)
    {
        this.created = created;
    }
}
}