Warning: These documents are under active development and subject to change (version 2.1.0-beta).
The latest release documents are at: https://purl.dataone.org/architecture

Components of the DataONE Infrastructure

The following diagrams indicate code dependencies between various components that make up the DataONE infrastructure. Functional dependencies are not depicted.

Common Components

Figure 1. Core shared components.

digraph core_components {

   // Figure 1: code dependencies between the core shared components.
   // An edge "A -> B" reads "A has a code dependency on B".

   // Graph-level label font.
   fontname = "Bitstream Vera Sans";
   fontsize = 8;

   // Default edge appearance: thin grey arrows with open heads.
   edge [
     fontname = "Bitstream Vera Sans"
     fontsize = 8
     color = "#888888"
     arrowhead = "open"
     arrowsize = 0.5
     style="solid"
     ];

   // Default node appearance: small monospaced labels.
   node [
     fontname = "Courier"
     fontsize = 8
     fontcolor = "black"];

   color="#888888";

   // The schema node links to the "dataonetypes" anchor on the page.
   schema [label="dataonetypes.xsd" URL="#dataonetypes"];
   d1_common_java;
   d1_common_python;
   d1_libclient_java;
   d1_libclient_python;
   d1_architecture;
   d1_jibx_extensions;
   d1_test_resources;
   // Filled nodes use "fillcolor": "bgcolor" applies only to graphs and
   // clusters and is ignored on nodes.
   // NOTE(review): filled grey presumably marks external libraries - confirm.
   foresite_java [style="filled" fillcolor="grey80"];
   foresite_python [style="filled" fillcolor="grey80"];

   // Edges into d1_test_resources are dashed and edges into d1_architecture
   // are dotted.
   // NOTE(review): presumably test-only and documentation dependencies
   // respectively - confirm.
   d1_architecture -> schema;
   d1_common_java -> schema;
   d1_common_java -> d1_jibx_extensions;
   d1_common_java -> d1_test_resources [style="dashed"];
   d1_common_java -> d1_architecture [style="dotted"];
   d1_common_python -> schema;
   d1_common_python  -> d1_architecture [style="dotted"];
   d1_libclient_java -> d1_common_java;
   d1_libclient_java -> d1_test_resources [style="dashed"];
   d1_libclient_java  -> d1_architecture [style="dotted"];
   d1_libclient_java  -> foresite_java;
   d1_libclient_python -> d1_common_python;
   d1_libclient_python -> d1_architecture [style="dotted"];
   d1_libclient_python -> foresite_python;

}

NEEDS VERIFYING

Integration Test Tools

Figure 2. Test services.

digraph core_components {

    // Figure 2: integration test tools and the shared components they
    // depend on. An edge "A -> B" reads "A has a code dependency on B".

    color="#888888";
    fontname = "Bitstream Vera Sans";
    fontsize = 8;

    // Default edge appearance: thin grey arrows with open heads.
    edge [
      fontname = "Bitstream Vera Sans"
      fontsize = 8
      color = "#888888"
      arrowhead = "open"
      arrowsize = 0.5
      style="solid"
      ];

    // Default node appearance: small monospaced labels.
    node [
      fontname = "Courier"
      fontsize = 8
      fontcolor = "black"];

    // The shared components, drawn as a labelled cluster.
    subgraph cluster_CORE {
      label="Shared Components";
      color="#888888";

      schema [label="dataonetypes.xsd"];
      d1_common_java;
      d1_common_python;
      d1_libclient_java;
      d1_libclient_python;
      d1_architecture;
      d1_jibx_extensions;
      d1_test_resources;
      // Filled nodes use "fillcolor": "bgcolor" applies only to graphs and
      // clusters and is ignored on nodes.
      foresite_java [style="filled" fillcolor="grey80"];
      foresite_python [style="filled" fillcolor="grey80"];

      d1_architecture -> schema;
      d1_common_java -> schema;
      d1_common_java -> d1_jibx_extensions;
      d1_common_java -> d1_test_resources [style="dashed"];
      d1_common_java -> d1_architecture [style="dotted"];
      d1_common_python -> schema;
      d1_common_python  -> d1_architecture [style="dotted"];
      d1_libclient_java -> d1_common_java;
      d1_libclient_java -> d1_test_resources [style="dashed"];
      d1_libclient_java  -> d1_architecture [style="dotted"];
      d1_libclient_java  -> foresite_java;
      d1_libclient_python -> d1_common_python;
      d1_libclient_python -> d1_architecture [style="dotted"];
      d1_libclient_python -> foresite_python;
    }

    // Test tooling, drawn outside the shared-components cluster.
    d1_integration;
    d1_web_test_site;
    d1_instance_generator;
    // NOTE(review): d1_echo_service has no edges in this figure - confirm
    // whether its dependencies are missing.
    d1_echo_service;
    // Declared explicitly rather than only implicitly through the edge below.
    Certificates;

    d1_integration -> d1_common_java;
    d1_integration -> d1_libclient_java;
    d1_integration -> Certificates;

    d1_web_test_site -> d1_integration;

    d1_instance_generator -> d1_common_python;
    d1_instance_generator -> d1_libclient_python;
}

Coordinating Node Components

Figure 3. Coordinating node components.

digraph core_components {

   // Figure 3: coordinating node (CN) components and the shared components
   // and services they depend on. An edge "A -> B" reads "A has a code
   // dependency on B".

   color="#888888";
   fontname = "Bitstream Vera Sans";
   fontsize = 10;

   // Default edge appearance: thin grey arrows with open heads.
   edge [
     fontname = "Bitstream Vera Sans"
     fontsize = 8
     color = "#888888"
     arrowhead = "open"
     arrowsize = 0.5
     style="solid"
     ];

   // Default node appearance: monospaced labels.
   node [
     fontname = "Courier"
     fontsize = 10
     fontcolor = "black"];

   // The shared components, drawn as a labelled cluster.
   subgraph cluster_CORE {
     label="Shared Components";
     color="#888888";

     schema [label="dataonetypes.xsd"];
     d1_common_java;
     d1_common_python;
     d1_libclient_java;
     d1_libclient_python;
     d1_architecture;
     d1_jibx_extensions;
     d1_test_resources;
     // Filled nodes use "fillcolor": "bgcolor" applies only to graphs and
     // clusters and is ignored on nodes.
     foresite_java [style="filled" fillcolor="grey80"];
     foresite_python [style="filled" fillcolor="grey80"];

     d1_architecture -> schema;
     d1_common_java -> schema;
     d1_common_java -> d1_jibx_extensions;
     d1_common_java -> d1_test_resources [style="dashed"];
     d1_common_java -> d1_architecture [style="dotted"];
     d1_common_python -> schema;
     d1_common_python  -> d1_architecture [style="dotted"];
     d1_libclient_java -> d1_common_java;
     d1_libclient_java -> d1_test_resources [style="dashed"];
     d1_libclient_java  -> d1_architecture [style="dotted"];
     d1_libclient_java  -> foresite_java;
     d1_libclient_python -> d1_common_python;
     d1_libclient_python -> d1_architecture [style="dotted"];
     d1_libclient_python -> foresite_python;
   }


   // Grey-filled nodes, all using "fillcolor" consistently.
   // NOTE(review): these are presumably external services/libraries - confirm.
   postgres [style="filled" fillcolor="grey80"];
   hazelcast [style="filled" fillcolor="grey80"];
   LDAP [style="filled" fillcolor="grey80"];
   SOLR [style="filled" fillcolor="grey80"];
   cilogon_portal_servlet [style="filled" fillcolor="grey80" label="cilogon-portal-servlet"];

   // Coordinating node components.
   // NOTE(review): d1_cn_version_tool has no edges in this figure - confirm
   // whether its dependencies are missing.
   d1_cn_version_tool;
   d1_cn_common;
   d1_cn_index_common;
   d1_cn_index_generator;
   d1_cn_noderegistry;
   d1_mercury_common;
   d1_portal;
   d1_cn_approve_node;
   d1_identity_manager;
   d1_log_aggregation;
   d1_mercury_ui;
   d1_portal_servlet;
   d1_cn_rest;
   d1_cn_rest_proxy;
   d1_cn_service;
   d1_solr_extensions;
   d1_synchronization;
   d1_cn_index_processor;
   d1_replication;
   d1_cn_index_tool;
   d1_process_daemon;
   cn_metacat;

   // Dependencies, grouped by the depending component.
   d1_cn_common -> d1_common_java;
   d1_cn_common -> hazelcast;

   d1_cn_index_common -> d1_common_java;
   d1_cn_index_common -> postgres;

   d1_cn_index_generator -> d1_cn_index_common;
   d1_cn_index_generator -> d1_common_java;
   d1_cn_index_generator -> d1_cn_common;
   d1_cn_index_generator -> postgres;
   d1_cn_index_generator -> hazelcast;

   d1_cn_noderegistry -> d1_cn_common;
   d1_cn_noderegistry -> LDAP;
   d1_cn_noderegistry -> hazelcast;

   d1_mercury_common -> d1_common_java;
   d1_mercury_common -> d1_libclient_java;
   d1_mercury_common -> d1_portal;

   d1_portal -> d1_common_java;
   d1_portal -> d1_libclient_java;

   d1_cn_approve_node -> d1_cn_noderegistry;
   d1_cn_approve_node -> d1_libclient_java;

   d1_identity_manager -> d1_libclient_java;
   d1_identity_manager -> d1_cn_noderegistry;

   d1_log_aggregation -> d1_libclient_java;
   d1_log_aggregation -> d1_cn_noderegistry;

   d1_mercury_ui -> d1_common_java;
   d1_mercury_ui -> d1_libclient_java;
   d1_mercury_ui -> d1_portal;

   d1_portal_servlet -> cilogon_portal_servlet;
   d1_portal_servlet -> d1_portal;
   d1_portal_servlet -> hazelcast;

   d1_cn_rest_proxy -> d1_cn_common;

   d1_cn_rest -> d1_libclient_java;
   d1_cn_rest -> d1_identity_manager;
   d1_cn_rest -> d1_portal;
   d1_cn_rest -> d1_cn_rest_proxy;
   d1_cn_rest -> d1_cn_noderegistry;
   d1_cn_rest -> d1_cn_common;

   d1_cn_service -> d1_cn_rest;
   d1_cn_service -> d1_cn_rest_proxy;

   d1_solr_extensions -> d1_libclient_java;
   d1_solr_extensions -> d1_cn_common;
   d1_solr_extensions -> d1_identity_manager;
   d1_solr_extensions -> d1_cn_noderegistry;
   d1_solr_extensions -> d1_portal;

   d1_synchronization -> d1_libclient_java;
   d1_synchronization -> d1_cn_noderegistry;
   d1_synchronization -> d1_identity_manager;
   d1_synchronization -> hazelcast;

   d1_cn_index_processor -> d1_cn_common;
   d1_cn_index_processor -> d1_libclient_java;
   d1_cn_index_processor -> d1_cn_index_common;
   d1_cn_index_processor -> d1_cn_index_generator;
   d1_cn_index_processor -> postgres;
   d1_cn_index_processor -> SOLR;

   d1_replication -> d1_cn_noderegistry;
   d1_replication -> d1_cn_common;
   d1_replication -> d1_libclient_java;
   d1_replication -> hazelcast;

   d1_cn_index_tool -> d1_common_java;
   d1_cn_index_tool -> d1_libclient_java;
   d1_cn_index_tool -> d1_cn_common;
   d1_cn_index_tool -> d1_cn_index_common;
   d1_cn_index_tool -> d1_cn_index_generator;
   d1_cn_index_tool -> d1_cn_index_processor;

   d1_process_daemon -> d1_synchronization;
   d1_process_daemon -> d1_log_aggregation;
   d1_process_daemon -> d1_replication;
   d1_process_daemon -> hazelcast;

   cn_metacat -> d1_common_java;
   cn_metacat -> d1_libclient_java;
   cn_metacat -> postgres;
   cn_metacat -> hazelcast;
   cn_metacat -> foresite_java;

}

Member Node Components and Instances

Figure 4. Member node implementations (ovals) and instances (rectangles).

NEEDS UPDATING

digraph core_components {

    // Figure 4: member node (MN) implementations (default oval nodes) and
    // deployed instances (record-shaped nodes). An edge "A -> B" reads
    // "A has a code dependency on B" or, for instances, "A runs on B".

    color="#888888";
    fontname = "Bitstream Vera Sans";
    fontsize = 8;

    // Default edge appearance: thin grey arrows with open heads.
    edge [
      fontname = "Bitstream Vera Sans"
      fontsize = 8
      color = "#888888"
      arrowhead = "open"
      arrowsize = 0.5
      style="solid"
      ];

    // Default node appearance: small monospaced labels.
    node [
      fontname = "Courier"
      fontsize = 8
      fontcolor = "black"];

    // The shared components, drawn as a labelled cluster.
    // d1_jibx_extensions and d1_test_resources are not declared here; they
    // are created implicitly by the edges inside this cluster.
    subgraph cluster_CORE {
      label="Shared Components";
      color="#888888";

      schema [label="dataonetypes.xsd"];
      d1_common_java;
      d1_common_python;
      d1_libclient_java;
      d1_libclient_python;
      d1_architecture;
      // Filled nodes use "fillcolor": "bgcolor" applies only to graphs and
      // clusters and is ignored on nodes.
      foresite_java [style="filled" fillcolor="grey80"];
      foresite_python [style="filled" fillcolor="grey80"];

      d1_architecture -> schema;
      d1_common_java -> schema;
      d1_common_java -> d1_jibx_extensions;
      d1_common_java -> d1_test_resources [style="dashed"];
      d1_common_java -> d1_architecture [style="dotted"];
      d1_common_python -> schema;
      d1_common_python  -> d1_architecture [style="dotted"];
      d1_libclient_java -> d1_common_java;
      d1_libclient_java -> d1_test_resources [style="dashed"];
      d1_libclient_java  -> d1_architecture [style="dotted"];
      d1_libclient_java  -> foresite_java;
      d1_libclient_python -> d1_common_python;
      d1_libclient_python -> d1_architecture [style="dotted"];
      d1_libclient_python -> foresite_python;
    }

    // Member node implementations (default shape).
    Metacat;
    Mercury_MN;

    // Member node instances (record shape).
    KNB [shape="record"];
    ORNL_DAAC [shape="record"];
    USGS [shape="record"];
    Dryad [shape="record"];
    Merritt [shape="record"];
    SAN_Parks [shape="record"];
    MN_Replication_UNM_1 [shape="record"];
    MN_Replication_UNM_2 [shape="record"];
    MN_Replication_UCSB_1 [shape="record"];
    MN_Replication_UCSB_2 [shape="record"];
    MN_Replication_ORC_1 [shape="record"];
    MN_Replication_ORC_2 [shape="record"];

    // GMN is a member node implementation (default shape). It is declared
    // explicitly rather than only implicitly through the edges below.
    GMN;

    Metacat -> d1_common_java;
    Metacat -> d1_libclient_java;
    KNB -> Metacat;
    SAN_Parks -> Metacat;
    Merritt -> Metacat;

    Mercury_MN -> d1_common_java;
    ORNL_DAAC -> Mercury_MN;
    USGS -> Mercury_MN;

    GMN -> d1_common_python;
    GMN -> d1_libclient_python;
    MN_Replication_UNM_1 -> GMN;
    MN_Replication_UCSB_1 -> GMN;
    MN_Replication_ORC_1 -> GMN;

    MN_Replication_UNM_2 -> Metacat;
    MN_Replication_UCSB_2 -> Metacat;
    MN_Replication_ORC_2 -> Metacat;

    // Dryad is drawn as an instance but depends directly on the shared
    // Java libraries rather than on a separate implementation node.
    Dryad -> d1_common_java;
    Dryad -> d1_libclient_java;
}

Investigator Toolkit Components

Figure 5. Investigator toolkit.

NEEDS UPDATING

digraph core_components {

    // Figure 5: Investigator Toolkit (ITK) clients and the shared components
    // they depend on. An edge "A -> B" reads "A has a code dependency on B".

    color="#888888";
    fontname = "Bitstream Vera Sans";
    fontsize = 8;

    // Default edge appearance: thin grey arrows with open heads.
    edge [
      fontname = "Bitstream Vera Sans"
      fontsize = 8
      color = "#888888"
      arrowhead = "open"
      arrowsize = 0.5
      style="solid"
      ];

    // Default node appearance: small monospaced labels.
    node [
      fontname = "Courier"
      fontsize = 8
      fontcolor = "black"];

    // The shared components, drawn as a labelled cluster.
    // d1_jibx_extensions and d1_test_resources are not declared here; they
    // are created implicitly by the edges inside this cluster.
    subgraph cluster_CORE {
      label="Shared Components";
      color="#888888";

      schema [label="dataonetypes.xsd"];
      d1_common_java;
      d1_common_python;
      d1_libclient_java;
      d1_libclient_python;
      d1_architecture;
      // Filled nodes use "fillcolor": "bgcolor" applies only to graphs and
      // clusters and is ignored on nodes.
      foresite_java [style="filled" fillcolor="grey80"];
      foresite_python [style="filled" fillcolor="grey80"];

      d1_architecture -> schema;
      d1_common_java -> schema;
      d1_common_java -> d1_jibx_extensions;
      d1_common_java -> d1_test_resources [style="dashed"];
      d1_common_java -> d1_architecture [style="dotted"];
      d1_common_python -> schema;
      d1_common_python  -> d1_architecture [style="dotted"];
      d1_libclient_java -> d1_common_java;
      d1_libclient_java -> d1_test_resources [style="dashed"];
      d1_libclient_java  -> d1_architecture [style="dotted"];
      d1_libclient_java  -> foresite_java;
      d1_libclient_python -> d1_common_python;
      d1_libclient_python -> d1_architecture [style="dotted"];
      d1_libclient_python -> foresite_python;
    }

    // Investigator Toolkit clients.
    d1_client_cli;
    d1_client_r;
    d1_client_fuse;
    d1_client_dokan;

    d1_client_cli -> d1_common_python;
    d1_client_cli -> d1_libclient_python;
    d1_client_r -> d1_common_java;
    d1_client_r -> d1_libclient_java;
    d1_client_fuse -> d1_common_python;
    d1_client_fuse -> d1_libclient_python;
    // The Dokan client depends on the FUSE client, not directly on the
    // shared libraries.
    d1_client_dokan -> d1_client_fuse;
}

Table of Components

Components
Component Category Responsible Description
dataonetypes Common MJ Schema used for defining serialization of core data types
d1_architecture Common DV The system architecture documentation
operations Operations DV Operations documentation - servers etc
d1_common_java Common RW Base DataONE library in Java
d1_common_python Common RD Base DataONE library in Python
d1_libclient_java Common RN Client library implemented in Java
d1_libclient_python Common RD Client library implemented in Python
d1_web_test_site Testing RN Member node integration testing service
d1_echo_service Testing DV An HTTP echo service used for testing
d1_integration Testing RN Integration testing for components and combinations thereof
Certificates Testing RW Generation and management of certificates for use by server components
d1_instance_generator Testing RD Generates example instances of objects defined in dataoneTypes.xsd
one_mercury CN GP The search interface that is implemented by the Mercury search index
cn_metacat CN CJ The Metacat application. Currently employed as the replicated object store on Coordinating Nodes.
d1_cn_index_processor CN SR/DV Populates the SOLR index by extracting information from system metadata, science metadata and resource maps.
d1_cn_index_generator CN SR/DV Generates indexing tasks when new objects appear or system metadata changes
d1_cn_index_common CN SR/DV Code shared between the indexing components
indexerapi CN SR/DV A library used by the index_processor for extracting content from various types of XML structures such as system metadata, science metadata and resource maps.
d1_portal_servlet CN BL Provides a UI for interacting with the CILogon service, an authentication proxy service
d1_portal CN BL Implements the certificate manager used by the portal servlets
d1_identity_manager CN BL Provides mechanisms for managing subjects in DataONE
d1_process_daemon CN RW Monitors content on member nodes, creating tasks for synchronization and replication
d1_synchronization CN RW Manages the synchronization of content between Member Nodes and the Coordinating Nodes.
d1_replication CN CJ Manages replication of content between Member Nodes
d1_cn_noderegistry CN RW A register of coordinating and member nodes participating in a DataONE environment
d1_cn_common CN RW A library of code shared between coordinating node components
d1_cn_rest CN RW The coordinating node HTTP REST service interface
d1_cn_rest_proxy CN RW Proxies requests coming in to a CN to underlying service implementations such as the object store (i.e. Metacat)
d1_cn_service CN RW Coordinating node service, implementing the service APIs, data storage, and CN replication.
d1_simple_search CN DV A simple search interface using Javascript and the SOLR interface.
Metacat MN CJ The Metacat application. Implements the DataONE MN service interfaces.
Dryad MN RS A member node implementation and instance for the Dryad repository
GMN MN RD A generic, standalone Member Node implementation written in Python using the Django framework.
Mercury_MN MN JG Mercury implementation of the Member Node services
d1_client_cli ITK RD A command line client for interacting with the DataONE infrastructure. Currently implemented using d1_libclient_python.
d1_client_fuse ITK DV A FUSE driver for mounting the DataONE infrastructure as a file system.
d1_client_dokan ITK DV An extension of the FUSE driver that is based on Dokan for use on Microsoft Windows systems.
d1_client_r ITK MJ A plugin for R that enables access to DataONE content from the R application. Implemented using d1_libclient_java.
hzpeek Testing DV A tool for examining the Hazelcast queues on the CNs
debian_packaging CN RW Debian packages for the CN components