-- use default values initially CREATE TABLE doi_registration AS select text('testing') as ezid_account, doc.doctype, id.guid, 'https://cn.dataone.org/cn/v1/resolve/' || regexp_replace(regexp_replace(id.guid, '/', '%2F', 'g'), ':', '%3A', 'g') as url, text('Unknown') as title, text(null) as creator, text(null) as publisher, to_char(doc.date_created, 'YYYY') as pub_date from identifier id, xml_documents doc where guid like 'doi%' and id.docid = doc.docid and id.rev = doc.rev UNION ALL select text('testing') as ezid_account, doc.doctype, id.guid, 'https://cn.dataone.org/cn/v1/resolve/' || regexp_replace(regexp_replace(id.guid, '/', '%2F', 'g'), ':', '%3A', 'g') as url, text('Unknown') as title, text(null) as creator, text(null) as publisher, to_char(doc.date_created, 'YYYY') as pub_date from identifier id, xml_revisions doc where guid like 'doi%' and id.docid = doc.docid and id.rev = doc.rev; --update defaults update doi_registration set title = 'Data file' where doctype = 'BIN'; update doi_registration set title = 'Legacy EML file' where doctype like '%eml%2.0.0beta%'; -- update title using node information update doi_registration doi set title = child.nodedata from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child where doi.guid = id.guid and id.docid = docs.docid and id.rev = docs.rev and docs.rootnodeid = nodes.rootnodeid and nodes.nodeid = child.parentnodeid and nodes.parentnodeid = parent.nodeid and nodes.nodename = 'title' and parent.nodename = 'dataset'; -- update pubDate using nodes update doi_registration doi set pub_date = child.nodedata from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child where doi.guid = id.guid and id.docid = docs.docid and id.rev = docs.rev and docs.rootnodeid = nodes.rootnodeid and nodes.nodeid = child.parentnodeid and nodes.parentnodeid = parent.nodeid and nodes.nodename = 'pubDate' and parent.nodename = 'dataset'; -- update publisher using nodes update doi_registration doi set publisher = child.nodedata from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child where doi.guid = id.guid and id.docid = docs.docid and id.rev = docs.rev and docs.rootnodeid = nodes.rootnodeid and nodes.nodeid = child.parentnodeid and nodes.parentnodeid = parent.nodeid and nodes.nodename = 'organizationName' and parent.nodename = 'publisher' and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT'); -- update publisher surName update doi_registration doi set publisher = child.nodedata from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child, xml_nodes grandparent where doi.guid = id.guid and id.docid = docs.docid and id.rev = docs.rev and docs.rootnodeid = nodes.rootnodeid and nodes.nodeid = child.parentnodeid and nodes.parentnodeid = parent.nodeid and parent.parentnodeid = grandparent.nodeid and nodes.nodename = 'surName' and parent.nodename = 'individualName' and grandparent.nodename = 'publisher' and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT'); -- add the first name if we have it update doi_registration doi set publisher = publisher ||', '|| child.nodedata from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child, xml_nodes grandparent where doi.guid = id.guid and id.docid = docs.docid and id.rev = docs.rev and docs.rootnodeid = nodes.rootnodeid and nodes.nodeid = child.parentnodeid and nodes.parentnodeid = parent.nodeid and parent.parentnodeid = grandparent.nodeid and nodes.nodename = 'givenName' and parent.nodename = 'individualName' and grandparent.nodename = 'publisher' and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT'); -- update organization using nodes update doi_registration doi set creator = child.nodedata from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child where doi.guid = id.guid and id.docid = docs.docid and id.rev = docs.rev and docs.rootnodeid = nodes.rootnodeid and nodes.nodeid = child.parentnodeid and nodes.parentnodeid = parent.nodeid and nodes.nodename = 'organizationName' and parent.nodename = 'creator' and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT'); -- update creator using nodes -- creator/individualName/surName -- creator/individualName/givenName update doi_registration doi set creator = child.nodedata from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child, xml_nodes grandparent where doi.guid = id.guid and id.docid = docs.docid and id.rev = docs.rev and docs.rootnodeid = nodes.rootnodeid and nodes.nodeid = child.parentnodeid and nodes.parentnodeid = parent.nodeid and parent.parentnodeid = grandparent.nodeid and nodes.nodename = 'surName' and parent.nodename = 'individualName' and grandparent.nodename = 'creator' and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT'); -- add the first name if we have it update doi_registration doi set creator = creator ||', '|| child.nodedata from identifier id, xml_documents docs, xml_nodes nodes, xml_nodes parent, xml_nodes child, xml_nodes grandparent where doi.guid = id.guid and id.docid = docs.docid and id.rev = docs.rev and docs.rootnodeid = nodes.rootnodeid and nodes.nodeid = child.parentnodeid and nodes.parentnodeid = parent.nodeid and parent.parentnodeid = grandparent.nodeid and nodes.nodename = 'givenName' and parent.nodename = 'individualName' and grandparent.nodename = 'creator' and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT'); -- update title using revisions update doi_registration doi set title = child.nodedata from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child where doi.guid = id.guid and id.docid = docs.docid and id.rev = docs.rev and docs.rootnodeid = nodes.rootnodeid and nodes.nodeid = child.parentnodeid and nodes.parentnodeid = parent.nodeid and nodes.nodename = 'title' and parent.nodename = 'dataset'; -- update pubDate using revisions update doi_registration doi set pub_date = child.nodedata from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child where doi.guid = id.guid and id.docid = docs.docid and id.rev = docs.rev and docs.rootnodeid = nodes.rootnodeid and nodes.nodeid = child.parentnodeid and nodes.parentnodeid = parent.nodeid and nodes.nodename = 'pubDate' and parent.nodename = 'dataset'; -- update publisher using revisions update doi_registration doi set publisher = child.nodedata from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child where doi.guid = id.guid and id.docid = docs.docid and id.rev = docs.rev and docs.rootnodeid = nodes.rootnodeid and nodes.nodeid = child.parentnodeid and nodes.parentnodeid = parent.nodeid and nodes.nodename = 'organizationName' and parent.nodename = 'publisher' and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT'); -- use publisher individual name if we have it update doi_registration doi set publisher = child.nodedata from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child, xml_nodes_revisions grandparent where doi.guid = id.guid and id.docid = docs.docid and id.rev = docs.rev and docs.rootnodeid = nodes.rootnodeid and nodes.nodeid = child.parentnodeid and nodes.parentnodeid = parent.nodeid and parent.parentnodeid = grandparent.nodeid and nodes.nodename = 'surName' and parent.nodename = 'individualName' and grandparent.nodename = 'publisher' and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT'); --include the first name (careful not to rerun this part) update doi_registration doi set publisher = publisher ||', '|| child.nodedata from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child, xml_nodes_revisions grandparent where doi.guid = id.guid and id.docid = docs.docid and id.rev = docs.rev and docs.rootnodeid = nodes.rootnodeid and nodes.nodeid = child.parentnodeid and nodes.parentnodeid = parent.nodeid and parent.parentnodeid = grandparent.nodeid and nodes.nodename = 'givenName' and parent.nodename = 'individualName' and grandparent.nodename = 'publisher' and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT'); -- update organization using nodes update doi_registration doi set creator = child.nodedata from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child where doi.guid = id.guid and id.docid = docs.docid and id.rev = docs.rev and docs.rootnodeid = nodes.rootnodeid and nodes.nodeid = child.parentnodeid and nodes.parentnodeid = parent.nodeid and nodes.nodename = 'organizationName' and parent.nodename = 'creator' and nodes.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = parent.nodeid and nodetype = 'ELEMENT'); -- update creator using revisions update doi_registration doi set creator = child.nodedata from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child, xml_nodes_revisions grandparent where doi.guid = id.guid and id.docid = docs.docid and id.rev = docs.rev and docs.rootnodeid = nodes.rootnodeid and nodes.nodeid = child.parentnodeid and nodes.parentnodeid = parent.nodeid and parent.parentnodeid = grandparent.nodeid and nodes.nodename = 'surName' and parent.nodename = 'individualName' and grandparent.nodename = 'creator' and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT'); -- add the first name if we have it update doi_registration doi set creator = creator ||', '|| child.nodedata from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child, xml_nodes_revisions grandparent where doi.guid = id.guid and id.docid = docs.docid and id.rev = docs.rev and docs.rootnodeid = nodes.rootnodeid and nodes.nodeid = child.parentnodeid and nodes.parentnodeid = parent.nodeid and parent.parentnodeid = grandparent.nodeid and nodes.nodename = 'givenName' and parent.nodename = 'individualName' and grandparent.nodename = 'creator' and parent.nodeid = (select min(thisnode.nodeid) from xml_nodes_revisions thisnode where thisnode.parentnodeid = grandparent.nodeid and nodetype = 'ELEMENT'); -- use xml_documents for defaults that are still missing update doi_registration doi set creator = doc.user_owner from xml_documents doc, identifier id where doi.guid = id.guid and id.docid = doc.docid and id.rev = doc.rev and doi.creator is null; update doi_registration doi set creator = doc.user_owner from xml_revisions doc, identifier id where doi.guid = id.guid and id.docid = doc.docid and id.rev = doc.rev and doi.creator is null; -- set publisher update doi_registration set publisher = creator where publisher is null or publisher = ''; -- clean up update doi_registration set title = replace(title, E'\n', ' '); update doi_registration set title = regexp_replace(title, E'\\s+', ' ', 'g'); update doi_registration set title = regexp_replace(title, E'^\\s+', '', 'g'); update doi_registration set ezid_account = 'sb_nceas' where guid like 'doi:10.5063%'; --16988 update doi_registration set ezid_account = 'sb_pisco' where guid like 'doi:10.6085%'; --98078 update doi_registration set ezid_account = 'lternet' where guid like 'doi:10.6073%'; --49482 /** * Additional modifications, July 5th, 2012 */ ALTER TABLE doi_registration ADD COLUMN resourceType text, ADD COLUMN objectFormat text, ADD COLUMN obsoletedBy text, ADD COLUMN obsoletes text, ADD COLUMN resourceMapId text, ADD COLUMN resourceMapLocation text, ADD COLUMN access text; -- update access values update doi_registration set access = 'protected'; update doi_registration doi set access = 'public' from xml_access a where doi.guid = a.guid and a.principal_name = 'public' and a.permission >= '4' and a.perm_type = 'allow'; -- update the objectFormat from xml_documents update doi_registration doi set objectFormat = doc.doctype from identifier id, xml_documents doc where doi.guid = id.guid and id.docid = doc.docid and id.rev = doc.rev; -- update the objectFormat from xml_revisions update doi_registration doi set objectFormat = rev.doctype from identifier id, xml_revisions rev where doi.guid = id.guid and id.docid = rev.docid and id.rev = rev.rev; --update resourceType update doi_registration set resourceType = 'Dataset/metadata'; update doi_registration set resourceType = 'Dataset/data' where objectFormat = 'BIN'; -- update the data file titles update doi_registration doi set title = 'Data file - ' || doc.docname from identifier id, xml_documents doc where doi.guid = id.guid and id.docid = doc.docid and id.rev = doc.rev and doi.resourceType = 'Dataset/data'; -- update the data file titles from revisions update doi_registration doi set title = 'Data file - ' || doc.docname from identifier id, xml_revisions doc where doi.guid = id.guid and id.docid = doc.docid and id.rev = doc.rev and doi.resourceType = 'Dataset/data'; -- update the objectFormat from SM table (will be subset) update doi_registration doi set objectFormat = sm.object_format from systemMetadata sm where doi.guid = sm.guid; --16938 --update revision history from SM update doi_registration doi set obsoletes = sm.obsoletes, obsoletedBy = sm.obsoleted_by from systemMetadata sm where doi.guid = sm.guid; /** use plain old revision history **/ -- update obsoletedby update doi_registration doi set obsoletedBy = newer.guid from identifier id, identifier newer where doi.guid = id.guid and id.docid = newer.docid and newer.rev = (select min(next.rev) from identifier next where next.docid = id.docid and next.rev > id.rev); -- update the obsolets update doi_registration doi set obsoletes = older.guid from identifier id, identifier older where doi.guid = id.guid and id.docid = older.docid and older.rev = (select max(prev.rev) from identifier prev where prev.docid = id.docid and prev.rev < id.rev); /** select doi.guid, older.guid as obsoletes from doi_registration doi, identifier id, identifier older where doi.guid = id.guid and id.docid = older.docid and older.rev = (select max(prev.rev) from identifier prev where prev.docid = id.docid and prev.rev < id.rev) and doi.guid like 'doi:10.5063/AA/ABS.4%'; **/ --update the resourcemapid for described data CREATE TABLE ecogrid_docids AS select distinct id.docid, id.rev, id.guid, substring(xpi.nodedata from 'ecogrid://knb/(.*)$') as data_docid from xml_path_index xpi, identifier id, xml_documents xmld where xpi.docid = xmld.docid and xmld.docid = id.docid and xmld.rev = id.rev and xpi.path like 'dataset/%/physical/distribution/online/url' and xpi.nodedata like 'ecogrid%' and id.guid in (select guid from doi_registration); -- include revisions for described data INSERT INTO ecogrid_docids select distinct id.docid, id.rev, id.guid, substring(child.nodedata from 'ecogrid://knb/(.*)$') as data_docid from identifier id, xml_revisions docs, xml_nodes_revisions nodes, xml_nodes_revisions parent, xml_nodes_revisions child where id.docid = docs.docid and id.rev = docs.rev and docs.rootnodeid = nodes.rootnodeid and nodes.nodeid = child.parentnodeid and nodes.parentnodeid = parent.nodeid and nodes.nodename = 'url' and parent.nodename = 'online' and child.nodedata like 'ecogrid%' and child.nodetype = 'TEXT' and id.guid in (select guid from doi_registration); -- Set the resource map for the data files (NOTE: some of thee maps might not actually exist on the system depending on how successful the ORE generation was) update doi_registration doi set resourceMapId = 'resourceMap_'||eco.docid||'.'||eco.rev from identifier id, ecogrid_docids eco where doi.guid = id.guid and id.docid||'.'||id.rev = eco.data_docid; -- set the resource map id for the metadata file that did most of the packaging work! update doi_registration doi set resourceMapId = 'resourceMap_'||eco.docid||'.'||eco.rev from ecogrid_docids eco where doi.guid = eco.guid; select count(*) from doi_registration where resourceMapId is null; --update the resource map location update doi_registration set resourceMapLocation = 'https://cn.dataone.org/cn/v1/resolve/' || resourceMapId where resourceMapId is not null; -- fix null values (should not have to have this now, but it will not hurt) update doi_registration doi set creator = user_owner from identifier id, xml_documents docs where doi.guid = id.guid and id.docid = docs.docid and id.rev= docs.rev and (creator is null or trim(creator) = ''); update doi_registration doi set publisher = user_owner from identifier id, xml_documents docs where doi.guid = id.guid and id.docid = docs.docid and id.rev= docs.rev and (publisher is null or trim(publisher) = ''); update doi_registration doi set creator = user_owner from identifier id, xml_revisions docs where doi.guid = id.guid and id.docid = docs.docid and id.rev= docs.rev and (creator is null or trim(creator) = ''); update doi_registration doi set publisher = user_owner from identifier id, xml_revisions docs where doi.guid = id.guid and id.docid = docs.docid and id.rev= docs.rev and (publisher is null or trim(publisher) = ''); -- fix a previous mistake update doi_registration set publisher = null where publisher = 'document'; --update creator usinig LDAP lookup CREATE TABLE ecoinfo_dn (dn text, givenName text, sn text); --TRUNCATE TABLE ecoinfo_dn; COPY ecoinfo_dn FROM '/tmp/ecoinfo_dn.csv' WITH CSV HEADER; update doi_registration set creator = case when givenName is null then sn else sn || ', ' || givenName end from ecoinfo_dn where trim(both from lower(creator)) = lower(dn); update doi_registration set publisher = case when givenName is null then sn else sn || ', ' || givenName end from ecoinfo_dn where trim(both from lower(publisher)) = lower(dn); select creator, count(*) as cnt from doi_registration where creator like 'uid=%' group by creator order by cnt desc; -- update the data files with owner/publisher from EML entry update doi_registration doi set creator = meta.creator, publisher = meta.publisher from doi_registration meta, identifier id, ecogrid_docids eco where doi.guid = id.guid and id.docid||'.'||id.rev = eco.data_docid and eco.guid = meta.guid; -- clean up KBS entries that have empty user_owner update doi_registration set creator = 'Kellogg Biological Station' where (creator is null or trim(creator) = '') and guid like '%knb-lter-kbs%'; update doi_registration set publisher = 'Kellogg Biological Station' where (publisher is null or trim(publisher) = '') and guid like '%knb-lter-kbs%'; -- more clean up update doi_registration set creator = trim(creator), publisher = trim(publisher); -- set the publisher to the source system if it is the same as the creator update doi_registration set publisher = case when guid like 'doi:10.5063%' then 'Knowledge Network for Biocomplexity (KNB)' when guid like 'doi:10.6085%' then 'Partnership for Interdisciplinary Studies of Coastal Oceans (PISCO)' when guid like 'doi:10.6073%' then 'Long Term Ecological Research Network (LTER)' else publisher end where publisher = creator; -- entity references update doi_registration set publisher = regexp_replace(publisher, '&', '&', 'g'); update doi_registration set publisher = regexp_replace(publisher, ''', E'\'', 'g'); --accented i (í) update doi_registration set publisher = regexp_replace(publisher, 'í', 'í', 'g') where publisher like '%í%'; update doi_registration set creator = regexp_replace(creator, '&', '&', 'g'); update doi_registration set creator = regexp_replace(creator, ''', E'\'', 'g'); --titles update doi_registration set title = regexp_replace(title, '&', '&', 'g'); update doi_registration set title = regexp_replace(title, '∓', '&', 'g') where title like '%∓%'; update doi_registration set title = regexp_replace(title, ''', E'\'', 'g'); update doi_registration set title = regexp_replace(title, '"', E'\'', 'g'); update doi_registration set title = regexp_replace(title, '>', '>', 'g'); update doi_registration set title = regexp_replace(title, '<', '<', 'g'); update doi_registration set title = regexp_replace(title, 'í', 'í', 'g') where title like '%í%'; update doi_registration set title = regexp_replace(title, 'í', 'í', 'g') where title like '%í%'; update doi_registration set title = regexp_replace(title, 'ñ', 'í', 'g') where title like '%ñ%'; update doi_registration set title = regexp_replace(title, 'ó', 'ó', 'g') where title like '%ó%'; update doi_registration set title = regexp_replace(title, 'ã', 'ã', 'g') where title like '%ã%'; update doi_registration set title = regexp_replace(title, 'ç', 'ç', 'g') where title like '%ç%'; update doi_registration set title = regexp_replace(title, 'é', 'é', 'g') where title like '%é%'; update doi_registration set title = regexp_replace(title, 'ñ', 'ñ', 'g') where title like '%ñ%'; update doi_registration set title = regexp_replace(title, '–', '-', 'g') where title like '%–%'; update doi_registration set title = regexp_replace(title, '’', E'\'', 'g') where title like '%’%'; update doi_registration set title = regexp_replace(title, '–', '/', 'g') where title like '%–%'; update doi_registration set title = regexp_replace(title, 'á', 'á', 'g') where title like '%á%'; update doi_registration set title = regexp_replace(title, 'ó', 'ó', 'g') where title like '%ó%'; --格式範例 (格式范例) update doi_registration set title = regexp_replace(title, '格式範例', '格式范例', 'g') where title like '%格式範例%'; --directional quotes update doi_registration set title = regexp_replace(title, '”', '"', 'g') where title like '%”%'; update doi_registration set title = regexp_replace(title, '“', '"', 'g') where title like '%“%'; --one off apostrophe update doi_registration set title = regexp_replace(title, 'Õ', E'\'', 'g') where title like '%Õ%'; update doi_registration set title = regexp_replace(title, '–', '-', 'g') where title like '%–%'; --after reading the complete metadata, I could interpret these as "greater than" update doi_registration set title = regexp_replace(title, '&t;', '>', 'g') where title like '%&t;%' and guid in ('doi:10.6073/AA/knb-lter-luq.15.1', 'doi:10.6073/AA/knb-lter-luq.39.1', 'doi:10.6073/AA/knb-lter-luq.37.1', 'doi:10.6073/AA/knb-lter-luq.40.1', 'doi:10.6073/AA/knb-lter-luq.18.1' ); --special characters update doi_registration set creator = regexp_replace(creator, 'é', 'é', 'g') where creator like '%é%'; update doi_registration set creator = regexp_replace(creator, 'ç', 'ç', 'g') where creator like '%ç%'; update doi_registration set creator = regexp_replace(creator, 'ñ', 'ñ', 'g') where creator like '%ñ%'; -- Gutiérrez, Ralph, J. -- Liébault, Frédéric update doi_registration set creator = regexp_replace(creator, 'é', 'é', 'g') where creator like '%é%'; -- Ramírez, Alonso update doi_registration set creator = regexp_replace(creator, 'í', 'í', 'g') where creator like '%í%'; -- González, Grizelle update doi_registration set creator = regexp_replace(creator, 'á', 'á', 'g') where creator like '%á%'; --王, (王, 名, 姓) update doi_registration set creator = regexp_replace(creator, '王', '王', 'g') where creator like '%王%'; --名 update doi_registration set creator = regexp_replace(creator, '名', '名', 'g') where creator like '%名%'; --姓 update doi_registration set creator = regexp_replace(creator, '姓', '姓', 'g') where creator like '%姓%'; -- These are left -- Helmbrecht, S ’rai -- lkñlkl, Fgfggf -- clean up update doi_registration set publisher = replace(publisher, E'\n', ' '); update doi_registration set publisher = regexp_replace(publisher, E'\\s+', ' ', 'g'); update doi_registration set publisher = regexp_replace(publisher, E'^\\s+', '', 'g'); -- use only year for pub_date update doi_registration set pub_date = to_char(date(pub_date), 'YYYY') where length(pub_date) > 4 and pub_date like '%-%-%'; --set the rest to upload date, xml_documents update doi_registration doi set pub_date = to_char(date_created, 'YYYY') from xml_documents x, identifier id where length(pub_date) != 4 and doi.guid = id.guid and id.docid = x.docid and id.rev = x.rev; -- revisions update doi_registration doi set pub_date = to_char(date_created, 'YYYY') from xml_revisions x, identifier id where length(pub_date) != 4 and doi.guid = id.guid and id.docid = x.docid and id.rev = x.rev; update doi_registration set creator = replace(creator, E'\n', ' '); -- some entries got in with blank spaces update doi_registration set title = 'unknown' where length(trim(title)) = 0; update doi_registration set creator = 'unknown' where length(trim(creator)) = 0; -- BES has very long publisher fields, so use this default for them all update doi_registration set publisher = 'Baltimore Ecosystem Study LTER' where guid like '%knb-lter-bes%'; -- need URL identifiers for DataCite when they are not DOIs update doi_registration set obsoletedBy = 'https://cn.dataone.org/cn/v1/resolve/' || obsoletedBy where obsoletedBy not like 'doi%'; update doi_registration set obsoletes = 'https://cn.dataone.org/cn/v1/resolve/' || obsoletes where obsoletes not like 'doi%'; ALTER TABLE doi_registration ADD COLUMN obsoletedByIdType text, ADD COLUMN obsoletesIdType text, ADD COLUMN resourceMapLocationIdType text; update doi_registration set obsoletedByIdType = 'DOI' where obsoletedBy is not null and obsoletedBy like 'doi%'; update doi_registration set obsoletedByIdType = 'URL' where obsoletedBy is not null and obsoletedBy not like 'doi%'; update doi_registration set obsoletesIdType = 'DOI' where obsoletes is not null and obsoletes like 'doi%'; update doi_registration set obsoletesIdType = 'URL' where obsoletes is not null and obsoletes not like 'doi%'; update doi_registration set resourceMapLocationIdType = 'URL' where resourceMapLocation is not null; -- copy to the external file COPY (select ezid_account, guid as dc_identifier, url as datacite_url, title as dc_title, creator as dc_creator, publisher as dc_publisher, pub_date as datacite_publicationYear, split_part(resourceType, '/', 1) as datacite_resourceTypeGeneral, split_part(resourceType, '/', 2) as datacite_resourceType, objectFormat as datacite_format, obsoletedBy as datacite_relatedIdentifier_isPreviousVersionOf, obsoletedByIdType as datacite_relatedIdentifier_isPreviousVersionOfType, obsoletes as datacite_relatedIdentifier_isNewVersionOf, obsoletesIdType as datacite_relatedIdentifier_isNewVersionOfType, resourceMapLocation as datacite_relatedIdentifier_isPartOf, resourceMapLocationIdType as datacite_relatedIdentifier_isPartOfType, access as d1_read_access from doi_registration where access = 'public' order by dc_identifier) TO '/tmp/doi_registration.csv' WITH CSV HEADER; --164548 --drop table doi_registration; --drop table ecogrid_docids; --drop table ecoinfo_dn;