diff --git a/README.md b/README.md index 08009f91521276953fc2ea0e84607d69da657ddb..55760f053306c1f189ab25b4536a0dac08847784 100644 --- a/README.md +++ b/README.md @@ -125,3 +125,129 @@ These 2 are highlighted as an example of a primary and ancillary dataset that be ``` adex_data_scraper --argfile examples\vo\apertif_dr1_continuum_images_localhost.args adex_data_scraper --argfile examples\postgres\ancillary_apertif_inspectionplots_localhost.args +``` + +## How to add new VO collections to ADEX +This is an example of how to add the ALMA ivoa.obscore collection to ADEX. + + + +Most of the work is in the `adex-data-scraper` (the current repo), +with one small configuration change in the `adex-backend-django` configuration files for `adex-labs` and `adex-next` to enable the new collection. + +* use a VO application like Topcat to find or query datasets. This example uses Topcat: + * look for the Table Access Protocol (TAP) service. In Topcat: VO => TAP query => keyword 'ALMA' + * select a table: alma.ivoa.obscore + * ADQL Query: SELECT TOP 1000 * FROM ivoa.obscore + * double click 'Table List' + +This shows the fields that you need to translate with a 'connector' to ADEX. + + + +#### argument file +Create your argument file (also see 'examples' chapter) + +``` +--datasource=vo +--connector=ALMA.Obscore +--data_host=http://jvo.nao.ac.jp/skynode/do/tap/alma/ivoa.obscore +--batch_size=1000 +--adex_backend_host=https://sdc.astron.nl/adex_backend/ +--adex_resource=primary_dp/create +--adex_token=6b85509349313c7bdb16bd706d43ee5eb1cfb5da +--clear_collection +--collection=alma_obscore +``` + +Most arguments are default or obvious, but some need a bit more explanation. + +`--connector=ALMA.Obscore` + +This refers to a file `ALMA.py` and classname `Obscore` in that file, in the `vo.connectors` directory. +You will need to create that file; this is the 'connector'. 
(see next chapter) + +`--data_host=http://jvo.nao.ac.jp/skynode/do/tap/alma/ivoa.obscore` + +This is a combination of the 'service URL' and the table name (indicated in red in the topcat screenshot) + +`--collection=alma_obscore` + +You can freely choose this name. This is the name that appears in the Collection dropdown menu. +You need to use the same name in the adex backend configuration (see 'adex-backend-django configuration') + + +#### write the connector (ALMA.Obscore) +This translates the results from the ADQL query to ADEX format. +Not every VO service uses the same field names for similar information (like ra,dec), +so you need to look at the VO table result or Schema in TOPCAT to see which names this service uses. +Also, not all the ADEX fields will be available in every service, and they can be left out. + +The 'translate()' function is an overridden function, which means that its name, arguments and +returned results are given and should not be changed. +Don't change the keys in the dict, only the identifiers in the 'row' + +For example, ADEX expects the Right Ascension named as '**ra**', +which should be in decimal degrees and returned as a float in the payload json. + +The ALMA service returns Right Ascension named as 's_ra' and returns it as a string. 
+So you need to convert it so that it fits ADEX: float(row['s_ra']) + +```python +class Obscore(): + + def translate(self, row, args): + """ + parse the specific row that comes from the VO adql query, + and translate it into the standard json payload for posting to the ADEX backend REST API + + :param row: the results from the ADQL query to a VO service + :param args: the commandline arguments, but only args.collection is currently used + :return: ADEX record as json structure + """ + payload = dict( + pid=row['data_id'], + name=row['target_name'], + dp_type=row['dataproduct_type'], + format="fits", + locality="online", + access_url=row['access_url'], + ra=float(row['s_ra']), + dec=float(row['s_dec']), + equinox="2000.0", + + release_date=row['obs_release_date'], + data_provider="ALMA", + + sky_footprint=row['s_region'], + + dataset_id=str(row['data_id']), + activity=None, + collection = args.collection, + ) + + return payload +``` + +#### adex-backend-django configuration +The frontend applications (adex-labs and adex-gui) get their configuration from adex-backend. +This is the directory where the frontend configuration files are kept: +https://git.astron.nl/astron-sdc/adex-backend-django/-/tree/main/adex_backend/adex_backend/configuration?ref_type=heads + +Look for the "collections" tag in the configuration, and add the new alma_obscore collection to it: + + "collections": [ + { "name" : "linc_skymap", "dp_types": ['qa-skymap']}, + { "name" : "linc_visibilities", "dp_types": ['die-calibrated-visibilities'], "distinct_field" : "dataset_id"}, + { "name" : "apertif-dr1", "dp_types": ['science-skymap']}, + { "name" : "lotts-dr2", "dp_types": ['skymap']}, + { "name" : "lofar-skyimage", "dp_types": ['skyimage']}, + { "name" : "alma_obscore", "dp_types": ['IMAGE','CUBE']} + ], + +`alma_obscore` is the name you chose for your collection, as defined in the argument file. + +`IMAGE` and `CUBE` are values of the `dataproduct_type` field in the ALMA ivoa.obscore collection. 
+These can be mapped onto the ADEX dp_type field (see previous chapter). +By adding them to the configuration, the frontend(s) knows to give these values as filter options once the `alma_obscore` collection is selected. + diff --git a/docs/adex-labs_alma.jpg b/docs/adex-labs_alma.jpg new file mode 100644 index 0000000000000000000000000000000000000000..98bcf06e9cbe119d4a81db3b83ea4eb3566374a3 Binary files /dev/null and b/docs/adex-labs_alma.jpg differ diff --git a/docs/topcat_alma.jpg b/docs/topcat_alma.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9840a1a58fd70e6b0b1d4442c1cefed21112f021 Binary files /dev/null and b/docs/topcat_alma.jpg differ diff --git a/examples/vo/alma/alma_obscore_localhost.args b/examples/vo/alma/alma_obscore_localhost.args new file mode 100644 index 0000000000000000000000000000000000000000..dcca5f16704de9e3245cdde5258856411b19c950 --- /dev/null +++ b/examples/vo/alma/alma_obscore_localhost.args @@ -0,0 +1,9 @@ +--datasource=vo +--connector=ALMA.Obscore +--data_host=http://jvo.nao.ac.jp/skynode/do/tap/alma/ivoa.obscore +--batch_size=1000 +--adex_backend_host=http://localhost:8000/adex_backend/ +--adex_resource=primary_dp/create +--adex_token=9519e433ba37487f1a18121dfb1957d992fbb790 +--clear_collection +--collection=alma_obscore diff --git a/examples/vo/alma/alma_obscore_sdc.args b/examples/vo/alma/alma_obscore_sdc.args new file mode 100644 index 0000000000000000000000000000000000000000..7e6b764bfb8642dda3aed0631b2f9cf86cb97cad --- /dev/null +++ b/examples/vo/alma/alma_obscore_sdc.args @@ -0,0 +1,9 @@ +--datasource=vo +--connector=ALMA.Obscore +--data_host=http://jvo.nao.ac.jp/skynode/do/tap/alma/ivoa.obscore +--batch_size=1000 +--adex_backend_host=https://sdc.astron.nl/adex_backend/ +--adex_resource=primary_dp/create +--adex_token=6b85509349313c7bdb16bd706d43ee5eb1cfb5da +--clear_collection +--collection=alma_obscore diff --git a/scraper/vo/connectors/ALMA.py b/scraper/vo/connectors/ALMA.py new file mode 100644 
index 0000000000000000000000000000000000000000..3fda8aa3d868e788c2294a43149c35225f65eb0f --- /dev/null +++ b/scraper/vo/connectors/ALMA.py @@ -0,0 +1,35 @@ + +class Obscore(): + + def translate(self, row, args): + """ + parse the specific row that comes from the VO adql query, + and translate it into the standard json payload for posting to the ADEX backend REST API + + :param row: the results from the ADQL query to a VO service + :param args: the commandline arguments, but only args.collection is currently used + :return: ADEX record as json structure + """ + payload = dict( + pid=row['data_id'], + name=row['target_name'], + dp_type=row['dataproduct_type'], + format="fits", + locality="online", + access_url=row['access_url'], + ra=float(row['s_ra']), + dec=float(row['s_dec']), + equinox="2000.0", + + release_date=row['obs_release_date'], + data_provider="ALMA", + + sky_footprint=row['s_region'], + + dataset_id=str(row['data_id']), + activity=None, + collection = args.collection, + ) + + return payload + diff --git a/scraper/vo/connectors/Apertif_DR1.py b/scraper/vo/connectors/Apertif_DR1.py index 6687c538af2f229ffe22a152599321181d980e6d..d363ed09848de2bafcfaacdfb95bbbfd58f57a76 100644 --- a/scraper/vo/connectors/Apertif_DR1.py +++ b/scraper/vo/connectors/Apertif_DR1.py @@ -9,7 +9,7 @@ class ContinuumImagesConnector(): :param row: the results from the ADQL query to a VO service :param args: the commandline arguments, but only args.collection is currently used - :return: user, password, host, database, db_port + :return: ADEX record as json structure """ freq_min = float(row['freqmin']) freq_max = float(row['freqmax']) diff --git a/scraper/vo/connectors/LotssDR2.py b/scraper/vo/connectors/LotssDR2.py index cffb90eb3ea4ab0159f083bb9422a1557edc93e2..16fbb6f5bd224fde6a906f0789cff19924b4a666 100644 --- a/scraper/vo/connectors/LotssDR2.py +++ b/scraper/vo/connectors/LotssDR2.py @@ -8,7 +8,7 @@ class LotssDR2Mosaics(): :param row: the results from the ADQL query to a VO 
service :param args: the commandline arguments, but only args.collection is currently used - :return: user, password, host, database, db_port + :return: ADEX record as json structure """ payload = dict( pid=row['data_pid'], diff --git a/scraper/vo/vo_scraper.py b/scraper/vo/vo_scraper.py index cefa2ef7260f5a348129eb955b5d7a030f14ef2a..e311bc4f4d413c3f10479034185d45962edf0a16 100644 --- a/scraper/vo/vo_scraper.py +++ b/scraper/vo/vo_scraper.py @@ -6,7 +6,7 @@ import logging logger = logging.getLogger('vo_scraper') from scraper.adex_io import ADEX -from scraper.vo.connectors import Apertif_DR1, LotssDR2, Linc +from scraper.vo.connectors import Apertif_DR1, LotssDR2, Linc, ALMA def run(args): """ @@ -40,6 +40,10 @@ def run(args): connector_class = getattr(Linc, connector_name) connector = connector_class() + if connector_module.upper() == 'ALMA': + connector_class = getattr(ALMA, connector_name) + connector = connector_class() + # construct the ADQL query #select_fields = "centeralpha,centerdelta,imagetitle,accref" select_fields = "*" @@ -93,7 +97,10 @@ def run(args): t3 = datetime.datetime.now() print(f"{total_count} records in total in {t3 - t0}") + except UnboundLocalError as error: + print(f"ERROR: {error}. Did you import and hook up the {connector_module} connector in vo_scraper.py?") + return except Exception as error: - print(f"ERROR: {error}") + print(f"ERROR: {error}.") return