README.md

# only once
> cd ~
> mkdir adex-data-scraper
> cd adex-data-scraper
> virtualenv env3.9 -p python3.9 (on ubuntu)
> python3 -m venv env3.9 (on centos)

# for every deployment
> cd ~/adex-data-scraper
> source env3.9/bin/activate
> pip install --upgrade pip
> pip install adex-data-scraper --extra-index-url https://git@git.astron.nl/api/v4/projects/349/packages/pypi/simple --upgrade

> adex_data_scraper -h
--- adex-data-scraper (version 14 feb 2023) ---
  > adex_data_scraper -h
usage: main.py [-h] [--datasource DATASOURCE] [--data_host DATA_HOST]
               [--connector CONNECTOR] [--limit LIMIT]
               [--batch_size BATCH_SIZE]
               [--adex_backend_host [ADEX_BACKEND_HOST]]
               [--adex_resource ADEX_RESOURCE] [--collection COLLECTION]
               [--clear_resource] [--clear_collection] [--simulate_post]
               [--adex_token ADEX_TOKEN] [-v] [--argfile [ARGFILE]]

options:
  -h, --help            show this help message and exit
  --datasource DATASOURCE
                        where should the data be imported from? Options: vo,
                        postgres
  --data_host DATA_HOST
                        service/table, either VO or postgres. Examples:
                        https://vo.astron.nl/tap/apertif_dr1.continuum_images,
                        postgres:postgres@localhost:5432/alta
  --connector CONNECTOR
                        Connector class containing the translation scheme from
                        vo to adex
  --limit LIMIT         max records to fetch from VO, for dev/test purposes
  --batch_size BATCH_SIZE
                        number of records to post as a batch to ADEX
  --adex_backend_host [ADEX_BACKEND_HOST]
                        location of the adex-backend-django application
  --adex_resource ADEX_RESOURCE
                        resource/table to update, options are:
                        primary_dp/create, ancillary_dp/create,
                        activity/create
  --collection COLLECTION
                        can be used as filter in ADEX backend and ADEX
                        frontend
  --clear_resource      Delete all the data from the adex_resource
  --clear_collection    Delete all the data for this collection from the
                        adex_resource, works in concert with the '--
                        collection' parameter.
  --simulate_post       If true, then no data is posted to ADEX
  --adex_token ADEX_TOKEN
                        Token to login
  -v, --verbose         More information about atdb_services at run time.
  --argfile [ARGFILE]   Ascii file with arguments (overrides all other
                        arguments)

Process finished with exit code 0

> adex_data_scraper --argfile examples/vo/apertif_dr1_continuum_images_localhost.args
> adex_data_scraper --argfile examples/postgres/ancillary_apertif_inspectionplots_localhost.args
--datasource=vo
--connector=ALMA.Obscore
--data_host=http://jvo.nao.ac.jp/skynode/do/tap/alma/ivoa.obscore
--batch_size=1000
--adex_backend_host=https://sdc.astron.nl/adex_backend/
--adex_resource=primary_dp/create
--adex_token=<your-adex-token>
--clear_collection
--collection=alma_obscore
class Obscore:
    """Connector that translates ObsCore rows into ADEX records."""

    def translate(self, row, args):
        """
        Convert one row of the VO ADQL query result into the payload
        that is posted to the ADEX backend REST API.

        :param row: a single result row, indexable by column name
        :param args: the commandline arguments; only args.collection is read here
        :return: the ADEX record as a dict (json structure)
        """
        record = {
            # identification and description of the dataproduct
            "pid": row["data_id"],
            "name": row["target_name"],
            "dp_type": row["dataproduct_type"],
            "format": "fits",
            "locality": "online",
            "access_url": row["access_url"],
            # sky position, always converted to float
            "ra": float(row["s_ra"]),
            "dec": float(row["s_dec"]),
            "equinox": "2000.0",
            # provenance
            "release_date": row["obs_release_date"],
            "data_provider": "ALMA",
            "sky_footprint": row["s_region"],
            # same data_id as used for pid, but always as a string
            "dataset_id": str(row["data_id"]),
            "activity": None,
            "collection": args.collection,
        }

        return record
    "collections": [
        { "name" : "linc_skymap", "dp_types": ['qa-skymap']},
        { "name" : "linc_visibilities", "dp_types": ['die-calibrated-visibilities'], "distinct_field" : "dataset_id"},
        { "name" : "apertif-dr1", "dp_types": ['science-skymap']},
        { "name" : "lotts-dr2", "dp_types": ['skymap']},
        { "name" : "lofar-skyimage", "dp_types": ['skyimage']},
        { "name" : "alma_obscore", "dp_types": ['IMAGE','CUBE']}
    ],
> adex_data_scraper --argfile ./alma_obscore_sdc.args
1000 records fetched from http://jvo.nao.ac.jp/skynode/do/tap/alma
1000 posted to https://sdc.astron.nl/adex_backend/ in 0:00:17.483034
1000 records fetched from http://jvo.nao.ac.jp/skynode/do/tap/alma
1000 posted to https://sdc.astron.nl/adex_backend/ in 0:00:15.901474
1000 records fetched from http://jvo.nao.ac.jp/skynode/do/tap/alma
1000 posted to https://sdc.astron.nl/adex_backend/ in 0:00:17.173940
1000 records fetched from http://jvo.nao.ac.jp/skynode/do/tap/alma
1000 posted to https://sdc.astron.nl/adex_backend/ in 0:00:17.729760
1000 records fetched from http://jvo.nao.ac.jp/skynode/do/tap/alma