diff --git a/README.md b/README.md index ceed0c973f6fe22f1a313d53a4d8f090be70722a..8e501f4003c8842205c665f493a6bc88f45a7e51 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,36 @@ # esap-userprofile-python-client A Python client for the ESCAPE ESAP User Profile REST API. + +The `shopping_client` module, which communicates with the ESCAPE ESAP User Profile REST API, is very lightweight. Archive-specific functionality is delegated to "connector" modules like the `zooniverse` module. + +### Example - Using the Shopping Client with the Zooniverse connector + +```python +from shopping_client import shopping_client +from zooniverse import zooniverse +import getpass + +# Prompt for Zooniverse account password +zooniverse_password = getpass.getpass("Enter Zooniverse password:") + +# Instantiate Zooniverse connector +zc = zooniverse(username="hughdickinson", password=zooniverse_password) + +# Instantiate ESAP User Profile shopping client, passing zooniverse connector +sc = shopping_client(host="https://sdc-dev.astron.nl:5555/", connectors=[zc]) + +# Retrieve basket (prompts to enter access token obtained from ESAP GUI) +res=sc.get_basket(convert_to_pandas=True) + +# ... inspect available results ... + +# Retrieve data from Zooniverse based on basket item +data = zc.retrieve(res["zooniverse"].loc[3], + generate=False, + wait=True, + convert_to_pandas=True) + +# ... analyse data ... 
+ +``` diff --git a/setup.py b/setup.py index 422e19d2936d0a79c5d1d203a84707c1e2992e00..07135eb3ef5bf460a971e2f8966321f1630fdbe8 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ with open("README.md", "r") as fh: setuptools.setup( name="esap-userprofile-python-client", - version="0.0.1", + version="0.0.2", author="Hugh Dickinson", author_email="hugh.dickinson@open.ac.uk", description="Python client for ESAP Data Discovery Shoipping Basket", diff --git a/shopping_client/shopping_client.py b/shopping_client/shopping_client.py index cb6e2eb1b9ffafc2b30c4984790cc38cee5f2dd9..5eb5cba65f0156c42f7e9846190b3d458a33b7cb 100644 --- a/shopping_client/shopping_client.py +++ b/shopping_client/shopping_client.py @@ -2,21 +2,65 @@ import requests import json import urllib.parse import getpass +import pandas as pd class shopping_client: endpoint = "esap-api/accounts/user-profiles/" - def __init__(self, username, host="http://localhost:5555/", connector=None): - self.username = username + def __init__( + self, + token: str = None, + host: str = "http://localhost:5555/", + connectors: list = [], + ): + """Constructor. + + Parameters + ---------- + token : str + OAuth access token as a string. + host : str + Hostname of the ESAP Gateway backend. + connectors : list + List of connector classes that can handle specific types of shopping + item. + + """ + self.token = token self.host = host - self.connector = connector + self.connectors = connectors self.basket = None - self.token = None - def get_basket(self, convert_to_pandas=False, reload=False): + def get_basket( + self, convert_to_pandas: bool = False, reload: bool = False + ) -> Optional[list[dict]]: + """Retrieve the shopping basket for a user. + Prompts for access token if one was not supplied to constructor. + + Parameters + ---------- + convert_to_pandas : bool + If `True`, attempt to convert items from the basket to pandas Series + and concatenate items from the same archive into pandas DataFrames. 
+ + Note that items that cannot be converted by any of the connector + classes passed to the constructor will be ignored and lost. The + default `convert_to_pandas = False` returns all items as a `list` + of `dict`s. + + reload : bool + If `True` a fresh query is issued to the ESAP API to refresh the + basket contents. + + Returns + ------- + Union[list[dict], pd.DataFrame, None] + The basket as a `list` of `dict`s, the output of `_basket_to_pandas()` if `convert_to_pandas` is `True`, or `None` if no basket was loaded. + + """ if self.basket is None or reload: url = urllib.parse.urljoin(self.host, shopping_client.endpoint) response = requests.get(url, headers=self._request_header()) @@ -24,34 +68,39 @@ class shopping_client: self.basket = json.loads(response.content)["results"][0][ "shopping_cart" ] - if convert_to_pandas: - return self._basket_to_pandas() - return self.basket else: return None + if convert_to_pandas: + return self._basket_to_pandas() + return self.basket def _request_header(self): while self.token is None: - self.get_token() + self._get_token() return dict(Accept="application/json", Authorization=f"Bearer {self.token}") def _basket_to_pandas(self): - if self.connectors is not None: - return { + if len(self.connectors): + converted_basket = { connector.name: pd.concat( [ - connector._basket_item_to_pandas(item) + connector.basket_item_to_pandas(item) for item in self.basket - if connector._validate_basket_item(item) - ] + if connector.validate_basket_item(item) + ], + axis=1, ) for connector in self.connectors } + return { + name: data.to_frame().T if data.ndim < 2 else data.T + for name, data in converted_basket.items() + } warning( - "No archive connectors specified - could not convert basket items to Pandas DataFrame" + "No archive connectors specified - could not convert any basket items to Pandas DataFrame" ) - return basket + return self.basket def _get_token(self): - self.token = getpass.getpass("Enter your ESAP autorization token:") + self.token = getpass.getpass("Enter your ESAP access token:") diff --git a/zooniverse/zooniverse.py 
b/zooniverse/zooniverse.py index c13f82acca84ac43038660b619aeae471c8576fe..b3fe0538b28b5e3f1b1a3afeab834bfd64e9e05a 100644 --- a/zooniverse/zooniverse.py +++ b/zooniverse/zooniverse.py @@ -3,6 +3,9 @@ import json import io import getpass import pandas as pd + +from typing import Union, Optional + from panoptes_client import Panoptes, Project, Workflow from panoptes_client.panoptes import PanoptesAPIException @@ -16,7 +19,16 @@ class zooniverse: "classifications": dict(metadata=json.loads, annotations=json.loads), } - def __init__(self, username, password=None): + def __init__(self, username: str, password: str = None): + """Constructor. + + Parameters + ---------- + username : str + Zooniverse (panoptes) account username. + password : str + Zooniverse (panoptes) account password. + """ self.username = username self.password = password if self.password is None: @@ -24,7 +36,7 @@ class zooniverse: self.panoptes = Panoptes.connect(username=self.username, password=self.password) - def is_available(self, item, verbose=False): + def is_available(self, item: Union[dict, pd.Series], verbose: bool = False): try: description = self._get_entity(item).describe_export( self._get_item_entry(item, "category") @@ -35,7 +47,35 @@ class zooniverse: except PanoptesAPIException as e: return False - def generate(self, item, wait=False, convert_to_pandas=True, **read_csv_args): + def generate( + self, + item: Union[dict, pd.Series], + wait: bool = False, + convert_to_pandas: bool = True, + **read_csv_args + ) -> Union[requests.Response, pd.DataFrame, None]: + """Generate an export of data from the Zooniverse panoptes database + specified by an item from the shopping basket. + + Parameters + ---------- + item : Union[dict, pd.Series] + A single item from a retrieved shopping basket - either a raw `dict` + or a converted `pd.Series`. + wait : bool + If `True` blocks until the requested item has been generated. 
+ convert_to_pandas : bool + If `True` the retrieved, generated data are parsed into a pd.DataFrame. + **read_csv_args : type + Extra arguments passed to `pd.read_csv()` when parsing the retrieved + data. + + Returns + ------- + Union[requests.Response, pd.DataFrame, None] + Description of returned object. + + """ print("Generating requested export...") if wait: print("\t\tWaiting for generation to complete...") @@ -62,15 +102,49 @@ class zooniverse: return None def retrieve( - self, item, generate=False, wait=False, convert_to_pandas=True, **read_csv_args - ): + self, + item: Union[dict, pd.Series], + generate: bool = False, + wait: bool = False, + convert_to_pandas: bool = True, + **read_csv_args + ) -> Union[requests.Response, pd.DataFrame, None]: + """Retrieve data specified by an item from the shopping basket from the + Zooniverse panoptes database. Optionally (re)generate the requested + data. + + Parameters + ---------- + item : Union[dict, pd.Series] + A single item from a retrieved shopping basket - either a raw `dict` + or a converted `pd.Series`. + generate : bool + If `True` generate the requested data item. If the item has already + been generated, it will be regenerated. If the item does not exist + and `generate` is `False` a warning is shown and `None` is returned. + wait : bool + If `generate` is `True`, setting `wait` to `True` blocks until the + requested item has been generated. If `generate` is `False`, `wait` + has no effect. + convert_to_pandas : bool + If `True` the retrieved data are parsed into a pd.DataFrame. + **read_csv_args : type + Extra arguments passed to `pd.read_csv()` when parsing the retrieved + data. 
+ + Returns + ------- + Union[requests.Response, pd.DataFrame, None] + The retrieved data, or `None` if it is unavailable and `generate` is `False`. + + """ if self.is_available(item) and not generate: response = self._get_entity(item).get_export( self._get_item_entry(item, "category"), generate=False, wait=wait ) else: if not generate: - print( + warning( "Requested resource is not available and you have specified generate==False" ) return None @@ -107,22 +181,69 @@ class zooniverse: return entity def _get_item_entry(self, item, entry): - item_data = json.loads(item["item_data"].replace("'", '"')) - return item_data.get(entry, None) + if type(item) == dict: + print(item) + item = json.loads(item["item_data"].replace("'", '"')) + return item.get(entry, None) def _catalogue_to_id_string(self, item): return self._get_item_entry(item, "catalog") + "_id" - def _basket_item_to_pandas(self, basket_item, validate=True): + def basket_item_to_pandas( + self, basket_item: Union[dict, pd.Series], validate: bool = True + ) -> Optional[pd.Series]: + """Convert an item from the shopping basket into a `pd.Series` with + optional validation. + + Parameters + ---------- + basket_item : Union[dict, pd.Series] + A single item from a retrieved shopping basket - either a raw `dict` + or a converted `pd.Series`. + validate : bool + If `True`, check that the data in the shopping item conforms with + the expected format before attempting the conversion. + + Returns + ------- + Optional[pd.Series] + `pd.Series` containing the data encoded in the shopping item or + `None`. 
+ + """ if validate: - item_data = self._validate_basket_item(basket_item, return_loaded=True) + item_data = self.validate_basket_item(basket_item, return_loaded=True) else: item_data = json.loads(basket_item["item_data"]) if item_data: return pd.Series(item_data) return None - def _validate_basket_item(self, basket_item, return_loaded=False): + def validate_basket_item( + self, basket_item: Union[dict, pd.Series], return_loaded: bool = False + ) -> Union[dict, bool, None]: + """Check that the data in the shopping item conforms with + the expected format. + + Parameters + ---------- + basket_item : Union[dict, pd.Series] + A single item from a retrieved shopping basket - either a raw `dict` + or a converted `pd.Series`. + return_loaded : bool + If `True`, and validation succeeds return the extracted shopping item + as `dict`, otherwise return `True` if validation succeeds and `None` + otherwise. + + Returns + ------- + Union[dict, bool, None] + If `return_loaded` is `True`, return a `dict` containing the data + encoded in the shopping item when validation succeeds. + Otherwise, if `return_loaded` is `False`, return `True` when validation succeeds. + If validation fails return `None`. + + """ item_data = json.loads(basket_item["item_data"]) if "archive" in item_data and item_data["archive"] == "zooniverse": if return_loaded: