Skip to content
Snippets Groups Projects
Commit 6c4a7eb9 authored by Hugh Dickinson's avatar Hugh Dickinson
Browse files

Small bug fixes and added documentation.

parent 248545b2
No related branches found
No related tags found
No related merge requests found
# esap-userprofile-python-client # esap-userprofile-python-client
A Python client for the ESCAPE ESAP User Profile REST API. A Python client for the ESCAPE ESAP User Profile REST API.
The `shopping_client` module, which communicates with the ESCAPE ESAP User Profile REST API, is very lightweight. Archive-specific functionality is delegated to "connector" modules like the `zooniverse` module.
### Example - Using the Shopping Client with the Zooniverse connector
```python
from shopping_client import shopping_client
from zooniverse import zooniverse
import getpass
# Prompt for Zooniverse account password
zooniverse_password = getpass.getpass("Enter Zooniverse password:")
# Instantiate Zooniverse connector
zc = zooniverse(username="hughdickinson", password=zooniverse_password)
# Instantiate ESAP User Profile shopping client, passing zooniverse connector
sc = shopping_client(host="https://sdc-dev.astron.nl:5555/", connectors=[zc])
# Retrieve basket (prompts to enter access token obtained from ESAP GUI)
res=sc.get_basket(convert_to_pandas=True)
# ... inspect available results ...
# Retrieve data from Zooniverse based on basket item
data = zc.retrieve(res["zooniverse"].loc[3],
generate=False,
wait=True,
convert_to_pandas=True)
# ... analyse data ...
```
...@@ -5,7 +5,7 @@ with open("README.md", "r") as fh: ...@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
setuptools.setup( setuptools.setup(
name="esap-userprofile-python-client", name="esap-userprofile-python-client",
version="0.0.1", version="0.0.2",
author="Hugh Dickinson", author="Hugh Dickinson",
author_email="hugh.dickinson@open.ac.uk", author_email="hugh.dickinson@open.ac.uk",
description="Python client for ESAP Data Discovery Shoipping Basket", description="Python client for ESAP Data Discovery Shoipping Basket",
......
...@@ -2,21 +2,65 @@ import requests ...@@ -2,21 +2,65 @@ import requests
import json import json
import urllib.parse import urllib.parse
import getpass import getpass
import pandas as pd
class shopping_client: class shopping_client:
endpoint = "esap-api/accounts/user-profiles/" endpoint = "esap-api/accounts/user-profiles/"
def __init__(self, username, host="http://localhost:5555/", connector=None): def __init__(
self.username = username self,
token: str = None,
host: str = "http://localhost:5555/",
connectors: list = [],
):
"""Constructor.
Parameters
----------
token : str
OAuth access token as a string.
host : str
Hostname of the ESAP Gateway backend.
connectors : list
List of connector classes that can handle specific types of shopping
item.
"""
self.token = token
self.host = host self.host = host
self.connector = connector self.connectors = connectors
self.basket = None self.basket = None
self.token = None
def get_basket(self, convert_to_pandas=False, reload=False): def get_basket(
self, convert_to_pandas: bool = False, reload: bool = False
) -> Optional[list[dict]]:
"""Retrieve the shopping basket for a user.
Prompts for access token if one was not supplied to constructor.
Parameters
----------
convert_to_pandas : bool
If `True`, attempt to convert items from the basket to pandas Series
and concatenate items from the same archive into pandas DataFrames.
Note that items that cannot be converted by any of the connector
classes passed to the constructor will be ignored and lost. The
default `convert_to_pandas = False` returns all items as a `list`
of `dict`s
reload : bool
If `True` a fresh query is issued to the ESAP API to refresh the
basket contents.
Returns
-------
Union[list[dict], pd.DataFrame, None]
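The basket contents as a `list` of `dict`s. If `convert_to_pandas` is
`True` and connectors were supplied, a `dict` mapping each connector
name to a `pd.DataFrame` of the items it could convert. `None` if the
basket was not retrieved.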
"""
if self.basket is None or reload: if self.basket is None or reload:
url = urllib.parse.urljoin(self.host, shopping_client.endpoint) url = urllib.parse.urljoin(self.host, shopping_client.endpoint)
response = requests.get(url, headers=self._request_header()) response = requests.get(url, headers=self._request_header())
...@@ -24,34 +68,39 @@ class shopping_client: ...@@ -24,34 +68,39 @@ class shopping_client:
self.basket = json.loads(response.content)["results"][0][ self.basket = json.loads(response.content)["results"][0][
"shopping_cart" "shopping_cart"
] ]
else:
return None
if convert_to_pandas: if convert_to_pandas:
return self._basket_to_pandas() return self._basket_to_pandas()
return self.basket return self.basket
else:
return None
def _request_header(self): def _request_header(self):
while self.token is None: while self.token is None:
self.get_token() self._get_token()
return dict(Accept="application/json", Authorization=f"Bearer {self.token}") return dict(Accept="application/json", Authorization=f"Bearer {self.token}")
def _basket_to_pandas(self): def _basket_to_pandas(self):
if self.connectors is not None: if len(self.connectors):
return { converted_basket = {
connector.name: pd.concat( connector.name: pd.concat(
[ [
connector._basket_item_to_pandas(item) connector.basket_item_to_pandas(item)
for item in self.basket for item in self.basket
if connector._validate_basket_item(item) if connector.validate_basket_item(item)
] ],
axis=1,
) )
for connector in self.connectors for connector in self.connectors
} }
return {
name: data.to_frame().T if data.ndim < 2 else data.T
for name, data in converted_basket.items()
}
warning( warning(
"No archive connectors specified - could not convert basket items to Pandas DataFrame" "No archive connectors specified - could not convert any basket items to Pandas DataFrame"
) )
return basket return self.basket
def _get_token(self): def _get_token(self):
self.token = getpass.getpass("Enter your ESAP autorization token:") self.token = getpass.getpass("Enter your ESAP access token:")
...@@ -3,6 +3,9 @@ import json ...@@ -3,6 +3,9 @@ import json
import io import io
import getpass import getpass
import pandas as pd import pandas as pd
from typing import Union, Optional
from panoptes_client import Panoptes, Project, Workflow from panoptes_client import Panoptes, Project, Workflow
from panoptes_client.panoptes import PanoptesAPIException from panoptes_client.panoptes import PanoptesAPIException
...@@ -16,7 +19,16 @@ class zooniverse: ...@@ -16,7 +19,16 @@ class zooniverse:
"classifications": dict(metadata=json.loads, annotations=json.loads), "classifications": dict(metadata=json.loads, annotations=json.loads),
} }
def __init__(self, username, password=None): def __init__(self, username: str, password: str = None):
"""Constructor.
Parameters
----------
username : str
Zooniverse (panoptes) account username.
password : str
Zooniverse (panoptes) account password.
"""
self.username = username self.username = username
self.password = password self.password = password
if self.password is None: if self.password is None:
...@@ -24,7 +36,7 @@ class zooniverse: ...@@ -24,7 +36,7 @@ class zooniverse:
self.panoptes = Panoptes.connect(username=self.username, password=self.password) self.panoptes = Panoptes.connect(username=self.username, password=self.password)
def is_available(self, item, verbose=False): def is_available(self, item: Union[dict, pd.Series], verbose: bool = False):
try: try:
description = self._get_entity(item).describe_export( description = self._get_entity(item).describe_export(
self._get_item_entry(item, "category") self._get_item_entry(item, "category")
...@@ -35,7 +47,35 @@ class zooniverse: ...@@ -35,7 +47,35 @@ class zooniverse:
except PanoptesAPIException as e: except PanoptesAPIException as e:
return False return False
def generate(self, item, wait=False, convert_to_pandas=True, **read_csv_args): def generate(
self,
item: Union[dict, pd.Series],
wait: bool = False,
convert_to_pandas: bool = True,
**read_csv_args
) -> Union[requests.Response, pd.DataFrame, None]:
"""Generate an export of data from the Zooniverse panoptes database
specified by an item from the shopping basket.
Parameters
----------
item : Union[dict, pd.Series]
A single item from a retrieved shopping basket - either a raw `dict`
or a converted `pd.Series`.
wait : bool
If `True` blocks until the requested item has been generated.
convert_to_pandas : bool
If `True` the retrieved, generated data are parsed into a pd.DataFrame.
**read_csv_args : dict
Extra arguments passed to `pd.read_csv()` when parsing the retrieved
data.
Returns
-------
Union[requests.Response, pd.DataFrame, None]
Description of returned object.
"""
print("Generating requested export...") print("Generating requested export...")
if wait: if wait:
print("\t\tWaiting for generation to complete...") print("\t\tWaiting for generation to complete...")
...@@ -62,15 +102,49 @@ class zooniverse: ...@@ -62,15 +102,49 @@ class zooniverse:
return None return None
def retrieve( def retrieve(
self, item, generate=False, wait=False, convert_to_pandas=True, **read_csv_args self,
): item: Union[dict, pd.Series],
generate: bool = False,
wait: bool = False,
convert_to_pandas: bool = True,
**read_csv_args
) -> Union[requests.Response, pd.DataFrame, None]:
"""Retrieve data specified by an item from the shopping basket from the
Zooniverse panoptes database. Optionally (re)generate the requested
data.
Parameters
----------
item : Union[dict, pd.Series]
A single item from a retrieved shopping basket - either a raw `dict`
or a converted `pd.Series`.
generate : bool
If `True` generate the requested data item. If the item has already
been generated, it will be regenerated. If the item does not exist
and `generate` is `False` a warning is shown and `None` is returned.
wait : bool
If `generate` is `True`, setting `wait` to `True` blocks until the
requested item has been generated. If `generate` is `False`, `wait`
has no effect.
convert_to_pandas : bool
If `True` the retrieved data are parsed into a pd.DataFrame.
**read_csv_args : dict
Extra arguments passed to `pd.read_csv()` when parsing the retrieved
data.
Returns
-------
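Union[requests.Response, pd.DataFrame, None]
The retrieved data, parsed into a `pd.DataFrame` if `convert_to_pandas`
is `True`. `None` if the requested item is not available and
`generate` is `False`.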
"""
if self.is_available(item) and not generate: if self.is_available(item) and not generate:
response = self._get_entity(item).get_export( response = self._get_entity(item).get_export(
self._get_item_entry(item, "category"), generate=False, wait=wait self._get_item_entry(item, "category"), generate=False, wait=wait
) )
else: else:
if not generate: if not generate:
print( warning(
"Requested resource is not available and you have specified generate==False" "Requested resource is not available and you have specified generate==False"
) )
return None return None
...@@ -107,22 +181,69 @@ class zooniverse: ...@@ -107,22 +181,69 @@ class zooniverse:
return entity return entity
def _get_item_entry(self, item, entry): def _get_item_entry(self, item, entry):
item_data = json.loads(item["item_data"].replace("'", '"')) if type(item) == dict:
return item_data.get(entry, None) print(item)
item = json.loads(item["item_data"].replace("'", '"'))
return item.get(entry, None)
def _catalogue_to_id_string(self, item): def _catalogue_to_id_string(self, item):
return self._get_item_entry(item, "catalog") + "_id" return self._get_item_entry(item, "catalog") + "_id"
def _basket_item_to_pandas(self, basket_item, validate=True): def basket_item_to_pandas(
self, basket_item: Union[dict, pd.Series], validate: bool = True
) -> Optional[pd.Series]:
"""Convert an item from the shopping basket into a `pd.Series` with
optional validation.
Parameters
----------
basket_item : Union[dict, pd.Series]
A single item from a retrieved shopping basket - either a raw `dict`
or a converted `pd.Series`.
validate : bool
If `True`, check that the data in the shopping item conforms with
the expected format before attempting the conversion.
Returns
-------
Optional[pd.Series]
`pd.Series` containing the data encoded in the shopping item or
`NoneType`.
"""
if validate: if validate:
item_data = self._validate_basket_item(basket_item, return_loaded=True) item_data = self.validate_basket_item(basket_item, return_loaded=True)
else: else:
item_data = json.loads(basket_item["item_data"]) item_data = json.loads(basket_item["item_data"])
if item_data: if item_data:
return pd.Series(item_data) return pd.Series(item_data)
return None return None
def _validate_basket_item(self, basket_item, return_loaded=False): def validate_basket_item(
self, basket_item: Union[dict, pd.Series], return_loaded: bool = False
) -> Union[dict, bool, None]:
"""Check that the data in the shopping item conforms with
the expected format
Parameters
----------
basket_item : Union[dict, pd.Series]
A single item from a retrieved shopping basket - either a raw `dict`
or a converted `pd.Series`.
return_loaded : bool
If `True`, and validation succeeds return the extracted shopping item
as `dict`, otherwise return `True` if validation succeeds and `None`
otherwise.
Returns
-------
Union[dict, bool, None]
If `return_loaded` is `True`, return a `dict` containing the data
encoded in the shopping item when validation succeeds.
Otherwise, if `return_loaded` is `False`, return `True` when validation succeeds.
If validation fails return `None`.
"""
item_data = json.loads(basket_item["item_data"]) item_data = json.loads(basket_item["item_data"])
if "archive" in item_data and item_data["archive"] == "zooniverse": if "archive" in item_data and item_data["archive"] == "zooniverse":
if return_loaded: if return_loaded:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment