Skip to content
Snippets Groups Projects
Commit 7fa823c6 authored by Hugh Dickinson
Browse files

Added features to select a subset of rows and specify custom column headings.

parent cb66cf08
No related branches found
No related tags found
No related merge requests found
......@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
setuptools.setup(
name="esap-userprofile-python-client",
version="0.0.3",
version="0.0.4",
author="Hugh Dickinson",
author_email="hugh.dickinson@open.ac.uk",
description="Python client for ESAP Data Discovery Shoipping Basket",
......
......@@ -147,7 +147,9 @@ class zooniverse:
if response.ok:
if convert_to_pandas:
return (
self._chunked_content(item, response, chunk_size=chunk_size)
self._chunked_content(
item, response, chunk_size=chunk_size, **read_csv_args
)
if chunked_retrieve
else pd.read_csv(
io.BytesIO(response.content),
......@@ -166,18 +168,22 @@ class zooniverse:
item: Union[dict, pd.Series],
response: requests.Response,
chunk_size: int = int(1e5),
**read_csv_args,
):
response_iterator = response.iter_lines(1)
chunk_frames = []
nrows = read_csv_args.pop("nrows", None)
skiprows = read_csv_args.pop("skiprows", 0)
_ = read_csv_args.pop("header", None)
names = read_csv_args.pop("names", None)
while True:
chunk = b"\n".join(
[
line
for _, line in zip(range(chunk_size), response_iterator)
if line
]
[line for _, line in zip(range(chunk_size), response_iterator) if line]
)
if len(chunk) == 0:
if len(chunk) == 0 or (
nrows is not None
and len(chunk_frames) * chunk_size - 1 > nrows + skiprows
):
# response_iterator exhausted
print("All data received.")
break
......@@ -188,10 +194,20 @@ class zooniverse:
self._get_item_entry(item, "category")
],
header=None if len(chunk_frames) else 0,
names=chunk_frames[0].columns if len(chunk_frames) else None,
names=chunk_frames[0].columns
if len(chunk_frames)
else names
if names is not None
else None,
**read_csv_args,
)
)
end = (skiprows + nrows) if nrows is not None else None
return (
pd.concat(chunk_frames, axis=0, ignore_index=True)
.iloc[slice(skiprows, end)]
.reset_index(drop=True)
)
return pd.concat(chunk_frames, axis=0, ignore_index=True)
def _get_entity(self, item):
entity = zooniverse.entity_types[self._get_item_entry(item, "catalog")].find(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment