Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PRO-937: Add create and insert table endpoints to sdk #114

Merged
merged 16 commits into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions dune_client/api/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import logging.config
import os
from json import JSONDecodeError
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Optional, Union, IO

from requests import Response, Session
from requests.adapters import HTTPAdapter, Retry
Expand Down Expand Up @@ -179,15 +179,22 @@ def _get(
return response
return self._handle_response(response)

def _post(self, route: str, params: Optional[Any] = None) -> Any:
def _post(
self,
route: str,
params: Optional[Any] = None,
data: Optional[IO[bytes]] = None,
headers: Optional[Dict[str, str]] = None,
) -> Any:
"""Generic interface for the POST method of a Dune API request"""
url = self._route_url(route)
self.logger.debug(f"POST received input url={url}, params={params}")
response = self.http.post(
url=url,
json=params,
headers=self.default_headers(),
headers=dict(self.default_headers(), **headers if headers else {}),
timeout=self.request_timeout,
data=data,
waddahAldrobi marked this conversation as resolved.
Show resolved Hide resolved
waddahAldrobi marked this conversation as resolved.
Show resolved Hide resolved
)
return self._handle_response(response)

Expand Down
37 changes: 2 additions & 35 deletions dune_client/api/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
)
from dune_client.api.execution import ExecutionAPI
from dune_client.api.query import QueryAPI
from dune_client.api.table import TableAPI
from dune_client.models import (
ResultsResponse,
DuneError,
Expand All @@ -36,7 +37,7 @@
POLL_FREQUENCY_SECONDS = 1


class ExtendedAPI(ExecutionAPI, QueryAPI):
bh2smith marked this conversation as resolved.
Show resolved Hide resolved
class ExtendedAPI(ExecutionAPI, QueryAPI, TableAPI):
"""
Provides higher level helper methods for faster
and easier development on top of the base ExecutionAPI.
Expand Down Expand Up @@ -316,40 +317,6 @@ def download_csv(
),
)

############################
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think some people might still be depending on this. We might need to keep it for backwards compatibility

Copy link
Contributor Author

@waddahAldrobi waddahAldrobi Mar 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

# Plus Subscription Features
############################
def upload_csv(
self,
table_name: str,
data: str,
description: str = "",
is_private: bool = False,
) -> bool:
"""
https://docs.dune.com/api-reference/tables/endpoint/upload
The write API allows you to upload any .csv file into Dune. The only limitations are:

- File has to be < 200 MB
- Column names in the table can't start with a special character or digits.
- Private uploads require a Plus subscription.

Below are the specifics of how to work with the API.
"""
response_json = self._post(
route="/table/upload/csv",
params={
"table_name": table_name,
"description": description,
"data": data,
"is_private": is_private,
},
)
try:
return bool(response_json["success"])
except KeyError as err:
raise DuneError(response_json, "UploadCsvResponse", err) from err

##############################################################################################
# Plus Features: these features use APIs that are only available on paid subscription plans
##############################################################################################
Expand Down
99 changes: 99 additions & 0 deletions dune_client/api/table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
"""
Table API endpoints enables users to
create and insert data into Dune.
"""

from __future__ import annotations
from typing import List, Dict, Any, IO

from dune_client.api.base import BaseRouter
from dune_client.models import DuneError


class TableAPI(BaseRouter):
"""
Implementation of Table endpoints - Plus subscription only
https://docs.dune.com/api-reference/tables/
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: There is no landing page for the Tables API. This url redirects to https://docs.dune.com/api-reference/tables/endpoint/create and there only appears to be one direct link in the docs page (inside a comment on uploadCSV)

image

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, you are correct.
Opening a ticket internally

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great callout! It should probably be an "Overview" or similar like with the other endpoint groups.

"""

def upload_csv(
self,
table_name: str,
data: str,
description: str = "",
is_private: bool = False,
) -> bool:
"""
https://docs.dune.com/api-reference/tables/endpoint/upload
This endpoint allows you to upload any .csv file into Dune. The only limitations are:

- File has to be < 200 MB
- Column names in the table can't start with a special character or digits.
- Private uploads require a Plus subscription.

Below are the specifics of how to work with the API.
"""
response_json = self._post(
route="/table/upload/csv",
params={
"table_name": table_name,
"description": description,
"data": data,
"is_private": is_private,
},
)
try:
return bool(response_json["success"])
except KeyError as err:
raise DuneError(response_json, "UploadCsvResponse", err) from err

def create_table(
self,
namespace: str,
table_name: str,
schema: List[Dict[str, str]],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At some point we probably want to create an enum of the possible types, but not yet!

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you happen to have a list of all the possible types?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup they're defined here
https://dune-tables-docs.mintlify.app/api-reference/tables/endpoint/create

(Link is in docstring)
image

description: str = "",
is_private: bool = False,
) -> Any:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why Any ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is what self._post returns.
This was already implemented, but we can change that once we have something returned

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If so, should this change now? 😄

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am trying to say that Any is not a useful return type. What should users expect back when calling the create_table or insert_table functions ?

but we can change that once we have something returned

But we should already have a response returned from the API, right ?

That is what self._post returns.

The fact that self._post returns Any, might have been a bad design / tech debt. If this is part of the public API, which we cannot change, maybe we can consider returning a more useful type here.

If not an easy change, let's keep it like that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that it was probably not the best decision.
This function is kind of unnecessary, which is used to handle _post, _get, and _patch responses

def _handle_response(self, response: Response) -> Any:
"""Generic response handler utilized by all Dune API routes"""
try:
# Some responses can be decoded and converted to DuneErrors
response_json = response.json()
self.logger.debug(f"received response {response_json}")
return response_json
except JSONDecodeError as err:
# Others can't. Only raise HTTP error for not decodable errors
response.raise_for_status()
raise ValueError("Unreachable since previous line raises") from err

Should have just returned the Response object.
response.json() returns Any, so this will take a bit of work.

And as such, I don't think is worth it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was already implemented, but we can change that once we have something returned

I take this back, this was not as simple as I thought it would be.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks 🖖

Copy link
Collaborator

@bh2smith bh2smith Mar 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry folks, this was my fault ☹️ . But these internal private methods return Any and the idea is to convert them to the expected type inside these pubic facing interfaces. So, for example... we have these Object.from_dict() methods that convert Any responses to the expected types:

def get_execution_status(self, job_id: str) -> ExecutionStatusResponse:
"""GET status from Dune API for `job_id` (aka `execution_id`)"""
response_json = self._get(route=f"/execution/{job_id}/status")
try:
return ExecutionStatusResponse.from_dict(response_json)
except KeyError as err:
raise DuneError(response_json, "ExecutionStatusResponse", err) from err

Here would have been the same thing. Since I am currently implementing these API routes in the Node project, I will submit a corresponding PR for this one.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here is the related PR: #117

"""
https://docs.dune.com/api-reference/tables/endpoint/create
The create table endpoint allows you to create an empty table
with a specific schema in Dune.

The only limitations are:
- If a table already exists with the same name, the request will fail.
- Column names in the table can’t start with a special character or a digit.
"""

return self._post(
route="/table/create",
params={
"namespace": namespace,
"table_name": table_name,
"schema": schema,
"description": description,
"is_private": is_private,
},
)

def insert_table(
self,
namespace: str,
table_name: str,
data: IO[bytes],
content_type: str,
) -> Any:
"""
https://docs.dune.com/api-reference/tables/endpoint/insert
The insert table endpoint allows you to insert data into an existing table in Dune.

The only limitations are:
- The file has to be in json or csv format
- The file has to have the same schema as the table
"""

return self._post(
route=f"/table/{namespace}/{table_name}/insert",
headers={"Content-Type": content_type},
data=data,
)
4 changes: 0 additions & 4 deletions tests/e2e/test_async_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ async def test_refresh_context_manager(self):
async def test_refresh_with_pagination(self):
# Arrange
async with AsyncDuneClient(self.valid_api_key) as cl:

# Act
results = (await cl.refresh(self.multi_rows_query, batch_size=1)).get_rows()

Expand All @@ -60,7 +59,6 @@ async def test_refresh_with_pagination(self):
async def test_refresh_with_filters(self):
# Arrange
async with AsyncDuneClient(self.valid_api_key) as cl:

# Act
results = (
await cl.refresh(self.multi_rows_query, filters="number < 3")
Expand All @@ -78,7 +76,6 @@ async def test_refresh_with_filters(self):
async def test_refresh_csv_with_pagination(self):
# Arrange
async with AsyncDuneClient(self.valid_api_key) as cl:

# Act
result_csv = await cl.refresh_csv(self.multi_rows_query, batch_size=1)

Expand All @@ -97,7 +94,6 @@ async def test_refresh_csv_with_pagination(self):
async def test_refresh_csv_with_filters(self):
# Arrange
async with AsyncDuneClient(self.valid_api_key) as cl:

# Act
result_csv = await cl.refresh_csv(
self.multi_rows_query, filters="number < 3"
Expand Down
60 changes: 60 additions & 0 deletions tests/e2e/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,66 @@ def test_upload_csv_success(self):
True,
)

@unittest.skip("This is a plus subscription endpoint.")
helanto marked this conversation as resolved.
Show resolved Hide resolved
def test_create_table_success(self):
# Make sure the table doesn't already exist.
# You will need to change the namespace to your own.
client = DuneClient(self.valid_api_key)

namespace = "test"
table_name = "dataset_e2e_test"

self.assertEqual(
client.create_table(
namespace=namespace,
table_name=table_name,
description="e2e test table",
schema=[
{"name": "date", "type": "timestamp"},
{"name": "dgs10", "type": "double"},
],
is_private=False,
),
{
"namespace": namespace,
"table_name": table_name,
"full_name": f"dune.{namespace}.{table_name}",
"example_query": f"select * from dune.{namespace}.{table_name} limit 10",
},
)

@unittest.skip("This is a plus subscription endpoint.")
def test_insert_table_csv_success(self):
# Make sure the table already exists and csv matches table schema.
# You will need to change the namespace to your own.
client = DuneClient(self.valid_api_key)
with open("./tests/fixtures/sample_table_insert.csv", "rb") as data:
self.assertEqual(
client.insert_table(
namespace="test",
table_name="dataset_e2e_test",
data=data,
content_type="text/csv",
),
None,
)

@unittest.skip("This is a plus subscription endpoint.")
def test_insert_table_json_success(self):
# Make sure the table already exists and json matches table schema.
# You will need to change the namespace to your own.
client = DuneClient(self.valid_api_key)
with open("./tests/fixtures/sample_table_insert.json", "rb") as data:
self.assertEqual(
client.insert_table(
namespace="test",
table_name="dataset_e2e_test",
data=data,
content_type="application/x-ndjson",
),
None,
)

def test_download_csv_with_pagination(self):
# Arrange
client = DuneClient(self.valid_api_key)
Expand Down
2 changes: 2 additions & 0 deletions tests/fixtures/sample_table_insert.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
date,dgs10
2020-12-01T23:33:00,10
1 change: 1 addition & 0 deletions tests/fixtures/sample_table_insert.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"date":"2020-12-01T23:33:00","dgs10":10}
Loading