from itertools import cycle
from time import sleep, time
from typing import Iterator, Optional

import pandas as pd
from tqdm import tqdm

from chainalysis._constants import ANALYTICAL_ENDPOINTS, BASE_URL
from chainalysis._exceptions import (
    BadRequest,
    DataSolutionsAPIException,
    DataSolutionsSDKException,
    UnhandledException,
)
from chainalysis.util_functions.requests import issue_request
[docs]
class AnalyticalQuery:
    """
    Execute SQL queries on Data Solutions analytical tables.

    An instance is callable: calling it submits the query, polls its status
    until it finishes, and stores the outcome on the instance. Results can
    then be read as JSON-style rows (:meth:`json`) or as a pandas DataFrame
    (:meth:`df`), and execution statistics are available via :meth:`stats`.
    """

    def __init__(self, api_key: str):
        """
        Initialize the AnalyticalQuery class with the provided API key.

        The per-query state is initialised here as well, so the accessor
        methods can be called before any query has been executed (they then
        report the "error" state with a generic ``UnhandledException``).

        :param api_key: The API key for accessing the analytical service.
        :type api_key: str
        """
        self.api_key = api_key
        self._reset_state()

    def _reset_state(self) -> None:
        """Reset every per-query attribute to its pre-execution default."""
        self._status_code = 0
        self.results = []
        self._stats = {}
        self.json_response = {}
        self.dataframe_data = None
        # The status starts as "error" so that results cannot be read until
        # a query has actually reported success.
        self._status = "error"
        self.error_message = ""
        self.error_details = ""
        self.next_url = None
        self._total_pages = 0
        self.query_id = None
        self.exception = UnhandledException()

    def __call__(
        self,
        query: str,
        parameters: Optional[dict[str, str]] = None,
        polling_interval_sec: int = 5,
        autopaginate: bool = True,
    ) -> "AnalyticalQuery":
        """
        Execute a SQL query asynchronously using the provided parameters
        and polling interval.

        Autopagination is enabled by default. If autopagination is enabled,
        the method will fetch all pages of results and return a single
        AnalyticalQuery object with all results.

        Otherwise, the method will return an AnalyticalQuery object with the
        first page of results. Use the next_page() method to fetch the
        next page of results and the stats() method to get statistics
        that can help you determine the approach to take to fetch all results.

        Errors raised while polling or paginating are not propagated from
        this call; they are stored on the instance and raised later by
        json(), df() or stats(). Errors raised while submitting the query
        are propagated directly.

        :param query: The SQL query to be executed.
        :type query: str
        :param parameters: A dictionary of parameters to be used in the query.
            Defaults to an empty dictionary.
        :type parameters: dict[str, str], optional
        :param polling_interval_sec: The interval in seconds between status
            checks. Values below 5 are raised to 5.
        :type polling_interval_sec: int, optional
        :param autopaginate: Whether to automatically retrieve full results
            instead of individual pages.
        :type autopaginate: bool, optional
        :return: This AnalyticalQuery instance, populated with the query outcome.
        :rtype: AnalyticalQuery
        :raises DataSolutionsAPIException: Raises an exception if the query ID is not returned.
        """
        self._reset_state()

        # Enforce the minimum polling interval.
        polling_interval_sec = max(polling_interval_sec, 5)

        base_url = BASE_URL["base_url"]
        query_execution_url = (
            f"{base_url}/{ANALYTICAL_ENDPOINTS['async_query_execution']}"
        )
        body = {
            "sql": query,
            # A fresh dict per call; never share a mutable default.
            "parameters": {} if parameters is None else parameters,
        }

        async_response = issue_request(
            api_key=self.api_key,
            url=query_execution_url,
            body=body,
            method="POST",
        )

        self.query_id = async_response.get("query_id")
        if not self.query_id:
            raise DataSolutionsAPIException(
                "Unexpected response. Query ID was not returned."
            )

        async_query_status_url = f"{base_url}/{ANALYTICAL_ENDPOINTS['async_query_status']}?query_id={self.query_id}"

        try:
            if self._poll_query_status(async_query_status_url, polling_interval_sec):
                if autopaginate and self.has_next():
                    self._autopaginate()
        except DataSolutionsSDKException as e:
            self._status = "error"
            self.exception = e.get_exception()
            self._status_code = e.status_code
        except Exception as e:
            # Anything unexpected is wrapped and surfaced by the accessors.
            self._status = "error"
            self.exception = UnhandledException(details=e)

        return self

    @staticmethod
    def _wait_with_spinner(
        pbar, spinner: Iterator[str], status: str, duration: float
    ) -> None:
        """Sleep for ``duration`` seconds while animating a spinner on ``pbar``."""
        end_time = time() + duration
        while time() < end_time:
            remaining_time = int(end_time - time())
            pbar.set_description(
                f"{next(spinner)} Query is {status}. Checking status again in {remaining_time}s"
            )
            sleep(0.1)

    def _poll_query_status(self, status_url: str, polling_interval_sec: float) -> bool:
        """
        Poll ``status_url`` until the query reports "success" or "error".

        :return: True if the query succeeded, False if it reported an error.
        :rtype: bool
        """
        spinner = cycle(["|", "/", "-", "\\"])
        with tqdm(bar_format="{desc}", leave=False) as pbar:
            while True:
                pbar.set_description("Checking query status")
                self.json_response = issue_request(
                    api_key=self.api_key,
                    url=status_url,
                    method="GET",
                )
                self._status = self.json_response["status"]

                if self._status == "success":
                    self._status_code = 200
                    self._stats = self.json_response["stats"]
                    self.results = self.json_response["results"]
                    self.next_url = self.json_response["next"]
                    self._total_pages = self._stats["total_pages"]
                    return True

                if self._status == "error":
                    # NOTE(review): self.exception is left untouched here, so
                    # the accessors raise the generic UnhandledException; the
                    # API's description is kept in error_message/error_details.
                    self.error_message = self.json_response["message"]
                    self.error_details = self.json_response.get("details")
                    return False

                # Every non-terminal status ("running", "pending", or one this
                # client does not recognise) waits before the next check, so an
                # unexpected status can never become a tight request loop.
                self._wait_with_spinner(
                    pbar, spinner, self._status, polling_interval_sec
                )

    def _autopaginate(self) -> None:
        """Fetch every remaining page and store the combined rows in ``self.results``."""
        results = self.results.copy()
        total_pages = self._total_pages
        with tqdm(total=total_pages, desc="Fetching data", unit="page") as pbar:
            while self.has_next():
                next_page = self.next_page()
                if next_page._status != "error":
                    results.extend(next_page.results)
                    pbar.update(1)
                else:
                    raise next_page.exception
            # Ensure the progress bar reaches 100%
            pbar.n = pbar.total
            pbar.refresh()
        self.results = results
        self.next_url = None

    def next_page(self) -> "AnalyticalQuery":
        """
        Fetch the next page of analytical query results.

        The instance is updated in place: its results, stats and next URL are
        replaced by those of the fetched page.

        :return: This AnalyticalQuery instance, holding the next page of results.
        :rtype: AnalyticalQuery
        :raises BadRequest: Raises an exception if there is no next page available.
        """
        if not self.next_url:
            raise BadRequest(
                "No next page available. Use the method has_next() to check if there is a next page that can be retrieved."
            )

        self.json_response = issue_request(
            api_key=self.api_key,
            url=self.next_url,
            method="GET",
        )
        self._status = self.json_response["status"]
        if self._status == "error":
            self.error_message = self.json_response["message"]
            self.error_details = self.json_response.get("details")
        elif self._status == "success":
            self._stats = self.json_response["stats"]
            self.results = self.json_response["results"]
            self.next_url = self.json_response["next"]
        return self

    def json(self) -> list:
        """
        Return results as JSON.

        :return: Results of the SQL query as returned by the API.
        :rtype: list
        :raises Exception: Raises the stored exception if the query resulted in an error.
        """
        if self._status == "error":
            raise self.exception
        return self.results

    def df(self) -> pd.DataFrame:
        """
        Convert query results into a pandas DataFrame.

        The DataFrame is also kept on the instance as ``dataframe_data``.

        :return: DataFrame containing the results of the SQL query.
        :rtype: pd.DataFrame
        :raises Exception: Raises the stored exception if the query resulted in an error.
        """
        if self._status == "error":
            raise self.exception
        self.dataframe_data = pd.DataFrame(self.results)
        return self.dataframe_data

    def stats(self) -> dict:
        """
        Get the statistics of the executed query.

        :return: Statistics of the query execution.
        :rtype: dict
        :raises Exception: Raises the stored exception if the query resulted in an error.
        """
        if self._status == "error":
            raise self.exception
        return self._stats

    def status_code(self) -> int:
        """
        Get the HTTP status code of the response.

        :return: HTTP status code (0 until a query has completed).
        :rtype: int
        """
        return self._status_code

    def was_successful(self) -> bool:
        """
        Determine if the query executed successfully.

        :return: True if the query was successful, False otherwise.
        :rtype: bool
        """
        return self._status != "error"

    def total_pages(self) -> int:
        """
        Return total number of pages.

        :return: Number of pages.
        :rtype: int
        """
        return self._total_pages

    def has_next(self) -> bool:
        """
        Return if the next page exists.

        :return: Whether next page exists.
        :rtype: bool
        """
        return bool(self.next_url)