{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "92475c69",
   "metadata": {},
   "source": [
    "# Python API Example - Access FOB Hub Netbacks\n",
    "\n",
    "This guide is designed to provide an example of how to access the Spark API:\n",
    "- The path to your client credentials is the only input needed to run this script (just before Section 2)\n",
    "- This script has been designed to display the raw outputs of requests from the API, and then shows you how to format those outputs to enable easy reading and analysis\n",
    "\n",
    "__N.B. This guide is just for Access FOB Hub Netbacks data. If you're looking for other API data products (such as contract prices, Freight routes or Netbacks), please refer to their corresponding code example files.__"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fc7443de",
   "metadata": {},
   "source": [
    "### Have any questions?\n",
    "\n",
    "If you have any questions regarding our API, or need help accessing specific datasets, please contact us at:\n",
    "\n",
    "__data@sparkcommodities.com__\n",
    "\n",
    "or refer to our API website for more information about this endpoint: https://www.sparkcommodities.com/api/request/access.html"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c5716130",
   "metadata": {},
   "source": [
    "## 1. Importing Data\n",
    "\n",
    "Here we define the functions that allow us to retrieve the valid credentials to access the Spark API.\n",
    "\n",
    "This section can remain unchanged for most Spark API users."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fe759439",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "import sys\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from base64 import b64encode\n",
    "from pprint import pprint\n",
    "from urllib.parse import urljoin\n",
    "import datetime\n",
    "from io import StringIO\n",
    "\n",
    "\n",
    "try:\n",
    "    from urllib import request, parse\n",
    "    from urllib.error import HTTPError\n",
    "except ImportError:\n",
    "    raise RuntimeError(\"Python 3 required\")\n",
    "\n",
    "\n",
    "API_BASE_URL = \"https://api.sparkcommodities.com\"\n",
    "\n",
    "\n",
    "def retrieve_credentials(file_path=None):\n",
    "    \"\"\"\n",
    "    Find credentials either by reading the client_credentials file or reading\n",
    "    environment variables.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    file_path : str, optional\n",
    "        Path to the client credentials CSV file. When None, the credentials are\n",
    "        read from the SPARK_CLIENT_ID and SPARK_CLIENT_SECRET environment\n",
    "        variables instead.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple\n",
    "        (client_id, client_secret)\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    RuntimeError\n",
    "        If the environment variables are not set, the file doesn't exist, its\n",
    "        first line is not a recognised header, or it has no well-formed\n",
    "        `client_id,client_secret` line after the header.\n",
    "    \"\"\"\n",
    "    if file_path is None:\n",
    "        client_id = os.getenv(\"SPARK_CLIENT_ID\")\n",
    "        client_secret = os.getenv(\"SPARK_CLIENT_SECRET\")\n",
    "        if not client_id or not client_secret:\n",
    "            raise RuntimeError(\n",
    "                \"SPARK_CLIENT_ID and SPARK_CLIENT_SECRET environment vars required\"\n",
    "            )\n",
    "    else:\n",
    "        # Parse the file\n",
    "        if not os.path.isfile(file_path):\n",
    "            raise RuntimeError(\"The file {} doesn't exist\".format(file_path))\n",
    "\n",
    "        with open(file_path) as fp:\n",
    "            # Strip whitespace and skip blank lines so stray spaces or empty\n",
    "            # lines don't break the header check or the split below\n",
    "            lines = [line.strip() for line in fp if line.strip()]\n",
    "\n",
    "        header = lines[0] if lines else \"\"\n",
    "        if header not in (\"clientId,clientSecret\", \"client_id,client_secret\"):\n",
    "            print(\"First line read: '{}'\".format(header))\n",
    "            raise RuntimeError(\n",
    "                \"The specified file {} doesn't look like to be a Spark API client \"\n",
    "                \"credentials file\".format(file_path)\n",
    "            )\n",
    "\n",
    "        # Guard against a header-only file so the caller gets a clear error\n",
    "        # instead of an IndexError\n",
    "        if len(lines) < 2:\n",
    "            raise RuntimeError(\n",
    "                \"The file {} contains a header but no credentials line\".format(\n",
    "                    file_path\n",
    "                )\n",
    "            )\n",
    "\n",
    "        fields = [field.strip() for field in lines[1].split(\",\")]\n",
    "        if len(fields) != 2 or not all(fields):\n",
    "            raise RuntimeError(\n",
    "                \"The credentials line in {} must have the form \"\n",
    "                \"client_id,client_secret\".format(file_path)\n",
    "            )\n",
    "        client_id, client_secret = fields\n",
    "\n",
    "    print(\">>>> Found credentials!\")\n",
    "    print(\n",
    "        \">>>> Client_id={}, client_secret={}****\".format(client_id, client_secret[:5])\n",
    "    )\n",
    "\n",
    "    return client_id, client_secret\n",
    "\n",
    "\n",
    "def do_api_post_query(uri, body, headers):\n",
    "    \"\"\"\n",
    "    Send `body` as JSON in an HTTP POST request to the Spark API and return the\n",
    "    decoded JSON response.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    uri : str\n",
    "        Path of the endpoint, joined onto API_BASE_URL.\n",
    "    body : object\n",
    "        JSON-serialisable payload of the request.\n",
    "    headers : dict\n",
    "        HTTP headers sent with the request.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The response content decoded from JSON.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    AssertionError\n",
    "        If the server answers with a status other than 201.\n",
    "    SystemExit\n",
    "        On an HTTPError, after printing the error code and response body.\n",
    "    \"\"\"\n",
    "    url = urljoin(API_BASE_URL, uri)\n",
    "\n",
    "    data = json.dumps(body).encode(\"utf-8\")\n",
    "\n",
    "    # HTTP POST request\n",
    "    req = request.Request(url, data=data, headers=headers)\n",
    "    try:\n",
    "        # The context manager closes the connection once the body has been read\n",
    "        with request.urlopen(req) as response:\n",
    "            status = response.status\n",
    "            resp_content = response.read()\n",
    "    except HTTPError as e:\n",
    "        print(\"HTTP Error: \", e.code)\n",
    "        print(e.read())\n",
    "        sys.exit(1)\n",
    "\n",
    "    # The server must return HTTP 201. Checked explicitly (rather than with an\n",
    "    # `assert` statement) so the check still runs under `python -O`\n",
    "    if status != 201:\n",
    "        raise AssertionError(resp_content)\n",
    "\n",
    "    # The server returned a JSON response\n",
    "    return json.loads(resp_content)\n",
    "\n",
    "\n",
    "def do_api_get_query(uri, access_token, format='json'):\n",
    "    \"\"\"\n",
    "    After receiving an Access Token, we can request information from the API.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    uri : str\n",
    "        Path (and query string) of the endpoint, joined onto API_BASE_URL.\n",
    "    access_token : str\n",
    "        Token sent in the `Authorization: Bearer` header.\n",
    "    format : str\n",
    "        'json' (default) or 'csv'; selects the `Accept` header of the request.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The response decoded from JSON when format='json', or the raw response\n",
    "    bytes when format='csv'.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If `format` is neither 'json' nor 'csv'.\n",
    "    AssertionError\n",
    "        If the server answers with a status other than 200.\n",
    "    SystemExit\n",
    "        On an HTTPError, after printing the error code and response body.\n",
    "    \"\"\"\n",
    "    # Reject an unsupported format before anything else is built or sent\n",
    "    if format not in ('json', 'csv'):\n",
    "        raise ValueError('The format parameter only takes `csv` or `json` as inputs')\n",
    "\n",
    "    url = urljoin(API_BASE_URL, uri)\n",
    "\n",
    "    headers = {\n",
    "        \"Authorization\": \"Bearer {}\".format(access_token),\n",
    "        \"Accept\": \"application/json\" if format == 'json' else \"text/csv\",\n",
    "    }\n",
    "\n",
    "    # HTTP GET request\n",
    "    req = request.Request(url, headers=headers)\n",
    "    try:\n",
    "        # The context manager closes the connection once the body has been read\n",
    "        with request.urlopen(req) as response:\n",
    "            status = response.status\n",
    "            resp_content = response.read()\n",
    "    except HTTPError as e:\n",
    "        print(\"HTTP Error: \", e.code)\n",
    "        print(e.read())\n",
    "        sys.exit(1)\n",
    "\n",
    "    # The server must return HTTP 200. Checked explicitly (rather than with an\n",
    "    # `assert` statement) so the check still runs under `python -O`\n",
    "    if status != 200:\n",
    "        raise AssertionError(resp_content)\n",
    "\n",
    "    # Storing response based on requested format\n",
    "    if format == 'json':\n",
    "        return json.loads(resp_content)\n",
    "    return resp_content\n",
    "\n",
    "\n",
    "def get_access_token(client_id, client_secret):\n",
    "    \"\"\"\n",
    "    Request a fresh access token. Applications present the access token on every\n",
    "    API request, so it must be stored confidentially.\n",
    "\n",
    "    # Procedure:\n",
    "\n",
    "    A POST query carrying `grantType` in its body is sent to `/oauth/token/` with\n",
    "    a basic authorization HTTP header. The \"Basic\" HTTP authentication scheme\n",
    "    (RFC 7617) transmits the credentials as a base64-encoded\n",
    "    `clientId:clientSecret` pair.\n",
    "\n",
    "    Returns the `accessToken` value of the response.\n",
    "    \"\"\"\n",
    "\n",
    "    # Note: this example sticks to urllib from the standard library. One should\n",
    "    # consider using https://requests.readthedocs.io/\n",
    "\n",
    "    raw_pair = \"{}:{}\".format(client_id, client_secret).encode()\n",
    "    basic_auth = b64encode(raw_pair).decode()\n",
    "\n",
    "    token_response = do_api_post_query(\n",
    "        uri=\"/oauth/token/\",\n",
    "        body={\"grantType\": \"clientCredentials\"},\n",
    "        headers={\n",
    "            \"Authorization\": \"Basic {}\".format(basic_auth),\n",
    "            \"Accept\": \"application/json\",\n",
    "            \"Content-Type\": \"application/json\",\n",
    "        },\n",
    "    )\n",
    "\n",
    "    access_token = token_response[\"accessToken\"]\n",
    "    # Only the first 5 characters of the token are printed\n",
    "    message = \">>>> Successfully fetched an access token {}****, valid {} seconds.\"\n",
    "    print(message.format(access_token[:5], token_response[\"expiresIn\"]))\n",
    "\n",
    "    return access_token\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a51e1ef0",
   "metadata": {},
   "source": [
    "## N.B. Credentials\n",
    "\n",
    "Here we call the above functions, and input the file path to our credentials.\n",
    "\n",
    "N.B. You must have downloaded your client credentials CSV file before proceeding. Please refer to the API documentation if you have not downloaded it already. Instructions for downloading your credentials can be found here:\n",
    "\n",
    "https://api.sparkcommodities.com/redoc#section/Authentication/Create-an-Oauth2-Client\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0817250f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Insert file path to your client credentials here.\n",
    "# (retrieve_credentials reads the SPARK_CLIENT_ID / SPARK_CLIENT_SECRET environment\n",
    "# variables instead when file_path is None)\n",
    "client_id, client_secret = retrieve_credentials(file_path=\"/tmp/client_credentials.csv\")\n",
    "\n",
    "# Authenticate: exchange the credentials for the access token used by the requests below\n",
    "access_token = get_access_token(client_id, client_secret)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bdc1fb62",
   "metadata": {},
   "source": [
    "# 2. Reference Data\n",
    "\n",
    "Fetching the reference-data endpoint to obtain a list of the available FOB ports and regas terminals, along with their corresponding UUIDs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f4ddd6e",
   "metadata": {},
   "outputs": [],
   "source": [
    "from io import StringIO\n",
    "\n",
    "## Defining the reference-data calling function\n",
    "\n",
    "def fetch_fobhub_reference_data(access_token, format='json'):\n",
    "    \"\"\"\n",
    "    Query the FOB Hub Netbacks reference-data endpoint.\n",
    "\n",
    "    Returns the decoded JSON content for format='json'. For format='csv' the\n",
    "    response is parsed into a Pandas DataFrame, unless it is empty, in which\n",
    "    case 'No Data to Show' is printed and the raw empty response is returned.\n",
    "    \"\"\"\n",
    "    raw = do_api_get_query(\n",
    "        uri=\"/v1.0/lng/access/fob-hub-netbacks/reference-data/\",\n",
    "        access_token=access_token,\n",
    "        format=format,\n",
    "    )\n",
    "\n",
    "    if format == 'csv':\n",
    "        if len(raw) > 0:\n",
    "            # a non-empty CSV payload is converted into a Pandas DataFrame\n",
    "            data = pd.read_csv(StringIO(raw.decode('utf-8')))\n",
    "        else:\n",
    "            # nothing to parse: hand back the raw (empty) response\n",
    "            print('No Data to Show')\n",
    "            data = raw\n",
    "    elif format == 'json':\n",
    "        data = raw\n",
    "\n",
    "    return data\n",
    "\n",
    "# The CSV format returns the reference data as a Pandas DataFrame, which the cells\n",
    "# below filter by name to look up UUIDs\n",
    "ref_data = fetch_fobhub_reference_data(access_token, format='csv')\n",
    "ref_data"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e71c4a50",
   "metadata": {},
   "source": [
    "# 3. Calling FOB Hub Netbacks data\n",
    "\n",
    "Here we define a function to fetch the FOB Hub Netbacks dataset. The available parameters can be found on our API docs:\n",
    "\n",
    "https://www.sparkcommodities.com/api/lng-access/fob-hub-netbacks.html\n",
    "\n",
    "\n",
    "__N.B.:__ This endpoint provides the option to return a JSON or CSV formatted response. Metadata is only available in the JSON format."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa52c69a",
   "metadata": {},
   "outputs": [],
   "source": [
    "from io import StringIO\n",
    "\n",
    "## Defining the function\n",
    "\n",
    "def fetch_price_releases(access_token, unit, fob_port_uuid, via_point, start=None, end=None, limit=None, offset=None, regas_terminal_uuid=None, format='json'):\n",
    "    \"\"\"\n",
    "    Fetch FOB Hub Netbacks data from the `/v1.0/lng/access/fob-hub-netbacks/`\n",
    "    endpoint.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    access_token : str\n",
    "        Access token passed on to do_api_get_query.\n",
    "    unit, fob_port_uuid\n",
    "        Always sent, as the `unit` and `fob-port-uuid` query parameters.\n",
    "    via_point, start, end, regas_terminal_uuid, limit, offset\n",
    "        Optional filters, sent as `via-point`, `start`, `end`,\n",
    "        `regas-terminal-uuid`, `limit` and `offset`. Any of them left as None is\n",
    "        omitted from the request.\n",
    "    format : str\n",
    "        'json' (default) or 'csv'.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The decoded JSON content for format='json'. For format='csv', a Pandas\n",
    "    DataFrame, or the raw empty response (after printing 'No Data to Show')\n",
    "    when there is no data.\n",
    "    \"\"\"\n",
    "    # unit and fob-port-uuid are always sent; the other filters only when set\n",
    "    params = [(\"unit\", unit), (\"fob-port-uuid\", fob_port_uuid)]\n",
    "    # NOTE(review): limit/offset are forwarded under these names - confirm against\n",
    "    # the API docs for this endpoint\n",
    "    optional_params = (\n",
    "        (\"via-point\", via_point),\n",
    "        (\"start\", start),\n",
    "        (\"end\", end),\n",
    "        (\"regas-terminal-uuid\", regas_terminal_uuid),\n",
    "        (\"limit\", limit),\n",
    "        (\"offset\", offset),\n",
    "    )\n",
    "    params.extend((name, value) for name, value in optional_params if value is not None)\n",
    "\n",
    "    # urlencode percent-escapes any reserved characters in the values\n",
    "    uri = \"/v1.0/lng/access/fob-hub-netbacks/?{}\".format(parse.urlencode(params))\n",
    "\n",
    "    content = do_api_get_query(\n",
    "        uri=uri, access_token=access_token, format=format\n",
    "    )\n",
    "\n",
    "    if format == 'json':\n",
    "        data = content\n",
    "    elif format == 'csv':\n",
    "        # if there's no data to show, returns raw response (empty string) and \"No Data to Show\" message\n",
    "        if len(content) == 0:\n",
    "            data = content\n",
    "            print('No Data to Show')\n",
    "        else:\n",
    "            # automatically converting into a Pandas DataFrame when choosing CSV format\n",
    "            data = pd.read_csv(StringIO(content.decode('utf-8')))\n",
    "\n",
    "    return data\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8683002d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fetching a FoB Port UUID: filter the reference data by port name and take the\n",
    "# UUID from the first matching row\n",
    "sabine_uuid = ref_data[ref_data['FOBPortName'] == 'Sabine Pass']['FOBPortUUID'].iloc[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "37704d61",
   "metadata": {},
   "source": [
    "#### JSON Response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3970e2d0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calling historical data - JSON response\n",
    "# (no `format` argument is passed, so fetch_price_releases uses its default, 'json')\n",
    "\n",
    "json_data = fetch_price_releases(access_token, unit='usd-per-mmbtu', fob_port_uuid=sabine_uuid,\n",
    "                                            via_point=None, start='2026-01-01', end='2026-01-03')\n",
    "json_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "23f95f71",
   "metadata": {},
   "outputs": [],
   "source": [
    "# checking metaData (only available in the JSON format, not in the CSV response)\n",
    "json_data['metaData']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "047ebd7a",
   "metadata": {},
   "source": [
    "#### CSV Response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5670a704",
   "metadata": {},
   "outputs": [],
   "source": [
    "# fetching the data in CSV format\n",
    "# (fetch_price_releases returns a non-empty CSV response as a Pandas DataFrame)\n",
    "csv_data = fetch_price_releases(access_token, unit='usd-per-mmbtu', fob_port_uuid=sabine_uuid, via_point=None, \n",
    "                                            start='2026-01-01', end='2026-01-03', format='csv')\n",
    "csv_data.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cf9b9685",
   "metadata": {},
   "outputs": [],
   "source": [
    "# listing the column names of the DataFrame\n",
    "# (loop_historical_data further below converts every column from position 13 onwards\n",
    "# to numeric, so the column order shown here matters)\n",
    "csv_data.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2e5fd6c1",
   "metadata": {},
   "source": [
    "#### Using terminal parameter to retrieve one terminal's data only"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c2f46435",
   "metadata": {},
   "outputs": [],
   "source": [
    "# pick a regas terminal by name and look up its UUID in the reference data\n",
    "term_name = 'Montoir'\n",
    "term_uuid = ref_data.loc[ref_data['RegasTerminalName'] == term_name, 'RegasTerminalUUID'].iloc[0]\n",
    "\n",
    "# request the CSV data for that single terminal only\n",
    "term_data = fetch_price_releases(\n",
    "    access_token,\n",
    "    unit='usd-per-mmbtu',\n",
    "    fob_port_uuid=sabine_uuid,\n",
    "    via_point=None,\n",
    "    start='2026-01-01',\n",
    "    end='2026-01-10',\n",
    "    regas_terminal_uuid=term_uuid,\n",
    "    format='csv',\n",
    ")\n",
    "term_data.head(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4836a33a",
   "metadata": {},
   "source": [
    "## N.B. Historical Data Limits\n",
    "\n",
    "Currently, a maximum of 1 year's worth of historical data can be called at one time due to the size of the data file. \n",
    "\n",
    "If more data points are required, the below code can be used: the function calls the historical data 1 year at a time and combines the data into one Pandas DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "29a5ce06",
   "metadata": {},
   "outputs": [],
   "source": [
    "import datetime\n",
    "\n",
    "def loop_historical_data(access_token, fob_port_uuid, via_point, unit, hist_start, hist_end, regas_terminal_uuid=None):\n",
    "    \"\"\"\n",
    "    Fetch FOB Hub Netbacks data for a date range of any length by requesting it\n",
    "    in consecutive windows and combining the results into one Pandas DataFrame.\n",
    "\n",
    "    Every window is requested through fetch_price_releases(..., format='csv') and\n",
    "    spans at most 365 days between its start and end date. Windows do not overlap\n",
    "    and the last one stops at `hist_end`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    access_token, fob_port_uuid, via_point, unit, regas_terminal_uuid\n",
    "        Forwarded unchanged to fetch_price_releases for every window.\n",
    "    hist_start, hist_end : str\n",
    "        First and last date of the range, formatted as 'YYYY-MM-DD'.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        All windows concatenated under a fresh 0..n-1 index, with every column\n",
    "        from position 13 onwards converted by pd.to_numeric and 'ReleaseDate'\n",
    "        converted by pd.to_datetime. An empty DataFrame is returned when no\n",
    "        window contains data.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If hist_end is earlier than hist_start.\n",
    "    \"\"\"\n",
    "    date_format = '%Y-%m-%d'\n",
    "    window_days = 365  # maximum number of days between the start and end of one request\n",
    "    # NOTE(review): assumes the numeric value columns begin at position 13 of the\n",
    "    # CSV response - confirm against csv_data.columns\n",
    "    first_numeric_col = 13\n",
    "\n",
    "    range_start = datetime.datetime.strptime(hist_start, date_format)\n",
    "    range_end = datetime.datetime.strptime(hist_end, date_format)\n",
    "    if range_end < range_start:\n",
    "        raise ValueError('hist_end must not be earlier than hist_start')\n",
    "\n",
    "    frames = []\n",
    "    window_start = range_start\n",
    "    while window_start <= range_end:\n",
    "        # Clamp the final window so that no request runs past hist_end\n",
    "        window_end = min(window_start + datetime.timedelta(days=window_days), range_end)\n",
    "        window_df = fetch_price_releases(\n",
    "            access_token,\n",
    "            unit=unit,\n",
    "            fob_port_uuid=fob_port_uuid,\n",
    "            via_point=via_point,\n",
    "            start=window_start.strftime(date_format),\n",
    "            end=window_end.strftime(date_format),\n",
    "            regas_terminal_uuid=regas_terminal_uuid,\n",
    "            format='csv',\n",
    "        )\n",
    "        # fetch_price_releases returns the raw empty response rather than a\n",
    "        # DataFrame when a window holds no data - leave those windows out\n",
    "        if isinstance(window_df, pd.DataFrame):\n",
    "            frames.append(window_df)\n",
    "\n",
    "        # The next window starts the day after this one ends, so no date is\n",
    "        # requested twice\n",
    "        window_start = window_end + datetime.timedelta(days=1)\n",
    "\n",
    "    if not frames:\n",
    "        return pd.DataFrame()\n",
    "\n",
    "    # Concatenate once, after the loop, instead of growing the frame per window\n",
    "    hist_df = pd.concat(frames, ignore_index=True)\n",
    "\n",
    "    for c in list(hist_df.columns)[first_numeric_col:]:\n",
    "        hist_df[c] = pd.to_numeric(hist_df[c])\n",
    "    hist_df['ReleaseDate'] = pd.to_datetime(hist_df['ReleaseDate'])\n",
    "\n",
    "    return hist_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "631ac210",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Example call, wrapped in a string literal so that it is not executed when the\n",
    "# notebook is run. Remove the surrounding triple quotes to run it.\n",
    "\"\"\"\n",
    "hist_df = loop_historical_data(access_token, unit='usd-per-mmbtu', fob_port_uuid=sabine_uuid, via_point=None, \n",
    "                                            hist_start='2024-01-01', hist_end='2026-01-03')\n",
    "\n",
    "hist_df\n",
    "\"\"\""
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}