"""
This script grabs data from Dow Jones given an API key
"""
import requests
import json
import os
from time import sleep
from pathlib import Path
import uuid

# Directory that query/response metadata files are written to
articles_dir = Path(os.path.realpath("DataSources/DowJones"))
# Directory holding the credentials file
creds_dir_full = Path(os.path.realpath("creds"))
# Name of the file containing the API key
cred_file_name = Path("creds.txt")
# URL of the analytics endpoint (for extraction documents)
url_analytics = "https://api.dowjones.com/alpha/extractions/documents"
# Temporary fix for path issues: the key is read from a path resolved against
# the current working directory. read_text() opens and closes the file itself,
# so no file handle is left dangling; surrounding whitespace is stripped.
MY_API_KEY = (creds_dir_full / cred_file_name).read_text().strip()
# Headers sent with every request to the API
headers_analytics = {"content-type": "application/json", "user-key": MY_API_KEY}

# =============================================================================
# QUERY DESIGN
# =============================================================================
# Earliest publication datetime to include in the extraction
start_date = "1990-01-01 00:00:00"
publication_source_codes = [
    "DAIM",  # Daily Mail
    "DMIRR",  # Daily Mirror
    "GRDN",  # Guardian
    "THESUN",  # The Sun
    "T",  # The Times
]
# The where-clause compares against LOWER(source_code), so lower-case the codes
publication_source_codes = [code.lower() for code in publication_source_codes]
subject_codes = [
    "mcat",  # Commodity/Financial Market News
    "ccat",  # Corp/Industrial News
    "ecat",  # Economic News
]
article_type_codes = [
    "nedi",  # Editorials
    "nedc",  # Commentaries/Opinions
]
exclude_list = [
    "nrgn",  # Routine General News
]
# Build query
# Quoted, comma-separated source codes for the SQL-like IN (...) list
_quoted_source_codes = "', '".join(publication_source_codes)
query_analytics = {
    "query": {
        "where": (
            "language_code = 'en' AND"
            f" publication_datetime >= '{start_date}' AND "
            f"(LOWER(source_code) IN ('{_quoted_source_codes}') )"
        ),
        "frequency": "DAY",
        "date_field": "publication_datetime",
        "group_by_source_code": True,
        "includes": {"subject_codes": subject_codes + article_type_codes},
        # Key was previously misspelled "exlcudes", so the exclusion list was
        # sent under a name that does not match the "includes" counterpart.
        "excludes": {"subject_codes": exclude_list},
    }
}


# =============================================================================
# Run query
# =============================================================================
# Generate a unique id; it prefixes every file written for this run so the
# query and its responses can be matched up afterwards
query_id = str(uuid.uuid4())

# Make sure the output directory exists before writing into it; otherwise the
# first open() below fails with FileNotFoundError on a fresh checkout
articles_dir.mkdir(parents=True, exist_ok=True)

# Save query information
fname = articles_dir / Path(query_id + "_query.json")
with open(fname, "w") as f:
    print("Writing to", fname)
    # re-serialize it for pretty indentation
    f.write(json.dumps(query_analytics, indent=2))

# Make request to the explain endpoint
response = requests.post(
    url_analytics + "/_explain",
    data=json.dumps(query_analytics),
    headers=headers_analytics,
)
# Check the explain to verify the query was valid
if response.status_code != 201:
    print("ERROR: An error occurred creating an explain: " + response.text)
else:
    responsejson = response.json()
    responsetext = response.text
    # Save response to json
    fname = articles_dir / Path(query_id + "_response.json")
    with open(fname, "w") as f:
        print("Writing to", fname)
        # re-serialize it for pretty indentation
        f.write(json.dumps(responsejson, indent=2))
    # Save response to text
    fname = articles_dir / Path(query_id + "_responsetxt.txt")
    with open(fname, "w") as f:
        print("Writing to", fname)
        # Write the raw body as received. Passing it through json.dumps would
        # wrap the already-serialized text in quotes and escape its contents.
        f.write(responsetext)
    # Show result of request:
    print(responsejson)


# Create a snapshot with the given query
print("Creating the snapshot: " + json.dumps(query_analytics))
response = requests.post(
    url_analytics, data=json.dumps(query_analytics), headers=headers_analytics
)
print(response.text)

# Verify the response from creating an extraction is OK. Without a created job
# there is no status link to poll, so stop here with a non-zero exit status
# instead of falling through to the loop and crashing on an undefined self_link.
if response.status_code != 201:
    print("ERROR: An error occurred creating an extraction: " + response.text)
    raise SystemExit(1)

extraction = response.json()
print(extraction)
print("Extraction Created. Job ID: " + extraction["data"]["id"])
self_link = extraction["links"]["self"]
sleep(30)
print("Checking state of the job.")

# States in which the job is still in progress, mapped to the progress message
# printed while waiting. Any other state is treated as final.
pending_states = {
    "JOB_STATE_RUNNING": "Job state running",
    "JOB_VALIDATING": "Job validating",
    "JOB_QUEUED": "Job queued",
    "JOB_CREATED": "Job created",
}

while True:
    # We now call the second endpoint, which will tell us if the extraction is ready.
    status_response = requests.get(self_link, headers=headers_analytics)

    # Verify the response from the self_link is OK
    if status_response.status_code != 200:
        print(
            "ERROR: an error occurred getting the details for the extraction: "
            + status_response.text
        )
        # Wait before retrying so a failing endpoint is not polled in a tight loop
        print("Sleeping for 30 seconds...")
        sleep(30)
        continue

    status = status_response.json()
    attributes = status["data"]["attributes"]

    # There is an edge case where the job does not have a current_state yet. If
    # current_state does not yet exist in the response, sleep for 30 seconds.
    if "current_state" not in attributes:
        print("Sleeping for 30 seconds...")
        sleep(30)
        continue

    currentState = attributes["current_state"]
    print("Current state is: " + currentState)

    # Job has not finished yet: sleep for 30 seconds and poll again
    if currentState in pending_states:
        print("Sleeping for 30 seconds... " + pending_states[currentState])
        sleep(30)
        continue

    # If currentState is JOB_STATE_DONE then everything completed successfully
    if currentState == "JOB_STATE_DONE":
        print("Job completed successfully")
        print("Downloading snapshot files to current directory")
        # Files are saved next to this script; resolve that directory once
        dir_path = os.path.dirname(os.path.realpath(__file__))
        for file in attributes["files"]:
            # The last path segment of the URI is used as the local file name
            filename = os.path.join(dir_path, file["uri"].split("/")[-1])
            r = requests.get(file["uri"], stream=True, headers=headers_analytics)
            # Do not save an error body to disk as if it were snapshot data
            if r.status_code != 200:
                print(
                    "ERROR: an error occurred downloading "
                    + file["uri"]
                    + ": "
                    + r.text
                )
                continue
            with open(filename, "wb") as fd:
                # Stream in 64 KiB chunks; same bytes written, far fewer writes
                for chunk in r.iter_content(chunk_size=64 * 1024):
                    fd.write(chunk)

    # job has another state that means it was not successful.
    else:
        print("An error occurred with the job. Final state is: " + currentState)

    break
