"""

Uses pre-defined algorithms to output text metrics
to file.

You may need to run
nltk.download('vader_lexicon')
nltk.download('punkt')
"""

import os
import configparser
import json
import pandas as pd
import importlib
import src.txtutils as txtutil
import src.txtmetrics as txtmet

# Re-execute the already-imported project modules so that the names used
# below come from their current source (this is what importlib.reload does).
# NOTE(review): presumably here for interactive sessions where src/ is edited
# between runs -- confirm. The reload order is deliberately left unchanged.
importlib.reload(txtmet)
importlib.reload(txtutil)

# ---------------------------------------------------------------------------
# Settings: load config.ini and build the list of papers to process
# ---------------------------------------------------------------------------
config = configparser.ConfigParser()
config.read("config.ini")
# The paper list is stored in the [papers] section as a JSON-encoded value.
paper_list = json.loads(config.get("papers", "paper_list"))
# "TEST" is normally best left out of later runs, but the examples rely on
# its output being reproducible, so it is always included (first) whenever
# the text metrics are created.
paper_list = paper_list if "TEST" in paper_list else ["TEST"] + paper_list

# ---------------------------------------------------------------------------
# Run every text metric on every newspaper and write one CSV per newspaper
# ---------------------------------------------------------------------------
# Second argument passed to txtutil.getCleanTextDf (presumably a row limit).
# Set to none for all:
nrows = None
# Metric functions from txtmetrics. Each one is called with the "cleanText"
# column and its result is stored in a column named after the function.
funcs_to_run = [
    txtmet.opinion,
    txtmet.tf_idf_econom,
    txtmet.tf_idf_uncertain,
    txtmet.harvard,
    txtmet.loughran,
    txtmet.vader,
    txtmet.afinn,
    txtmet.word_count_econom,
    txtmet.word_count_uncertain,
    txtmet.nyman,
    txtmet.alexopoulos,
    txtmet.baker_bloom_davis,
    txtmet.husted,
    txtmet.stability,
]
# Map from function name to its nice name, merged from both config sections
# (entries in txtmetrics_s win on a clash because they are applied last).
func_names = dict(config["txtmetrics_u"])
func_names.update(dict(config["txtmetrics_s"]))
# Only the metric columns are written out; everything else is dropped.
cols_to_keep = [f.__name__ for f in funcs_to_run]

for newspaper in paper_list:
    output_file_name = newspaper + config["fEnds"]["met_pa"]
    # -------------------------------------------------------------------------
    # Read & clean the text
    # -------------------------------------------------------------------------
    df = txtutil.getCleanTextDf(newspaper, nrows)
    # =========================================================================
    # Run the text metrics
    # =========================================================================
    print("Running text metrics")
    for i, func in enumerate(funcs_to_run):
        print("\n ---------- \n On iteration " + str(i))
        print("Currently running " + func.__name__)
        print("also known as " + func_names[func.__name__])
        # A column with this name already exists in the frame: keep it as is.
        if func.__name__ in df.columns:
            continue
        try:
            df[func.__name__] = func(df["cleanText"])
        except Exception as err:
            # Best effort: one failing metric must not stop the others, but
            # report which paper failed and why. Catching Exception (not a
            # bare except) lets Ctrl-C / SystemExit still abort the run.
            print(
                "Running "
                + func.__name__
                + " on "
                + newspaper
                + " failed: "
                + repr(err)
            )
    # Metrics that failed simply have no column in the output file.
    (
        df.drop([x for x in df.columns if x not in cols_to_keep], axis=1).to_csv(
            os.path.join(config["data"]["intermed"], output_file_name),
            date_format="%d %m %Y",
            encoding="utf8",
        )
    )
    print("Finished running textmetricproduce on " + newspaper)
