clear;
clc;
close;

%% read the raw data
% This script expects the following ISSDA data files in the current directory:
%   'Residential allocations.xls' : Excel file with the household treatment
%                                   assignment.
%   'GasDataWeek X', X = 0,...,77 : one ASCII file per week with the household
%                                   gas consumption, measured every half hour.

% Read every weekly file into its own workspace variable raw_data_<week>.
% csvread(...,2,0) starts reading at row offset 2, i.e. it skips the first
% two rows of each file.
% eval is used only to create the numbered variable names that the rest of
% the script (and the saved .mat files) rely on; the evaluated string is
% built from the integer loop counter alone.
for week = 0:77
    eval(sprintf('raw_data_%d = csvread(''GasDataWeek %d'',2,0);', week, week));
end
clear week

% read file with treatment assignments
assignments_csv = xlsread('Residential allocations.xls');
% keep only the first two columns (used later in the script as the household
% ID and the treatment code)
assignments_csv = assignments_csv(:,1:2);
% recode NaN entries of the numeric table as 0
assignments_csv(isnan(assignments_csv)) = 0;

% Create the results directory for the processed data. Requesting the status
% outputs keeps mkdir quiet when the folder already exists (re-runs) and
% lets a real failure be reported explicitly.
[mkdir_ok, mkdir_msg] = mkdir('results');
if ~mkdir_ok
    error('gas_data:mkdirFailed', 'Could not create the results directory: %s', mkdir_msg);
end
clear mkdir_ok mkdir_msg

% Save the raw data. The file is addressed by path instead of cd-ing into
% the folder, so a failed save cannot leave the session in the wrong
% directory.
save(fullfile('results','gas_data1'), 'raw_data_*', 'assignments_csv');
% clear
% load(fullfile('results','gas_data1'))

%% create consumption matrix in matlab format
% Builds
%   i_grid      : sorted unique values of column 1 of the weekly data
%                 (matched later against the assignment table's first column),
%   t_grid      : sorted unique values of column 2 of the weekly data,
%   consumption : numel(i_grid)-by-numel(t_grid) matrix holding column 3 of
%                 the weekly data; NaN where a (row, column) pair never occurs.

% Weeks used to build the consumption matrix.
% NOTE(review): week 0 is read and saved earlier in this script but is not
% included here - confirm whether that exclusion is intended.
weeks = 1:77;
gas_data1_file = fullfile('results','gas_data1');

% First pass: collect the grids. Each week is loaded from gas_data1 into a
% struct and accessed through a dynamic field name (no eval needed).
t_grid = [];
i_grid = [];
for j = weeks
    % disp(j)
    var_name = sprintf('raw_data_%d', j);
    loaded = load(gas_data1_file, var_name);
    data = loaded.(var_name);

    i_grid = unique([i_grid; data(:,1)]);
    t_grid = unique([t_grid; data(:,2)]);
end

consumption = NaN(size(i_grid,1),size(t_grid,1));

% Second pass: fill the matrix one week at a time.
for j = weeks
    % disp(j)
    var_name = sprintf('raw_data_%d', j);
    loaded = load(gas_data1_file, var_name);
    data = loaded.(var_name);

    % Locate every data row on the two grids in one call each, instead of
    % scanning both grids once per data row.
    [row_found, row_idx] = ismember(data(:,1), i_grid);
    [col_found, col_idx] = ismember(data(:,2), t_grid);
    % Rows whose keys are NaN match nothing (as with ==) and are skipped.
    valid = row_found & col_found;

    lin_idx = sub2ind(size(consumption), row_idx(valid), col_idx(valid));
    values = data(valid,3);
    % If a (row, column) pair occurs more than once within a week, keep the
    % last occurrence, which is what a row-by-row fill produces.
    [lin_idx, last_pos] = unique(lin_idx, 'last');
    consumption(lin_idx) = values(last_pos);
end
clear var_name loaded row_found row_idx col_found col_idx valid lin_idx values last_pos

% Save preliminary results. The file is addressed by path instead of cd-ing
% into the folder, so a failed save cannot leave the session in the wrong
% directory.
save(fullfile('results','gas_data2'), 'raw_data_*', 'consumption', 'assignments_csv', 't_grid', 'i_grid');
% clear
% load(fullfile('results','gas_data2'))

%% create treatment assignment vector
% treatment(k) is the second column of the assignments_csv row whose first
% column equals i_grid(k).
assigned_ids = assignments_csv(:,1);
[has_assignment, assignment_row] = ismember(i_grid, assigned_ids);

% Every household in the consumption data must appear in the assignment
% table; report the problem explicitly instead of failing on a size mismatch.
if ~all(has_assignment)
    error('gas_data:missingAssignment', ...
        '%d household(s) in the consumption data have no row in the treatment assignment table (first missing ID: %g).', ...
        sum(~has_assignment), i_grid(find(~has_assignment,1)));
end

% A household listed more than once would make the lookup ambiguous.
matched_ids = assigned_ids(ismember(assigned_ids, i_grid));
if numel(unique(matched_ids)) ~= numel(matched_ids)
    error('gas_data:duplicateAssignment', ...
        'At least one household in the consumption data has more than one row in the treatment assignment table.');
end

treatment = assignments_csv(assignment_row,2);
assert(~any(isnan(treatment)), 'Treatment assignment contains NaN values.');
clear assigned_ids has_assignment assignment_row matched_ids

%% delete missing consumption data;
% Drop every household (row) with at least one NaN in its consumption series,
% keeping i_grid and treatment aligned with the rows of consumption.
missing_data = sum(isnan(consumption),2)>0;
complete_rows = ~missing_data;
consumption = consumption(complete_rows,:);
i_grid = i_grid(complete_rows,:);
treatment = treatment(complete_rows,:);
clear complete_rows


% For each point in the time grid, define day and hour:
% each t_grid value is split as day*100 + hour.
% NOTE(review): presumably t_grid is the ISSDA day/time code, in which case
% "hour" is a within-day reading slot rather than a clock hour - confirm
% against the data documentation.
% NOTE: the variable names day and hour are kept because they are written to
% gas_data_final; any MATLAB functions of the same name are shadowed while
% these variables exist in the workspace.
day = floor(t_grid/100);
hour = t_grid-day*100;

%% save results
% The file is addressed by path instead of cd-ing into the folder, so a
% failed save cannot leave the session in the wrong directory.
save(fullfile('results','gas_data_final'), 'consumption', 'treatment', 't_grid', 'i_grid', 'day', 'hour');

