In [ ]:
! pip install eemeter
Requirement already satisfied: eemeter in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (3.1.0)
Requirement already satisfied: scipy in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (from eemeter) (1.7.3)
Requirement already satisfied: pandas in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (from eemeter) (1.3.5)
Requirement already satisfied: click in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (from eemeter) (8.0.3)
Requirement already satisfied: statsmodels in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (from eemeter) (0.13.1)
Requirement already satisfied: colorama in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (from click->eemeter) (0.4.4)
Requirement already satisfied: numpy>=1.21.0 in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (from pandas->eemeter) (1.22.0)
Requirement already satisfied: python-dateutil>=2.7.3 in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (from pandas->eemeter) (2.8.2)
Requirement already satisfied: pytz>=2017.3 in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (from pandas->eemeter) (2021.3)
Requirement already satisfied: patsy>=0.5.2 in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (from statsmodels->eemeter) (0.5.2)
Requirement already satisfied: six in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (from patsy>=0.5.2->statsmodels->eemeter) (1.16.0)
In [ ]:
import eemeter
import pandas as pd
import numpy as np
import datetime
from pytz import UTC
import matplotlib.pyplot as plt
In [ ]:
# Meter readings and temperatures are read from the same CSV file; both are
# parsed with tz='UTC' and freq='hourly'.
meter_data = eemeter.io.meter_data_from_csv(
    'data.csv',
    tz='UTC',
    start_col='period_start_time',
    value_col='sum_energy_consumed_calculated',
    freq='hourly',
)
temperature_data = eemeter.io.temperature_data_from_csv(
    'data.csv',
    tz='UTC',
    date_col='period_start_time',
    temp_col='temperature_celsius',
    freq='hourly',
)

# Rename the consumption column to "value", then move both series to the
# America/Toronto time zone so later date/hour logic works in local time.
column_renames = {"sum_energy_consumed_calculated": "value", "period_start_time": "start" }
meter_data = meter_data.rename(columns=column_renames).tz_convert('America/Toronto')
temperature_data = temperature_data.tz_convert('America/Toronto')

# Overview plot of energy use and temperature over time.
eemeter.plot_time_series(meter_data, temperature_data, figsize=(16, 4))
Out[ ]:
(<AxesSubplot:ylabel='Energy Use'>, <AxesSubplot:ylabel='Temperature'>)
In [ ]:
# Energy signature plot built from the meter and temperature series; the
# returned axes are kept in `ax`.
ax = eemeter.plot_energy_signature(
    meter_data,
    temperature_data,
    figsize=(14, 8),
)
In [ ]:
# Event windows as naive wall-clock strings; they are localized to
# America/Toronto when expanded into hourly ranges below.
events = [
    {'start':'2021-11-16 15:00:00','end':'2021-11-16 22:00:00'},
    {'start':'2021-11-25 15:00:00','end':'2021-11-25 22:00:00'},
    {'start':'2021-12-20 04:00:00','end':'2021-12-20 11:00:00'},
    {'start':'2021-12-23 04:00:00','end':'2021-12-23 11:00:00'},
    {'start':'2022-01-03 04:00:00','end':'2022-01-03 11:00:00'},
    {'start':'2022-01-03 15:00:00','end':'2022-01-03 22:00:00'},
    {'start':'2022-01-04 04:00:00','end':'2022-01-04 11:00:00'},
    {'start':'2022-01-04 15:00:00','end':'2022-01-04 22:00:00'},
]

# Event under analysis: the second-to-last entry of `events`.
event = events[-2]
event_indices = pd.date_range(start = event['start'], end=event['end'], freq= 'H', tz='America/Toronto')
# Meter readings whose timestamp falls on one of the event's hourly marks.
event_meter_data = meter_data.loc[meter_data.index.isin(event_indices)]

# Hourly timestamps of *every* event; these hours are blacked out of the baseline.
event_hour_ranges = [pd.date_range(start = e['start'], end=e['end'], freq= 'H', tz='America/Toronto') for e in events]
# Fold the ranges together with DatetimeIndex.union. The previous
# DatetimeIndex.union_many call was deprecated in pandas 1.4 and removed in
# pandas 2.0; the pairwise union yields the same combined index.
blackout_event_indices = event_hour_ranges[0]
for event_hour_range in event_hour_ranges[1:]:
    blackout_event_indices = blackout_event_indices.union(event_hour_range)
In [ ]:
# Baseline window: the 45 calendar days before the event day. The event day
# itself is excluded (strict `<` in the mask below).
# Alternative: extend the window past the event, e.g.
#   baseline_end = pd.to_datetime(event['start']).date() + pd.Timedelta(15, unit='D')
event_day = pd.to_datetime(event['start']).date()
baseline_start = event_day - pd.Timedelta(45, unit='D')
baseline_end = event_day

# Keep readings whose calendar date lies in [baseline_start, baseline_end),
# then put them on a regular hourly index.
reading_dates = meter_data.index.date
in_baseline_window = (reading_dates >= baseline_start) & (reading_dates < baseline_end)
baseline_meter_data = meter_data.loc[in_baseline_window].asfreq('H')

# Remove every event hour from the baseline. The second asfreq('H') restores the
# hourly grid, so removed hours inside the range reappear as missing (NaN) rows.
is_event_hour = baseline_meter_data.index.isin(blackout_event_indices)
blackout_meter_data = baseline_meter_data.loc[~is_event_hour].asfreq('H')

# Preliminary CalTRACK hourly design matrix from the blacked-out baseline.
preliminary_design_matrix = eemeter.create_caltrack_hourly_preliminary_design_matrix(
    blackout_meter_data,
    temperature_data,
)
preliminary_design_matrix
Out[ ]:
meter_value temperature_mean cdd_65 hdd_50 n_hours_dropped n_hours_kept hour_of_week
period_start_time
2021-11-20 00:00:00-05:00 0.973140 0.5 0.0 49.5 0.0 1.0 120
2021-11-20 01:00:00-05:00 0.687283 -0.3 0.0 50.3 0.0 1.0 121
2021-11-20 02:00:00-05:00 1.017613 -0.3 0.0 50.3 0.0 1.0 122
2021-11-20 03:00:00-05:00 1.295297 -0.6 0.0 50.6 0.0 1.0 123
2021-11-20 04:00:00-05:00 1.381657 -0.8 0.0 50.8 0.0 1.0 124
... ... ... ... ... ... ... ...
2022-01-03 19:00:00-05:00 NaN NaN NaN NaN NaN NaN NaN
2022-01-03 20:00:00-05:00 NaN NaN NaN NaN NaN NaN NaN
2022-01-03 21:00:00-05:00 NaN NaN NaN NaN NaN NaN NaN
2022-01-03 22:00:00-05:00 NaN NaN NaN NaN NaN NaN NaN
2022-01-03 23:00:00-05:00 NaN NaN NaN NaN NaN NaN NaN

1080 rows × 7 columns

In [ ]:
# Standard CalTRACK hourly flow: segment the baseline, then estimate occupancy.
# The key choice is segment_type='single': one model is fit over the whole
# baseline period instead of one per sub-period.
segment_type = 'single'
segmentation = eemeter.segment_time_series(
    preliminary_design_matrix.index, segment_type=segment_type
)

# Occupancy per hour of week; `threshold` is left at its default (0.65).
occupancy_lookup = eemeter.estimate_hour_of_week_occupancy(
    preliminary_design_matrix, segmentation=segmentation
)

# Show the lookup as 0/1 markers against hour of week.
occupancy_as_int = occupancy_lookup.astype(int)
occupancy_as_int.plot(yticks=[0,1], ylim=[-0.1,1.1], style={'all':'ro'}, figsize=(14, 4))
Out[ ]:
<AxesSubplot:xlabel='hour_of_week'>
In [ ]:
# Candidate temperature bin endpoints passed as `default_bins`.
# NOTE(review): presumably in degrees Celsius, since the temperature column is
# 'temperature_celsius' -- confirm. `min_temperature_count` stays at its default (20).
default_bin_endpoints = [-20, -15, -10, -5, 0, 5]
temperature_bins_occ, temperature_bins_unocc = eemeter.fit_temperature_bins(
    preliminary_design_matrix,
    segmentation=segmentation,
    occupancy_lookup=occupancy_lookup,
    default_bins=default_bin_endpoints,
)

# Build the design matrices, keyed by segment name, from the preliminary
# matrix, the occupancy lookup and the fitted temperature bins.
design_matrices = eemeter.create_caltrack_hourly_segmented_design_matrices(
    preliminary_design_matrix,
    segmentation,
    occupancy_lookup,
    temperature_bins_occ,
    temperature_bins_unocc,
)

# The 'all' key holds the design matrix displayed for this run.
design_matrices['all']
Out[ ]:
meter_value hour_of_week bin_0_occupied bin_1_occupied bin_2_occupied bin_3_occupied bin_0_unoccupied bin_1_unoccupied bin_2_unoccupied bin_3_unoccupied bin_4_unoccupied weight
period_start_time
2021-11-20 00:00:00-05:00 0.973140 120 0.0 0.0 0.0 0.0 -10.0 5.0 5.0 0.5 0.0 1.0
2021-11-20 01:00:00-05:00 0.687283 121 0.0 0.0 0.0 0.0 -10.0 5.0 4.7 0.0 0.0 1.0
2021-11-20 02:00:00-05:00 1.017613 122 0.0 0.0 0.0 0.0 -10.0 5.0 4.7 0.0 0.0 1.0
2021-11-20 03:00:00-05:00 1.295297 123 0.0 0.0 0.0 0.0 -10.0 5.0 4.4 0.0 0.0 1.0
2021-11-20 04:00:00-05:00 1.381657 124 0.0 0.0 0.0 0.0 -10.0 5.0 4.2 0.0 0.0 1.0
... ... ... ... ... ... ... ... ... ... ... ... ...
2022-01-03 19:00:00-05:00 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2022-01-03 20:00:00-05:00 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2022-01-03 21:00:00-05:00 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2022-01-03 22:00:00-05:00 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2022-01-03 23:00:00-05:00 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

1080 rows × 12 columns

In [ ]:
# Fit one CalTRACK hourly model per entry of `design_matrices`.
baseline_model = []
for segment_name, segment_data in design_matrices.items():
    baseline_model.append(
        eemeter.fit_caltrack_hourly_model_segment(segment_name, segment_data)
    )

# Wrap the fitted segment(s) in a SegmentedModel, which is what gets passed to
# eemeter.metered_savings below. Prediction reuses the segmentation type of the
# fit, with no segment-name remapping.
prediction_segment_type = segment_type
prediction_segment_name_mapping = None
feature_processor_kwargs = {
    "occupancy_lookup": occupancy_lookup,
    "occupied_temperature_bins": temperature_bins_occ,
    "unoccupied_temperature_bins": temperature_bins_unocc,
}
segment_fit = eemeter.SegmentedModel(
    segment_models=baseline_model,
    prediction_segment_type=prediction_segment_type,
    prediction_segment_name_mapping=prediction_segment_name_mapping,
    prediction_feature_processor=eemeter.caltrack_hourly_prediction_feature_processor,
    prediction_feature_processor_kwargs=feature_processor_kwargs,
)

# Counterfactual usage during the event hours, and the resulting savings
# against the observed readings.
metered_savings, error_bands = eemeter.metered_savings(
    segment_fit,
    event_meter_data,
    temperature_data,
)

metered_savings
Out[ ]:
reporting_observed counterfactual_usage metered_savings
period_start_time
2022-01-04 04:00:00-05:00 6.170382 3.603139 -2.567243
2022-01-04 05:00:00-05:00 8.278228 3.591399 -4.686829
2022-01-04 06:00:00-05:00 5.612218 6.352853 0.740635
2022-01-04 07:00:00-05:00 2.597327 7.978571 5.381244
2022-01-04 08:00:00-05:00 3.273172 6.802381 3.529210
2022-01-04 09:00:00-05:00 3.949017 5.634479 1.685463
2022-01-04 10:00:00-05:00 6.986268 5.502158 -1.484110
2022-01-04 11:00:00-05:00 6.327498 4.858611 -1.468888
In [ ]:
# Observed consumption, modelled baseline and their difference over the event hours.
savings_fig, savings_ax = plt.subplots(figsize=(14, 8))
plotted_series = [
    ('reporting_observed', "consumption"),
    ('counterfactual_usage', "baseline"),
    ('metered_savings', "erasing"),
]
for series_name, legend_label in plotted_series:
    savings_ax.plot(metered_savings[series_name], label=legend_label)
savings_ax.legend()
Out[ ]:
<matplotlib.legend.Legend at 0x19af3e456c0>