! pip install eemeter
Requirement already satisfied: eemeter in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (3.1.0) Requirement already satisfied: scipy in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (from eemeter) (1.7.3) Requirement already satisfied: pandas in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (from eemeter) (1.3.5) Requirement already satisfied: click in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (from eemeter) (8.0.3) Requirement already satisfied: statsmodels in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (from eemeter) (0.13.1) Requirement already satisfied: colorama in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (from click->eemeter) (0.4.4) Requirement already satisfied: numpy>=1.21.0 in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (from pandas->eemeter) (1.22.0) Requirement already satisfied: python-dateutil>=2.7.3 in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (from pandas->eemeter) (2.8.2) Requirement already satisfied: pytz>=2017.3 in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (from pandas->eemeter) (2021.3) Requirement already satisfied: patsy>=0.5.2 in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (from statsmodels->eemeter) (0.5.2) Requirement already satisfied: six in c:\trusted\source\notebooks\eemeter\.venv\lib\site-packages (from patsy>=0.5.2->statsmodels->eemeter) (1.16.0)
import eemeter
import pandas as pd
import numpy as np
import datetime
from pytz import UTC
import matplotlib.pyplot as plt
# Read hourly consumption and outdoor temperature from the same CSV file.
# Both series are loaded with tz='UTC' and converted to local time below.
meter_data = eemeter.io.meter_data_from_csv(
    'data.csv',
    tz='UTC',
    start_col='period_start_time',
    value_col='sum_energy_consumed_calculated',
    freq='hourly',
)
temperature_data = eemeter.io.temperature_data_from_csv(
    'data.csv',
    tz='UTC',
    date_col='period_start_time',
    temp_col='temperature_celsius',
    freq='hourly',
)

# Rename the consumption column to "value" and move both series to
# America/Toronto local time.
# NOTE(review): the "period_start_time" -> "start" entry looks like a no-op,
# since the tables printed later still show period_start_time as the index
# name - confirm before removing it.
meter_data = meter_data.rename(
    columns={"sum_energy_consumed_calculated": "value", "period_start_time": "start"}
).tz_convert('America/Toronto')
temperature_data = temperature_data.tz_convert('America/Toronto')

# Overview plot of energy use and temperature over time.
eemeter.plot_time_series(meter_data, temperature_data, figsize=(16, 4))
(<AxesSubplot:ylabel='Energy Use'>, <AxesSubplot:ylabel='Temperature'>)
# Energy-signature plot for the full data set; the returned handle is kept
# in `ax`.
ax = eemeter.plot_energy_signature(
    meter_data,
    temperature_data,
    figsize=(14, 8),
)
# Event windows, given as naive timestamp strings. They are localized to
# America/Toronto when expanded into hourly ranges.
events = [
    {'start': '2021-11-16 15:00:00', 'end': '2021-11-16 22:00:00'},
    {'start': '2021-11-25 15:00:00', 'end': '2021-11-25 22:00:00'},
    {'start': '2021-12-20 04:00:00', 'end': '2021-12-20 11:00:00'},
    {'start': '2021-12-23 04:00:00', 'end': '2021-12-23 11:00:00'},
    {'start': '2022-01-03 04:00:00', 'end': '2022-01-03 11:00:00'},
    {'start': '2022-01-03 15:00:00', 'end': '2022-01-03 22:00:00'},
    {'start': '2022-01-04 04:00:00', 'end': '2022-01-04 11:00:00'},
    {'start': '2022-01-04 15:00:00', 'end': '2022-01-04 22:00:00'},
]

# The event under analysis is the second-to-last entry of the list.
event = events[-2]

# Hourly timestamps spanned by that event (start and end hour both included).
event_indices = pd.date_range(
    start=event['start'],
    end=event['end'],
    freq='H',
    tz='America/Toronto',
)

# Meter readings that fall on those timestamps.
in_event = meter_data.index.isin(event_indices)
event_meter_data = meter_data.loc[in_event]
# Collect the hourly timestamps of every event so they can be excluded from
# the baseline.
event_ranges = [
    pd.date_range(start=e['start'], end=e['end'], freq='H', tz='America/Toronto')
    for e in events
]
# Merge the ranges with successive Index.union calls. DatetimeIndex.union_many
# is deprecated and no longer exists in pandas 2.0.
blackout_event_indices = event_ranges[0]
for event_range in event_ranges[1:]:
    blackout_event_indices = blackout_event_indices.union(event_range)

# The baseline window covers the 45 days before the event day and stops at
# the event day itself (exclusive).
baseline_start = pd.to_datetime(event['start']).date() - pd.Timedelta(45, unit='D')
baseline_end = pd.to_datetime(event['start']).date()
# Alternative: to also include 15 days after the event, use
#   baseline_end = pd.to_datetime(event['start']).date() + pd.Timedelta(15, unit='D')

# Trim the meter data to the baseline window, on a regular hourly index.
baseline_meter_data = meter_data.loc[
    (meter_data.index.date >= baseline_start) & (meter_data.index.date < baseline_end)
].asfreq('H')

# Remove the event hours. asfreq('H') then re-inserts those timestamps as
# rows without values, so the index stays regular while the event readings
# are left out.
blackout_meter_data = baseline_meter_data.loc[
    ~baseline_meter_data.index.isin(blackout_event_indices)
].asfreq('H')

# Build the preliminary design matrix from the blacked-out baseline and the
# temperature series, and display it.
preliminary_design_matrix = eemeter.create_caltrack_hourly_preliminary_design_matrix(
    blackout_meter_data, temperature_data
)
preliminary_design_matrix
meter_value | temperature_mean | cdd_65 | hdd_50 | n_hours_dropped | n_hours_kept | hour_of_week | |
---|---|---|---|---|---|---|---|
period_start_time | |||||||
2021-11-20 00:00:00-05:00 | 0.973140 | 0.5 | 0.0 | 49.5 | 0.0 | 1.0 | 120 |
2021-11-20 01:00:00-05:00 | 0.687283 | -0.3 | 0.0 | 50.3 | 0.0 | 1.0 | 121 |
2021-11-20 02:00:00-05:00 | 1.017613 | -0.3 | 0.0 | 50.3 | 0.0 | 1.0 | 122 |
2021-11-20 03:00:00-05:00 | 1.295297 | -0.6 | 0.0 | 50.6 | 0.0 | 1.0 | 123 |
2021-11-20 04:00:00-05:00 | 1.381657 | -0.8 | 0.0 | 50.8 | 0.0 | 1.0 | 124 |
... | ... | ... | ... | ... | ... | ... | ... |
2022-01-03 19:00:00-05:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2022-01-03 20:00:00-05:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2022-01-03 21:00:00-05:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2022-01-03 22:00:00-05:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2022-01-03 23:00:00-05:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1080 rows × 7 columns
# Segment the hourly series, then estimate occupancy, following the standard
# CalTRACK hourly process for the DR model.
# The segmentation type is 'single': one model is fit over the whole 45-day
# baseline period.
segment_type = 'single'
segmentation = eemeter.segment_time_series(
    preliminary_design_matrix.index, segment_type=segment_type
)

# No threshold is passed, so the library default applies (0.65 according to
# the original note in this cell).
occupancy_lookup = eemeter.estimate_hour_of_week_occupancy(
    preliminary_design_matrix, segmentation=segmentation
)

# Plot the occupancy flag, cast to 0/1, for each hour of the week as red dots.
occupancy_lookup.astype(int).plot(
    yticks=[0, 1],
    ylim=[-0.1, 1.1],
    style={'all': 'ro'},
    figsize=(14, 4),
)
<AxesSubplot:xlabel='hour_of_week'>
# Fit the temperature bins, returned separately for occupied and unoccupied
# hours, starting from custom default bin edges.
# min_temperature_count is not passed, so the library default applies
# (20 according to the original note in this cell).
temperature_bins_occ, temperature_bins_unocc = eemeter.fit_temperature_bins(
    preliminary_design_matrix,
    segmentation=segmentation,
    occupancy_lookup=occupancy_lookup,
    default_bins=[-20, -15, -10, -5, 0, 5],
)

# Build the per-segment design matrices.
design_matrices = eemeter.create_caltrack_hourly_segmented_design_matrices(
    preliminary_design_matrix,
    segmentation,
    occupancy_lookup,
    temperature_bins_occ,
    temperature_bins_unocc,
)

# Display the matrix of the single segment, keyed 'all'.
design_matrices['all']
meter_value | hour_of_week | bin_0_occupied | bin_1_occupied | bin_2_occupied | bin_3_occupied | bin_0_unoccupied | bin_1_unoccupied | bin_2_unoccupied | bin_3_unoccupied | bin_4_unoccupied | weight | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
period_start_time | ||||||||||||
2021-11-20 00:00:00-05:00 | 0.973140 | 120 | 0.0 | 0.0 | 0.0 | 0.0 | -10.0 | 5.0 | 5.0 | 0.5 | 0.0 | 1.0 |
2021-11-20 01:00:00-05:00 | 0.687283 | 121 | 0.0 | 0.0 | 0.0 | 0.0 | -10.0 | 5.0 | 4.7 | 0.0 | 0.0 | 1.0 |
2021-11-20 02:00:00-05:00 | 1.017613 | 122 | 0.0 | 0.0 | 0.0 | 0.0 | -10.0 | 5.0 | 4.7 | 0.0 | 0.0 | 1.0 |
2021-11-20 03:00:00-05:00 | 1.295297 | 123 | 0.0 | 0.0 | 0.0 | 0.0 | -10.0 | 5.0 | 4.4 | 0.0 | 0.0 | 1.0 |
2021-11-20 04:00:00-05:00 | 1.381657 | 124 | 0.0 | 0.0 | 0.0 | 0.0 | -10.0 | 5.0 | 4.2 | 0.0 | 0.0 | 1.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2022-01-03 19:00:00-05:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2022-01-03 20:00:00-05:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2022-01-03 21:00:00-05:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2022-01-03 22:00:00-05:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2022-01-03 23:00:00-05:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1080 rows × 12 columns
# Fit one model per segment; with the 'single' segmentation this is one model
# over the entire baseline period.
baseline_model = [
    eemeter.fit_caltrack_hourly_model_segment(segment_name, segment_data)
    for segment_name, segment_data in design_matrices.items()
]

# Wrap the fitted segment models in a SegmentedModel that can produce
# predictions, reusing the occupancy lookup and temperature bins fitted above.
prediction_segment_type = segment_type
prediction_segment_name_mapping = None
feature_processor_kwargs = {
    "occupancy_lookup": occupancy_lookup,
    "occupied_temperature_bins": temperature_bins_occ,
    "unoccupied_temperature_bins": temperature_bins_unocc,
}
segment_fit = eemeter.SegmentedModel(
    segment_models=baseline_model,
    prediction_segment_type=prediction_segment_type,
    prediction_segment_name_mapping=prediction_segment_name_mapping,
    prediction_feature_processor=eemeter.caltrack_hourly_prediction_feature_processor,
    prediction_feature_processor_kwargs=feature_processor_kwargs,
)

# Calculate the counterfactual usage and the metered savings for the event
# hours, then display the result table.
metered_savings, error_bands = eemeter.metered_savings(
    segment_fit,
    event_meter_data,
    temperature_data,
)
metered_savings
reporting_observed | counterfactual_usage | metered_savings | |
---|---|---|---|
period_start_time | |||
2022-01-04 04:00:00-05:00 | 6.170382 | 3.603139 | -2.567243 |
2022-01-04 05:00:00-05:00 | 8.278228 | 3.591399 | -4.686829 |
2022-01-04 06:00:00-05:00 | 5.612218 | 6.352853 | 0.740635 |
2022-01-04 07:00:00-05:00 | 2.597327 | 7.978571 | 5.381244 |
2022-01-04 08:00:00-05:00 | 3.273172 | 6.802381 | 3.529210 |
2022-01-04 09:00:00-05:00 | 3.949017 | 5.634479 | 1.685463 |
2022-01-04 10:00:00-05:00 | 6.986268 | 5.502158 | -1.484110 |
2022-01-04 11:00:00-05:00 | 6.327498 | 4.858611 | -1.468888 |
# Plot the observed usage, the modelled baseline (counterfactual) and the
# metered savings for the event hours.
plt.figure(figsize=(14, 8))
plt.plot(metered_savings['reporting_observed'], label="consumption")
plt.plot(metered_savings['counterfactual_usage'], label="baseline")
# Label the third series after the column it shows; the previous label
# "erasing" did not describe the plotted quantity.
plt.plot(metered_savings['metered_savings'], label="metered savings")
plt.legend()
<matplotlib.legend.Legend at 0x19af3e456c0>