"""
Triple Barrier Method combined with the Meta-Labeling Method.
This is the implementation of "Chapter 3: Labeling" in the book "Advances in Financial Machine Learning".
This module combines Meta-Labeling with the Triple Barrier Labeling Method to label events that will be used
to train the ML algorithm.
"""
from typing import List, Optional, Union
import numpy as np
import pandas as pd
from mlfinpy.util.multiprocess import mp_pandas_obj
# Snippet 3.2, page 45, Triple Barrier Labeling Method
def triple_barriers(  # pragma: no cover
    close: pd.Series,
    events: pd.DataFrame,
    pt_sl: Union[List[float], np.ndarray],
    molecule: np.ndarray,
) -> pd.DataFrame:
    """
    Triple Barrier Labeling Method

    Applies the triple-barrier method to the subset of events selected by `molecule`, so that
    several molecules can be processed in parallel. For every event it walks the price path from
    the event start to its vertical barrier and records the first time each horizontal barrier
    is crossed.

    Parameters
    ----------
    close : pd.Series
        Close prices series.
    events : pd.DataFrame
        Events table indexed by event start, with the columns read here:
        't1' (vertical barrier, may be missing), 'trgt' (unit barrier width) and 'side' (bet side).
    pt_sl : list or np.ndarray
        Element 0 is the profit taking multiple; element 1 is the stop loss multiple.
        A multiple that is not strictly positive disables the corresponding barrier.
    molecule : np.ndarray
        A set of index values of `events` to process.

    Returns
    -------
    out : pd.DataFrame
        Indexed like the selected events, with columns 't1' (copied from `events`),
        'pt' and 'sl' (earliest profit-taking / stop-loss touch, missing when never touched).

    Note
    ----
    Advances in Financial Machine Learning, Snippet 3.2, page 45.
    """
    # Apply stop loss/profit taking, if it takes place before t1 (end of event)
    events_ = events.loc[molecule]
    out = events_[["t1"]].copy(deep=True)

    profit_taking_multiple = pt_sl[0]
    stop_loss_multiple = pt_sl[1]

    # A disabled barrier is an explicit float NaN threshold: every comparison against it is
    # False, so the barrier is never reported as touched.
    disabled_barrier = pd.Series(np.nan, index=events_.index, dtype="float64")

    # Profit taking active
    if profit_taking_multiple > 0:
        profit_taking = profit_taking_multiple * events_["trgt"]
    else:
        profit_taking = disabled_barrier

    # Stop loss active
    if stop_loss_multiple > 0:
        stop_loss = -stop_loss_multiple * events_["trgt"]
    else:
        stop_loss = disabled_barrier

    # Empty series align to all-missing columns that share the dtype of the events index
    out["pt"] = pd.Series(dtype=events.index.dtype)
    out["sl"] = pd.Series(dtype=events.index.dtype)

    # Events without a vertical barrier are evaluated up to the last available price
    for loc, vertical_barrier in events_["t1"].fillna(close.index[-1]).items():
        closing_prices = close.loc[loc:vertical_barrier]  # Path prices for a given trade
        cum_returns = (closing_prices / close.loc[loc] - 1) * events_.at[loc, "side"]  # Path returns
        out.at[loc, "sl"] = cum_returns[cum_returns < stop_loss[loc]].index.min()  # Earliest stop loss date
        out.at[loc, "pt"] = cum_returns[cum_returns > profit_taking[loc]].index.min()  # Earliest profit taking date

    return out
# Snippet 3.4 page 49, Adding a Vertical Barrier
def add_vertical_barrier(t_events, close, num_days=0, num_hours=0, num_minutes=0, num_seconds=0):
    """
    Adding a Vertical Barrier

    For each timestamp in `t_events`, finds the timestamp of the first price bar at or after the
    event time plus the requested holding period. Events whose barrier would fall beyond the last
    price bar are dropped. The result can be passed as `vertical_barrier_times` to `get_events`.

    Parameters
    ----------
    t_events : pd.Series
        Series of events timestamps from the filters e.g. Cusum filter, Z-score filter.
    close : pd.Series
        Close prices series; its index is searched, so it is expected to be sorted.
    num_days : int, optional
        Number of days to add for vertical barrier.
    num_hours : int, optional
        Number of hours to add for vertical barrier.
    num_minutes : int, optional
        Number of minutes to add for vertical barrier.
    num_seconds : int, optional
        Number of seconds to add for vertical barrier.

    Returns
    -------
    vertical_barriers : pd.Series
        Timestamps of vertical barriers, indexed by the event timestamps that were kept.

    Notes
    ------
    Advances in Financial Machine Learning, Snippet 3.4, page 49.
    """
    # Holding period built from the individual components
    holding_period = pd.Timedelta(days=num_days, hours=num_hours, minutes=num_minutes, seconds=num_seconds)

    # Position of the first price bar at or after each shifted event time
    barrier_positions = close.index.searchsorted(t_events + holding_period)

    # A position equal to len(close) means the barrier lies past the last price bar
    within_range = barrier_positions < close.shape[0]

    # Apply the SAME mask to the events and to the positions, so each surviving event stays paired
    # with its own barrier even when the dropped events are not the trailing ones
    barrier_timestamps = close.index[barrier_positions[within_range]]
    surviving_events = t_events[within_range]

    # Create a series with the vertical barrier timestamps
    vertical_barriers = pd.Series(data=barrier_timestamps, index=surviving_events)
    return vertical_barriers
# Snippet 3.3 -> 3.6 page 50, Getting the Time of the First Touch, with Meta Labels
def get_events(
    close: pd.Series,
    t_events: pd.Series,
    pt_sl: List[float],
    target: pd.Series,
    min_ret: float,
    num_threads: int,
    vertical_barrier_times: Union[pd.Series, bool] = False,
    side_prediction: Optional[pd.Series] = None,
    verbose: bool = True,
) -> pd.DataFrame:
    """
    Getting the Time of the First Touch, with Meta Labels

    Orchestrates the Triple Barrier Method: builds the events table, dispatches the barrier search
    through `mp_pandas_obj` and stores, for each event, the earliest barrier touch.

    Parameters
    ----------
    close : pd.Series
        Close prices.
    t_events : pd.Series
        Timestamps that seed every triple barrier, e.g. the output of a CUSUM filter
        (sampling procedures of Chapter 2, Section 2.5).
    pt_sl : List[float]
        Element 0 is the profit taking multiple; element 1 is the stop loss multiple.
        A 0 value disables the respective horizontal barrier.
        When `side_prediction` is None, element 0 is used for BOTH horizontal barriers during the
        search, while the returned 'sl' column still reports element 1.
    target : pd.Series
        Values used (in conjunction with pt_sl) to determine the width of the barriers,
        e.g. a daily volatility series.
    min_ret : float
        Events whose target is not strictly greater than this value are discarded.
    num_threads : int
        Forwarded to `mp_pandas_obj` as the number of concurrent workers.
    vertical_barrier_times : Union[pd.Series, bool]
        Timestamps of the vertical barriers. Pass False to disable vertical barriers.
    side_prediction : Optional[pd.Series]
        Side of the bet (long/short) as decided by the primary model.
    verbose : bool
        Forwarded to `mp_pandas_obj` to report progress on the jobs.

    Returns
    -------
    events : pd.DataFrame
        Dataframe of first touch events with meta-labels.

        - events.index is event's starttime
        - events['t1'] is event's endtime (earliest of the touched barriers)
        - events['trgt'] is event's target
        - events['side'] (only when `side_prediction` is given) is the algo's position side
        - events['pt'] is profit taking multiple
        - events['sl'] is stop loss multiple

    Notes
    -----
    Advances in Financial Machine Learning, Snippet 3.6 page 50.
    """
    side_given = side_prediction is not None

    # 1) Keep only the seeded timestamps whose target clears the minimum return
    target = target.reindex(t_events)
    target = target[target > min_ret]

    # 2) Vertical barrier (max holding period); all-NaT when disabled
    if vertical_barrier_times is False:
        vertical_barrier_times = pd.Series(pd.NaT, index=t_events, dtype=t_events.dtype)

    # 3) Side and horizontal barrier multiples used for the search
    if side_given:
        side_ = side_prediction.reindex(target.index)  # Subset side_prediction on target index.
        pt_sl_ = pt_sl[:2]
    else:
        # Without a primary model every bet is treated as long with symmetric barriers
        side_ = pd.Series(1.0, index=target.index)
        pt_sl_ = [pt_sl[0], pt_sl[0]]

    # Assemble [t1, trgt, side] and discard rows without a target
    event_columns = {"t1": vertical_barrier_times, "trgt": target, "side": side_}
    events = pd.concat(event_columns, axis=1)
    events = events.dropna(subset=["trgt"])

    # Apply Triple Barrier
    first_touch_dates = mp_pandas_obj(
        func=triple_barriers,
        pd_obj=("molecule", events.index),
        num_threads=num_threads,
        close=close,
        events=events,
        pt_sl=pt_sl_,
        verbose=verbose,
    )

    # The event ends at whichever barrier was touched first
    for event_start in events.index:
        touches = first_touch_dates.loc[event_start, :].dropna()
        events.at[event_start, "t1"] = touches.min()

    # The placeholder side is dropped when no primary model was supplied
    if not side_given:
        events = events.drop("side", axis=1)

    # Add profit taking and stop loss multiples for vertical barrier calculations
    events["pt"] = pt_sl[0]
    events["sl"] = pt_sl[1]

    return events
# Snippet 3.9, pg 55, Question 3.3
def barrier_touched(out_df: pd.DataFrame, events: pd.DataFrame) -> pd.DataFrame:
    """
    Label each event by the barrier it ended on.

    Adjusts the getBins function (Snippet 3.7) so that a 0 is returned whenever neither
    horizontal barrier level was exceeded by the realised return.

    - Top horizontal barrier: 1
    - Bottom horizontal barrier: -1
    - Vertical barrier: 0

    Parameters
    ----------
    out_df : pd.DataFrame
        Frame with 'ret' (log return) and 'trgt' columns. A 'bin' column is added to it in place.
    events : pd.DataFrame
        The original events data frame. Its 'pt' and 'sl' columns hold the barrier multiples and
        are looked up by the index labels of `out_df`.

    Returns
    -------
    pd.DataFrame
        The same `out_df` object, now holding returns, target, and labels.

    Notes
    -----
    Advances in Financial Machine Learning, Snippet 3.9, page 55, Question 3.3.
    """
    labels = []

    for timestamp, row in out_df.iterrows():
        realised = row["ret"]
        # Barrier widths are expressed in log-return space, like the realised return
        log_width = np.log(1 + row["trgt"])
        upper_level = log_width * events.loc[timestamp, "pt"]
        lower_level = -log_width * events.loc[timestamp, "sl"]

        hit_top = realised > 0.0 and realised > upper_level
        hit_bottom = realised < 0.0 and realised < lower_level

        # Top barrier -> 1, bottom barrier -> -1, otherwise the vertical barrier -> 0
        labels.append(1 if hit_top else (-1 if hit_bottom else 0))

    # Save to 'bin' column and return
    out_df["bin"] = labels
    return out_df
# Snippet 3.4 -> 3.7, page 51, Labeling for Side & Size with Meta Labels
def get_bins(triple_barrier_events: pd.DataFrame, close: pd.Series) -> pd.DataFrame:
    """
    Labeling for Side & Size with Meta Labels

    Computes each event's outcome (including side information, if provided).

    When a 'side' column is present (meta-labeling), the possible values for labels in out['bin']
    are {0,1}, as opposed to the previous feasible values {-1,0,1}. The ML algorithm will be
    trained to decide whether to take the bet or pass, a purely binary prediction. When the
    predicted label is 1, we can use the probability of this secondary prediction to derive the
    size of the bet, where the side (sign) of the position has been set by the primary model.

    Parameters
    ----------
    triple_barrier_events : pd.DataFrame
        DataFrame returned by 'get_events' with columns:

        - index: event starttime
        - t1: event endtime (rows without it are ignored)
        - trgt: event target
        - pt, sl: barrier multiples, read when assigning labels
        - side (optional): position side

        Case 1: ('side' not in events): bin in (-1,0,1) <-label by price action.
        Case 2: ('side' in events): bin in (0,1) <-label by pnl (meta-labeling).
    close : pd.Series
        Close prices series.

    Returns
    -------
    out_df : pd.DataFrame
        Labeled events with columns 'ret' (simple return), 'trgt', 'bin' and, when available, 'side'.

    Notes
    -----
    Advances in Financial Machine Learning, Snippet 3.7, page 51.
    """
    # 1) Align prices with their respective events
    events_ = triple_barrier_events.dropna(subset=["t1"])
    has_side = "side" in events_
    end_times = events_["t1"].array
    all_dates = events_.index.union(other=end_times).drop_duplicates()
    # Dates missing from the price index take the next available price
    prices = close.reindex(all_dates, method="bfill")

    # 2) Create out DataFrame
    # Need to take the log returns, else your results will be skewed for short positions
    log_exit_prices = np.log(prices.loc[end_times].array)
    log_entry_prices = np.log(prices.loc[events_.index])

    out_df = pd.DataFrame(index=events_.index)
    out_df["ret"] = log_exit_prices - log_entry_prices
    out_df["trgt"] = events_["trgt"]

    # Meta labeling: Events that were correct will have pos returns
    if has_side:
        out_df["ret"] = out_df["ret"] * events_["side"]  # meta-labeling

    # Added code: label 0 when vertical barrier reached
    out_df = barrier_touched(out_df, triple_barrier_events)

    # Meta labeling: label incorrect events with a 0
    if has_side:
        out_df.loc[out_df["ret"] <= 0, "bin"] = 0

    # Transform the log returns back to normal returns.
    out_df["ret"] = np.exp(out_df["ret"]) - 1

    # Add the side to the output. This is useful for when a meta label model must be fit
    if "side" in triple_barrier_events.columns:
        out_df["side"] = triple_barrier_events["side"]

    return out_df
# Snippet 3.8 page 54
def drop_labels(events: pd.DataFrame, min_pct: float = 0.05) -> pd.DataFrame:
    """
    Repeatedly eliminate the rarest label until every label is frequent enough.

    On each pass the relative frequency of every value in events['bin'] is computed. While at
    least three distinct labels remain and the rarest one does not exceed `min_pct`, the rows
    carrying that label are removed and the dropped label is printed.

    Parameters
    ----------
    events : pd.DataFrame
        Events with a 'bin' column.
    min_pct : float, optional
        A fraction used to decide if the observation occurs less than that fraction.
        Defaults to .05.

    Returns
    -------
    pd.DataFrame
        Events without the rows of the dropped labels.

    Notes
    -----
    Advances in Financial Machine Learning, Snippet 3.8 page 54.
    """
    label_shares = events["bin"].value_counts(normalize=True)

    # Stop as soon as fewer than three labels remain or the rarest label is frequent enough
    while label_shares.shape[0] >= 3 and not label_shares.min() > min_pct:
        rarest_label = label_shares.idxmin()
        print("dropped label: ", rarest_label, label_shares.min())
        events = events[events["bin"] != rarest_label]
        # Frequencies change once rows are removed, so they are recomputed each pass
        label_shares = events["bin"].value_counts(normalize=True)

    return events