"""
Fractional differentiation is a technique for making a time series stationary while
retaining as much memory as possible. This is done by differencing by a positive real
number. A fractionally differenced series can be used as a feature in a machine
learning process.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller
class FractionalDifferentiation:
    """
    Namespace of static helpers that compute fractionally differentiated series.

    Two variants are provided: an expanding-window version (``frac_diff``) and a
    fixed-width-window version (``frac_diff_ffd``), each with its own weight
    generator (``get_weights`` and ``get_weights_ffd`` respectively).
    """

    @staticmethod
    def get_weights(diff_amt: float, size: int) -> np.ndarray:
        """
        Generate the weights used by the expanding-window fractional differencing.

        The weights follow the recursion ``w_0 = 1`` and
        ``w_k = -w_{k-1} * (diff_amt - k + 1) / k``. For a non-integer ``diff_amt``
        this is a non-terminating series that approaches zero asymptotically.
        Using these weights with an expanding window leads to a negative drift
        "caused by an expanding window's added weights" (see page 83 AFML).

        The book does not discuss what should be expected when ``diff_amt`` is a
        negative real number; this function applies the same recursion regardless
        of the sign of ``diff_amt``.

        Parameters
        ----------
        diff_amt : float
            Differencing amount.
        size : int
            Length of the series, i.e. the number of weights to generate. At least
            one weight (``w_0 = 1``) is always returned.

        Returns
        -------
        np.ndarray
            Column vector of shape ``(max(size, 1), 1)`` ordered from ``w_{size-1}``
            (first row) to ``w_0`` (last row).

        Notes
        -----
        The algorithm executes the iterative estimation (section 5.4.2, page 78).
        """
        weights = [1.0]
        for k in range(1, size):
            weights.append(-weights[-1] * (diff_amt - k + 1) / k)
        # Reverse so that w_0, the weight of the most recent observation, is last.
        return np.array(weights[::-1]).reshape(-1, 1)

    @staticmethod
    def _filled_observations(column: pd.Series):
        """
        Forward-fill ``column`` and return its usable observations.

        Returns a pair ``(positions, values)`` where ``positions`` are the integer
        row positions that are non-NaN after forward filling and ``values`` are the
        corresponding observations as a float array of the same length.
        """
        filled = column.ffill()
        positions = np.flatnonzero(filled.notna().to_numpy())
        values = filled.dropna().to_numpy(dtype="float64")
        return positions, values

    @staticmethod
    def frac_diff(series: pd.DataFrame, diff_amt: float, thresh: float = 0.01) -> pd.DataFrame:
        """
        Compute the fractionally differentiated series with an expanding window.

        The steps are as follows:

        - Compute the weights once, for the full length of ``series``.
        - Derive from ``thresh`` how many initial observations to skip.
        - For every remaining observation, take the dot product of the most recent
          weights with all (forward-filled) observations up to that point.

        Parameters
        ----------
        series : pd.DataFrame
            Time series to difference, one column per series.
        diff_amt : float
            Differencing amount.
        thresh : float
            Weight-loss threshold (epsilon) that determines how many initial
            observations are skipped.

        Returns
        -------
        pd.DataFrame
            Differenced series with the same index and columns as ``series``.
            Skipped observations and rows that are still NaN after forward filling
            are NaN in the output.

        Notes
        -----
        Advances in Financial Machine Learning, Chapter 5, section 5.5, page 82.

        This is the expanding window variant of the fracDiff algorithm:

        * For ``thresh = 1``, nothing is skipped (the normalised cumulative weight
          never exceeds 1).
        * ``diff_amt`` can be any positive fractional, not necessarily bounded [0, 1].

        References:

        * https://www.wiley.com/en-us/Advances+in+Financial+Machine+Learning-p-9781119482086
        * https://wwwf.imperial.ac.uk/~ejm/M3S8/Problems/hosking81.pdf
        * https://en.wikipedia.org/wiki/Fractional_calculus
        """
        n_rows = series.shape[0]

        # 1. Compute weights for the longest series
        weights = FractionalDifferentiation.get_weights(diff_amt, n_rows)
        kernel = weights[:, 0]

        # 2. Determine initial calculations to be skipped based on weight-loss threshold.
        #    The cumulative sum starts from the weight of the oldest observation.
        cumulative = np.cumsum(np.abs(weights))
        cumulative /= cumulative[-1]
        skip = int(np.count_nonzero(cumulative > thresh))

        # 3. Apply weights to values
        output = {}
        for name, column in series.items():
            positions, values = FractionalDifferentiation._filled_observations(column)
            differenced = np.full(n_rows, np.nan)
            for last in range(skip, values.shape[0]):
                # The newest (last + 1) weights line up with observations 0..last.
                differenced[positions[last]] = np.dot(kernel[-(last + 1):], values[: last + 1])
            output[name] = pd.Series(differenced, index=series.index)

        return pd.concat(output, axis=1)

    @staticmethod
    def get_weights_ffd(diff_amt: float, thresh: float, lim: int) -> np.ndarray:
        """
        Generate the weights used by the fixed-width-window fractional differencing.

        The same recursion as in ``get_weights`` is used, but generation stops as
        soon as the absolute value of the next weight falls below ``thresh`` or
        the vector has reached ``lim`` entries. Because the window has a fixed
        width, the same weights can be reused for every output point, which makes
        the process more efficient. The original notes for this routine add that
        the resulting fractionally differentiated series is skewed and has excess
        kurtosis, i.e. it is not Gaussian any more.

        The discussion of positive and negative ``diff_amt`` is the same as for
        ``get_weights``.

        Parameters
        ----------
        diff_amt : float
            Differencing amount.
        thresh : float
            Threshold for minimum weight.
        lim : int
            Maximum length of the weight vector. At least one weight (``w_0 = 1``)
            is always returned, even when ``lim`` is smaller than 1.

        Returns
        -------
        np.ndarray
            Column vector of weights ordered from the highest-order weight (first
            row) to ``w_0`` (last row).

        Notes
        -----
        Advances in Financial Machine Learning, Chapter 5, section 5.4.2, page 83.
        """
        weights = [1.0]
        # Iterative estimation (section 5.4.2, page 78), bounded by ``lim`` entries.
        # Checking the length up front also honours ``lim <= 1``.
        while len(weights) < lim:
            k = len(weights)
            next_weight = -weights[-1] * (diff_amt - k + 1) / k
            if abs(next_weight) < thresh:
                break
            weights.append(next_weight)
        # Reverse so that w_0, the weight of the most recent observation, is last.
        return np.array(weights[::-1]).reshape(-1, 1)

    @staticmethod
    def frac_diff_ffd(series: pd.DataFrame, diff_amt: float, thresh: float = 1e-5) -> pd.DataFrame:
        """
        Compute the fractionally differentiated series with a fixed-width window.

        The steps are as follows:

        - Compute the weights once (at most one per row of ``series``).
        - Slide a window of ``len(weights)`` consecutive (forward-filled)
          observations over every column and take its dot product with the weights.

        Parameters
        ----------
        series : pd.DataFrame
            Time series to difference, one column per series.
        diff_amt : float
            Differencing amount.
        thresh : float
            Threshold for minimum weight.

        Returns
        -------
        pd.DataFrame
            Differenced series with the same index and columns as ``series``.
            Rows without a full window of observations are NaN.

        Notes
        -----
        Advances in Financial Machine Learning, Chapter 5, section 5.5, page 83.

        Constant width window (new solution):

        * Threshold ``thresh`` determines the cut-off weight for the window.
        * ``diff_amt`` can be any positive fractional, not necessarily bounded [0, 1].

        Windows are taken by position over the non-NaN observations of each column,
        so a column with leading NaNs is differenced correctly and each result is
        stored at the row of the window's last observation.

        References:

        * https://www.wiley.com/en-us/Advances+in+Financial+Machine+Learning-p-9781119482086
        * https://wwwf.imperial.ac.uk/~ejm/M3S8/Problems/hosking81.pdf
        * https://en.wikipedia.org/wiki/Fractional_calculus
        """
        n_rows = series.shape[0]

        # 1) Compute weights for the longest series
        weights = FractionalDifferentiation.get_weights_ffd(diff_amt, thresh, n_rows)
        kernel = weights[:, 0]
        width = len(weights) - 1

        # 2) Apply weights to values, one column at a time
        output = {}
        for name, column in series.items():
            positions, values = FractionalDifferentiation._filled_observations(column)
            differenced = np.full(n_rows, np.nan)
            for last in range(width, values.shape[0]):
                window = values[last - width: last + 1]
                differenced[positions[last]] = np.dot(kernel, window)
            output[name] = pd.Series(differenced, index=series.index)

        # Transform the dictionary into a data frame
        return pd.concat(output, axis=1)
def get_weights(diff_amt, size):
    """
    Module-level convenience wrapper around ``FractionalDifferentiation.get_weights``.

    Both arguments are forwarded unchanged and the result of the static method
    is returned as-is.
    """
    return FractionalDifferentiation.get_weights(diff_amt=diff_amt, size=size)
def frac_diff(series, diff_amt, thresh=0.01):
    """
    Module-level convenience wrapper around ``FractionalDifferentiation.frac_diff``.

    All arguments are forwarded unchanged and the result of the static method
    is returned as-is.
    """
    return FractionalDifferentiation.frac_diff(
        series=series,
        diff_amt=diff_amt,
        thresh=thresh,
    )
def get_weights_ffd(diff_amt, thresh, lim):
    """
    Module-level convenience wrapper around ``FractionalDifferentiation.get_weights_ffd``.

    All arguments are forwarded unchanged and the result of the static method
    is returned as-is.
    """
    return FractionalDifferentiation.get_weights_ffd(
        diff_amt=diff_amt,
        thresh=thresh,
        lim=lim,
    )
# "[docs]" -- link marker left over from the rendered documentation page; not code.
def frac_diff_ffd(series: pd.DataFrame, diff_amt: float, thresh: float = 1e-5) -> pd.DataFrame:
    """
    Compute the fractionally differentiated series with a fixed-width window.

    This is a module-level wrapper: all arguments are forwarded unchanged to
    ``FractionalDifferentiation.frac_diff_ffd`` and its result is returned.

    The steps are as follows:

    - Compute weights (this is a one-time exercise)
    - Iteratively apply the weights to the price series and generate output points

    Parameters
    ----------
    series : pd.DataFrame
        A time series that needs to be differenced. It must be a data frame
        (one column per series); the underlying implementation iterates over
        ``series.columns``.
    diff_amt : float
        Differencing amount.
    thresh : float
        Threshold for minimum weight.

    Returns
    -------
    pd.DataFrame
        A data frame of differenced series.

    Notes
    -----
    Advances in Financial Machine Learning, Chapter 5, section 5.5, page 83.

    Constant width window (new solution):

    * Threshold ``thresh`` determines the cut-off weight for the window.
    * ``diff_amt`` can be any positive fractional, not necessarily bounded [0, 1].

    References:

    * https://www.wiley.com/en-us/Advances+in+Financial+Machine+Learning-p-9781119482086
    * https://wwwf.imperial.ac.uk/~ejm/M3S8/Problems/hosking81.pdf
    * https://en.wikipedia.org/wiki/Fractional_calculus
    """
    return FractionalDifferentiation.frac_diff_ffd(series, diff_amt, thresh)
# "[docs]" -- link marker left over from the rendered documentation page; not code.
def plot_min_ffd(series: pd.DataFrame) -> plt.plot:
    """
    Plot the ADF statistic and correlation for a range of differencing amounts.

    The plot helps to find the minimum d value that passes the ADF test, i.e. the
    amount of memory that needs to be removed to achieve stationarity. This
    function covers the case of 0 < d << 1, when the original series is
    "mildly non-stationary."

    The right y-axis on the plot is the ADF statistic computed on the input series
    downsampled to a daily frequency.

    The x-axis displays the d value used to generate the series on which the ADF
    statistic is computed (eleven evenly spaced values from 0 to 1).

    The left y-axis plots the correlation between the original series (d=0) and the
    differentiated series at various d values.

    A dotted red horizontal line marks the mean of the 5% ADF critical values
    collected over all d values.

    Parameters
    ----------
    series : pd.DataFrame
        Dataframe that contains a 'close' column with prices to use. It is
        resampled with ``resample("1D")``, so its index must support resampling
        (presumably a ``DatetimeIndex`` -- confirm against callers).

    Returns
    -------
    plt.plot
        The object returned by ``DataFrame.plot``; it can be displayed or used to
        obtain the resulting data.

    Notes
    -----
    Advances in Financial Machine Learning, Chapter 5, section 5.6, page 85.

    Examples on how to interpret the results of this function are available in the
    corresponding part in the book Advances in Financial Machine Learning.

    References: https://www.wiley.com/en-us/Advances+in+Financial+Machine+Learning-p-9781119482086
    """
    results = pd.DataFrame(columns=["adfStat", "pVal", "lags", "nObs", "95% conf", "corr"])

    # Log-prices downsampled to daily observations. They do not depend on d, so
    # they are computed once instead of once per loop iteration.
    close_prices = np.log(series[["close"]]).resample("1D").last().dropna()

    # Iterate through d values with 0.1 step
    for d_value in np.linspace(0, 1, 11):
        # Applying fractional differentiation
        differenced = frac_diff_ffd(close_prices, diff_amt=d_value, thresh=0.01).dropna()

        # Correlation between the original and the differentiated series
        corr = np.corrcoef(close_prices.loc[differenced.index, "close"], differenced["close"])[0, 1]

        # Applying ADF; the first four entries are stored as adfStat, pVal, lags
        # and nObs, and entry 4 holds the critical values keyed by level.
        adf_result = adfuller(differenced["close"], maxlag=1, regression="c", autolag=None)

        # Results to dataframe
        results.loc[d_value] = list(adf_result[:4]) + [adf_result[4]["5%"]] + [corr]

    # Plotting
    plot = results[["adfStat", "corr"]].plot(secondary_y="adfStat", figsize=(10, 8))
    plt.axhline(results["95% conf"].mean(), linewidth=1, color="r", linestyle="dotted")
    return plot