Many thanks @nicolaskruchten - things are clear now. I guess my confusion came from the naming. The “linear” in scipy/numpy refers to the interpolation method, while the “linear” in Plotly seems to refer to the higher-level quantile calculation method (which itself also uses linear interpolation, despite producing a different result from scipy/numpy). Anyway, I have included a snippet below to highlight this for anyone who runs into it in the future. Great work on this library - it really makes data viz much more enjoyable!
import numpy as np
from scipy.stats import iqr
def plotly_linear_quantiles(y, quantile):
    """Return the quantile of ``y`` using Plotly's "linear" quartile method.

    Implements method 10 ("H&L-2") described in
    http://jse.amstat.org/v14n3/langford.html

    The requested quantile sits at the 1-based rank ``n * quantile + 0.5``
    of the sorted data, where ``n = len(y)``. A whole-number rank selects
    that sorted element directly; otherwise the result is linearly
    interpolated between the two neighbouring sorted elements. Ranks that
    fall before the first or after the last element resolve to the minimum
    or maximum, because ``np.interp`` clamps to its end points.

    Example: for ``y = (1, 2, 3, 4, 5)`` and ``quantile = 0.25`` the rank is
    ``1.75``, so the first quartile is ``1.75``.

    Args:
        y: sized collection of numbers to calculate the quantile for.
        quantile: requested quantile, between 0 and 1 inclusive.

    Returns:
        The matching element of ``y`` when the rank is a whole number,
        otherwise the interpolated value produced by ``np.interp``.

    Raises:
        ValueError: if ``y`` is empty or ``quantile`` is outside [0, 1].
    """
    n = len(y)
    if n == 0:
        raise ValueError("y must contain at least one value")
    # Reject out-of-range quantiles up front: a negative whole-number
    # position would otherwise wrap around via negative list indexing and
    # silently return a value from the wrong end of the data.
    if not 0 <= quantile <= 1:
        raise ValueError("quantile must be between 0 and 1")

    ordered = sorted(y)
    # Subtract 1 to convert the 1-based rank into a 0-based position.
    position = n * quantile + 0.5 - 1
    if position.is_integer():
        # Exact hit on a data point: return it as-is (int() drops the ".0").
        return ordered[int(position)]
    return np.interp(position, np.arange(n), ordered)
def plotly_linear_IQR(y):
    """Return the interquartile range of ``y``.

    The result is the 0.75 quantile minus the 0.25 quantile, both computed
    with ``plotly_linear_quantiles``.
    """
    upper_quartile = plotly_linear_quantiles(y, 0.75)
    lower_quartile = plotly_linear_quantiles(y, 0.25)
    return upper_quartile - lower_quartile
# 'linear' is already the default in numpy and scipy; it is passed explicitly
# for clarity.
l = 'linear'

# For each sample, compare three IQR computations: the Plotly-style
# calculation, scipy's iqr, and numpy percentiles. The trailing comment on
# each line is the value that expression evaluates to.
# np.percentile takes `method=` here because its `interpolation=` keyword is
# deprecated as of NumPy 1.22; scipy's iqr still uses `interpolation=`.
y = [1, 2, 3, 4]
plotly_linear_IQR(y)  # 2.0
iqr(y, interpolation=l)  # 1.5
np.percentile(y, 75, method=l) - np.percentile(y, 25, method=l)  # 1.5

y = [1, 2, 3, 4, 5]
plotly_linear_IQR(y)  # 2.5
iqr(y, interpolation=l)  # 2.0
np.percentile(y, 75, method=l) - np.percentile(y, 25, method=l)  # 2.0

y = [1, 2, 3, 4, 5, 6]
plotly_linear_IQR(y)  # 3 (both quartiles land exactly on data points)
iqr(y, interpolation=l)  # 2.5
np.percentile(y, 75, method=l) - np.percentile(y, 25, method=l)  # 2.5

y = [1, 2, 3, 4, 5, 6, 7]
plotly_linear_IQR(y)  # 3.5
iqr(y, interpolation=l)  # 3.0
np.percentile(y, 75, method=l) - np.percentile(y, 25, method=l)  # 3.0