Data sparse quantile regression with operator-valued kernelsΒΆ
An example to illustrate data sparse quantile regression with operator-valued kernels.
We compare quantile regression with several levels of data sparsity.
Out:
Creating dataset...
Fitting...
Non-sparse QR
Non-sparse QR learning time: 5.320 s
Non-sparse QR score 0.78496
Non-sparse QR num of support vectors 100
Sparse QR
Sparse QR learning time: 10.769 s
Sparse QR score 0.76306
Sparse QR num of support vectors 25
# Author: Maxime Sangnier <maxime.sangnier@gmail.com>
# License: MIT
# -*- coding: utf-8 -*-
import time
import numpy as np
import matplotlib.pyplot as plt
from operalib import Quantile, toy_data_quantile
def main():
"""Example of multiple quantile regression."""
print("Creating dataset...")
probs = np.linspace(0.1, 0.9, 5) # Quantile levels of interest
x_train, y_train, _ = toy_data_quantile(100, random_state=0)
x_test, y_test, z_test = toy_data_quantile(1000, probs=probs,
random_state=1)
print("Fitting...")
methods = {'Non-sparse QR':
Quantile(probs=probs, kernel='DGauss', lbda=1e-2, gamma=8.,
gamma_quantile=1e-2),
'Sparse QR':
Quantile(probs=probs, kernel='DGauss', lbda=1e-2, gamma=8.,
gamma_quantile=1e-2, eps=2.5)}
# Fit on training data
for name, reg in sorted(methods.items()):
print(name)
start = time.time()
reg.fit(x_train, y_train)
coefs = reg.model_["coefs"].reshape(y_train.size, probs.size)
n_sv = np.sum(np.linalg.norm(coefs, axis=1) *
reg.lbda / y_train.size > 1e-5)
print('%s learning time: %.3f s' % (name, time.time() - start))
print('%s score %.5f' % (name, reg.score(x_test, y_test)))
print('%s num of support vectors %d' % (name, n_sv))
# Plot the estimated conditional quantiles
plt.figure(figsize=(12, 7))
for i, method in enumerate(sorted(methods.keys())):
plt.subplot(2, 2, i + 1)
plt.plot(x_train, y_train, '.')
plt.gca().set_prop_cycle(None)
for quantile in methods[method].predict(x_test):
plt.plot(x_test, quantile, '-')
plt.gca().set_prop_cycle(None)
for prob, quantile in zip(probs, z_test):
plt.plot(x_test, quantile, '--',
label="theoretical {0:0.2f}".format(prob))
plt.title(method)
plt.legend(fontsize=8)
coefs = methods[method].model_["coefs"].reshape(y_train.size,
probs.size)
plt.subplot(2, 2, 2 + i + 1)
plt.plot(np.linalg.norm(coefs, axis=1))
plt.title("Norm of dual coefs for each point")
plt.show()
if __name__ == '__main__':
main()
Total running time of the script: ( 0 minutes 17.301 seconds)