|
112 | 112 | "import numpy as np\n", |
113 | 113 | "\n", |
114 | 114 | "def kernel(X1, X2, l=1.0, sigma_f=1.0):\n", |
115 | | - " '''\n", |
116 | | - " Isotropic squared exponential kernel. Computes \n", |
117 | | - " a covariance matrix from points in X1 and X2.\n", |
| 115 | + " \"\"\"\n", |
| 116 | + " Isotropic squared exponential kernel.\n", |
118 | 117 | " \n", |
119 | 118 | " Args:\n", |
120 | 119 | " X1: Array of m points (m x d).\n", |
121 | 120 | " X2: Array of n points (n x d).\n", |
122 | 121 | "\n", |
123 | 122 | " Returns:\n", |
124 | | - " Covariance matrix (m x n).\n", |
125 | | - " '''\n", |
| 123 | + " (m x n) matrix.\n", |
| 124 | + " \"\"\"\n", |
126 | 125 | " sqdist = np.sum(X1**2, 1).reshape(-1, 1) + np.sum(X2**2, 1) - 2 * np.dot(X1, X2.T)\n", |
127 | 126 | " return sigma_f**2 * np.exp(-0.5 / l**2 * sqdist)" |
128 | 127 | ] |
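
For reference, a minimal usage sketch of the kernel defined in this hunk (not part of the diff): it evaluates `kernel` on toy 1-D inputs and cross-checks the vectorized squared-distance trick against `scipy.spatial.distance.cdist`. The toy arrays and the `cdist` comparison are illustrative assumptions, not code from the notebook.

```python
# Hypothetical usage sketch, assuming kernel() from the hunk above is defined.
import numpy as np
from scipy.spatial.distance import cdist

X1 = np.arange(5, dtype=float).reshape(-1, 1)   # 5 points in 1-D
X2 = np.arange(3, dtype=float).reshape(-1, 1)   # 3 points in 1-D

K = kernel(X1, X2, l=1.0, sigma_f=1.0)          # (5 x 3) covariance matrix

# With l = sigma_f = 1 the kernel reduces to exp(-0.5 * squared distance),
# so cdist with the 'sqeuclidean' metric gives the same matrix.
K_ref = np.exp(-0.5 * cdist(X1, X2, 'sqeuclidean'))

assert K.shape == (5, 3)
assert np.allclose(K, K_ref)
```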
|
200 | 199 | "from numpy.linalg import inv\n", |
201 | 200 | "\n", |
202 | 201 | "def posterior_predictive(X_s, X_train, Y_train, l=1.0, sigma_f=1.0, sigma_y=1e-8):\n", |
203 | | - " '''\n", |
| 202 | + " \"\"\"\n", |
204 | 203 | "    Computes the sufficient statistics of the GP posterior predictive distribution \n", |
205 | 204 | " from m training data X_train and Y_train and n new inputs X_s.\n", |
206 | 205 | " \n", |
|
214 | 213 | " \n", |
215 | 214 | " Returns:\n", |
216 | 215 | "        Posterior mean vector (n x 1) and covariance matrix (n x n).\n", |
217 | | - " '''\n", |
| 216 | + " \"\"\"\n", |
218 | 217 | " K = kernel(X_train, X_train, l, sigma_f) + sigma_y**2 * np.eye(len(X_train))\n", |
219 | 218 | " K_s = kernel(X_train, X_s, l, sigma_f)\n", |
220 | 219 | " K_ss = kernel(X_s, X_s, l, sigma_f) + 1e-8 * np.eye(len(X_s))\n", |
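
The hunk is truncated after the three kernel matrices are built. For context, a hedged sketch of the standard GP posterior-predictive equations these matrices feed into (Rasmussen & Williams, Algorithm 2.1); this mirrors what the rest of `posterior_predictive` presumably computes, but the lines below are an assumption, not taken from the diff.

```python
# Assumed remainder of posterior_predictive (not shown in this hunk):
# standard GP posterior mean and covariance.
K_inv = inv(K)                    # (m x m); fine for small m, a Cholesky
                                  # solve is preferable for larger problems
mu_s = K_s.T.dot(K_inv).dot(Y_train)        # posterior mean, (n x 1)
cov_s = K_ss - K_s.T.dot(K_inv).dot(K_s)    # posterior covariance, (n x n)
return mu_s, cov_s
```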
|
398 | 397 | "from scipy.optimize import minimize\n", |
399 | 398 | "\n", |
400 | 399 | "def nll_fn(X_train, Y_train, noise, naive=True):\n", |
401 | | - " '''\n", |
| 400 | + " \"\"\"\n", |
402 | 401 | " Returns a function that computes the negative log marginal\n", |
403 | | - " likelihood for training data X_train and Y_train and given \n", |
| 402 | + " likelihood for training data X_train and Y_train and given\n", |
404 | 403 | " noise level.\n", |
405 | | - " \n", |
| 404 | + "\n", |
406 | 405 | " Args:\n", |
407 | 406 | " X_train: training locations (m x d).\n", |
408 | 407 | " Y_train: training targets (m x 1).\n", |
409 | 408 | " noise: known noise level of Y_train.\n", |
410 | | - " naive: if True use a naive implementation of Eq. (7), if \n", |
411 | | - " False use a numerically more stable implementation. \n", |
412 | | - " \n", |
| 409 | + " naive: if True use a naive implementation of Eq. (7), if\n", |
| 410 | + " False use a numerically more stable implementation.\n", |
| 411 | + "\n", |
413 | 412 | " Returns:\n", |
414 | 413 | " Minimization objective.\n", |
415 | | - " '''\n", |
| 414 | + " \"\"\"\n", |
| 415 | + " \n", |
| 416 | + " Y_train = Y_train.ravel()\n", |
| 417 | + " \n", |
416 | 418 | " def nll_naive(theta):\n", |
417 | 419 | " # Naive implementation of Eq. (7). Works well for the examples \n", |
418 | 420 | " # in this article but is numerically less stable compared to \n", |
419 | 421 | " # the implementation in nll_stable below.\n", |
420 | 422 | " K = kernel(X_train, X_train, l=theta[0], sigma_f=theta[1]) + \\\n", |
421 | 423 | " noise**2 * np.eye(len(X_train))\n", |
422 | 424 | " return 0.5 * np.log(det(K)) + \\\n", |
423 | | - " 0.5 * Y_train.T.dot(inv(K).dot(Y_train)) + \\\n", |
| 425 | + " 0.5 * Y_train.dot(inv(K).dot(Y_train)) + \\\n", |
424 | 426 | " 0.5 * len(X_train) * np.log(2*np.pi)\n", |
425 | | - "\n", |
| 427 | + " \n", |
426 | 428 | " def nll_stable(theta):\n", |
427 | 429 | " # Numerically more stable implementation of Eq. (7) as described\n", |
428 | 430 | " # in http://www.gaussianprocess.org/gpml/chapters/RW2.pdf, Section\n", |
429 | 431 | " # 2.2, Algorithm 2.1.\n", |
| 432 | + " \n", |
| 433 | + " def ls(a, b):\n", |
| 434 | + " return lstsq(a, b, rcond=-1)[0]\n", |
| 435 | + " \n", |
430 | 436 | " K = kernel(X_train, X_train, l=theta[0], sigma_f=theta[1]) + \\\n", |
431 | 437 | " noise**2 * np.eye(len(X_train))\n", |
432 | 438 | " L = cholesky(K)\n", |
433 | 439 | " return np.sum(np.log(np.diagonal(L))) + \\\n", |
434 | | - " 0.5 * Y_train.T.dot(lstsq(L.T, lstsq(L, Y_train)[0])[0]) + \\\n", |
| 440 | + " 0.5 * Y_train.dot(ls(L.T, ls(L, Y_train))) + \\\n", |
435 | 441 | " 0.5 * len(X_train) * np.log(2*np.pi)\n", |
436 | | - " \n", |
| 442 | + "\n", |
437 | 443 | " if naive:\n", |
438 | 444 | " return nll_naive\n", |
439 | 445 | " else:\n", |
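
The hunk cuts off before the final `return nll_stable`. A hedged usage sketch of the returned objective with `scipy.optimize.minimize` (imported at the top of this cell); the starting point, bounds, and the L-BFGS-B choice are illustrative assumptions, not code from the diff.

```python
# Hypothetical usage, assuming X_train, Y_train, and noise are defined
# and nll_fn(..., naive=False) returns nll_stable as the objective.
res = minimize(nll_fn(X_train, Y_train, noise, naive=False),
               [1.0, 1.0],                        # initial [l, sigma_f]
               bounds=((1e-5, None), (1e-5, None)),
               method='L-BFGS-B')
l_opt, sigma_f_opt = res.x   # hyperparameters minimizing the negative
                             # log marginal likelihood
```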
|
717 | 723 | "name": "python", |
718 | 724 | "nbconvert_exporter": "python", |
719 | 725 | "pygments_lexer": "ipython3", |
720 | | - "version": "3.6.9" |
| 726 | + "version": "3.7.9" |
721 | 727 | } |
722 | 728 | }, |
723 | 729 | "nbformat": 4, |
|