|
| 1 | +--- |
| 2 | +jupytext: |
| 3 | + formats: md:myst |
| 4 | + text_representation: |
| 5 | + extension: .md |
| 6 | + format_name: myst |
| 7 | +kernelspec: |
| 8 | + display_name: Python 3 |
| 9 | + language: python |
| 10 | + name: python3 |
| 11 | +--- |
| 12 | + |
1 | 13 | (Chap_SMM)= |
2 | 14 | # Simulated Method of Moments Estimation |
3 | 15 |
|
@@ -317,16 +329,310 @@ In this section, we will use SMM to estimate parameters of the models from the { |
317 | 329 | (SecSMM_CodeExmp_MacrTest)= |
318 | 330 | ### Fitting a truncated normal to intermediate macroeconomics test scores |
319 | 331 |
|
320 | | -Let's revisit the problem from the MLE and GMM notebooks of fitting a truncated normal distribution to intermediate macroeconomics test scores. The data are in the text file [`Econ381totpts.txt`](https://github.com/OpenSourceEcon/CompMethods/blob/main/data/smm/Econ381totpts.txt). Recall that these test scores are between 0 and 450. The figure below shows a histogram of the data, as well as three truncated normal PDF's with different values for $\mu$ and $\sigma$. The black line is the maximum likelihood estimate of $\mu$ and $\sigma$ of the truncated normal pdf from the {ref}`Chap_MaxLikeli` chapter. The red and the green lines are just the PDF's of two "arbitrarily" chosen combinations of the truncated normal parameters $\mu$ and $\sigma$. |
321 | | - |
322 | | -```{figure} ../../../images/smm/MLEplots.png |
| 332 | +Let's revisit the problem from the MLE and GMM notebooks of fitting a truncated normal distribution to intermediate macroeconomics test scores. The data are in the text file [`Econ381totpts.txt`](https://github.com/OpenSourceEcon/CompMethods/blob/main/data/smm/Econ381totpts.txt). Recall that these test scores are between 0 and 450. {numref}`Figure %s <FigSMM_EconScoreTruncNorm>` below shows a histogram of the data, as well as three truncated normal PDF's with different values for $\mu$ and $\sigma$. The black line is the maximum likelihood estimate of $\mu$ and $\sigma$ of the truncated normal pdf from the {ref}`Chap_MaxLikeli` chapter. The red, green, and black lines are just the PDF's of two "arbitrarily" chosen combinations of the truncated normal parameters $\mu$ and $\sigma$.[^TruncNorm] |
| 333 | + |
| 334 | +```{code-cell} ipython3 |
| 335 | +:tags: ["hide-input", "remove-output"] |
| 336 | +
|
| 337 | +# Import the necessary libraries |
| 338 | +import numpy as np |
| 339 | +import scipy.stats as sts |
| 340 | +import requests |
| 341 | +import matplotlib.pyplot as plt |
| 342 | +
|
| 343 | +
|
| 344 | +# Define function that generates values of a normal pdf |
| 345 | +def trunc_norm_pdf(xvals, mu, sigma, cut_lb, cut_ub): |
| 346 | + ''' |
| 347 | + -------------------------------------------------------------------- |
| 348 | + Generate pdf values from the normal pdf with mean mu and standard |
| 349 | + deviation sigma. If the cutoff is given, then the PDF values are |
| 350 | + inflated upward to reflect the zero probability on values above the |
| 351 | + cutoff. If there is no cutoff given, this function does the same |
| 352 | + thing as sp.stats.norm.pdf(x, loc=mu, scale=sigma). |
| 353 | + -------------------------------------------------------------------- |
| 354 | + INPUTS: |
| 355 | + xvals = (N,) vector, values of the normally distributed random |
| 356 | + variable |
| 357 | + mu = scalar, mean of the normally distributed random variable |
| 358 | + sigma = scalar > 0, standard deviation of the normally distributed |
| 359 | + random variable |
| 360 | + cut_lb = scalar or string, ='None' if no cutoff is given, otherwise |
| 361 | + is scalar lower bound value of distribution. Values below |
| 362 | + this value have zero probability |
| 363 | + cut_ub = scalar or string, ='None' if no cutoff is given, otherwise |
| 364 | + is scalar upper bound value of distribution. Values above |
| 365 | + this value have zero probability |
| 366 | +
|
| 367 | + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: None |
| 368 | +
|
| 369 | + OBJECTS CREATED WITHIN FUNCTION: |
| 370 | + prob_notcut = scalar |
| 371 | + pdf_vals = (N,) vector, normal PDF values for mu and sigma |
| 372 | + corresponding to xvals data |
| 373 | +
|
| 374 | + FILES CREATED BY THIS FUNCTION: None |
| 375 | +
|
| 376 | + RETURNS: pdf_vals |
| 377 | + -------------------------------------------------------------------- |
| 378 | + ''' |
| 379 | + if cut_ub == 'None' and cut_lb == 'None': |
| 380 | + prob_notcut = 1.0 |
| 381 | + elif cut_ub == 'None' and cut_lb != 'None': |
| 382 | + prob_notcut = 1.0 - sts.norm.cdf(cut_lb, loc=mu, scale=sigma) |
| 383 | + elif cut_ub != 'None' and cut_lb == 'None': |
| 384 | + prob_notcut = sts.norm.cdf(cut_ub, loc=mu, scale=sigma) |
| 385 | + elif cut_ub != 'None' and cut_lb != 'None': |
| 386 | + prob_notcut = (sts.norm.cdf(cut_ub, loc=mu, scale=sigma) - |
| 387 | + sts.norm.cdf(cut_lb, loc=mu, scale=sigma)) |
| 388 | +
|
| 389 | + pdf_vals = ((1/(sigma * np.sqrt(2 * np.pi)) * |
| 390 | + np.exp( - (xvals - mu)**2 / (2 * sigma**2))) / |
| 391 | + prob_notcut) |
| 392 | +
|
| 393 | + return pdf_vals |
| 394 | +
|
| 395 | +
|
| 396 | +# Download and save the data file Econ381totpts.txt |
| 397 | +url = ('https://raw.githubusercontent.com/OpenSourceEcon/CompMethods/' + |
| 398 | + 'main/data/smm/Econ381totpts.txt') |
| 399 | +data_file = requests.get(url, allow_redirects=True) |
| 400 | +open('../../../data/smm/Econ381totpts.txt', 'wb').write(data_file.content) |
| 401 | +
|
| 402 | +# Load the data as a NumPy array |
| 403 | +data = np.loadtxt('../../../data/smm/Econ381totpts.txt') |
| 404 | +
|
| 405 | +num_bins = 30 |
| 406 | +count, bins, ignored = plt.hist( |
| 407 | + data, num_bins, density=True, edgecolor='k', label='data' |
| 408 | +) |
| 409 | +plt.title('Econ 381 scores: 2011-2012', fontsize=20) |
| 410 | +plt.xlabel(r'Total points') |
| 411 | +plt.ylabel(r'Percent of scores') |
| 412 | +plt.xlim([0, 550]) # This gives the xmin and xmax to be plotted" |
| 413 | +
|
| 414 | +# Plot smooth line with distribution 1 |
| 415 | +dist_pts = np.linspace(0, 450, 500) |
| 416 | +mu_1 = 300 |
| 417 | +sig_1 = 30 |
| 418 | +plt.plot(dist_pts, trunc_norm_pdf(dist_pts, mu_1, sig_1, 0, 450), |
| 419 | + linewidth=2, color='red', label=f"$\mu$={mu_1},$\sigma$={sig_1}") |
| 420 | +
|
| 421 | +# Plot smooth line with distribution 2 |
| 422 | +mu_2 = 400 |
| 423 | +sig_2 = 70 |
| 424 | +plt.plot(dist_pts, trunc_norm_pdf(dist_pts, mu_2, sig_2, 0, 450), |
| 425 | + linewidth=2, color='green', label=f"$\mu$={mu_2},$\sigma$={sig_2}") |
| 426 | +
|
| 427 | +# Plot smooth line with distribution 3 |
| 428 | +mu_3 = 558 |
| 429 | +sig_3 = 176 |
| 430 | +plt.plot(dist_pts, trunc_norm_pdf(dist_pts, mu_3, sig_3, 0, 450), |
| 431 | + linewidth=2, color='black', label=f"$\mu$={mu_3},$\sigma$={sig_3}") |
| 432 | +plt.legend(loc='upper left') |
| 433 | +
|
| 434 | +plt.show() |
| 435 | +``` |
| 436 | + |
| 437 | +```{figure} ../../../images/smm/Econ381scores_truncnorm.png |
323 | 438 | --- |
324 | 439 | height: 500px |
325 | | -name: FigMLEplots |
| 440 | +name: FigSMM_EconScoreTruncNorm |
326 | 441 | --- |
327 | 442 | Macroeconomic midterm scores and three truncated normal distributions |
328 | 443 | ``` |
329 | 444 |
|
| 445 | +#### Two moments, identity weighting matrix |
| 446 | +Let's try estimating the parameters $\mu$ and $\sigma$ from the truncated normal distribution by SMM, assuming that we know the cutoff values for the distribution of scores $c_{lb}=0$ and $c_{ub}=450$. What moments should we use? Let's try the mean and variance of the data. These two statistics of the data are defined by: |
| 447 | + |
| 448 | +$$ mean(scores_i) = \frac{1}{N}\sum_{i=1}^N scores_i $$ |
| 449 | + |
| 450 | +$$ var(scores_i) = \frac{1}{N-1}\sum_{i=1}^{N} \left(scores_i - mean(scores_i)\right)^2 $$ |
| 451 | + |
| 452 | +So the data moment vector $m(x)$ for SMM has two elements $R=2$ and is the following. |
| 453 | + |
| 454 | +$$ m(scores_i) \equiv \begin{bmatrix} mean(scores_i) \\ var(scores_i) \end{bmatrix} $$ |
| 455 | + |
| 456 | +And the model moment vector $m(x|\theta)$ for SMM is the following. |
| 457 | + |
| 458 | +$$ m(scores_i|\mu,\sigma) \equiv \begin{bmatrix} mean(scores_i|\mu,\sigma) \\ var(scores_i|\mu,\sigma) \end{bmatrix} $$ |
| 459 | + |
| 460 | +But let's assume that we need to simulate the data from the model (test scores) $S$ times in order to get the model moments. In this case, we don't need to simulate. But we will do so to show how SMM works. |
| 461 | + |
| 462 | +```{code-cell} ipython3 |
| 463 | +:tags: ["remove-output"] |
| 464 | +
|
| 465 | +# Import packages and load the data |
| 466 | +import numpy as np |
| 467 | +import numpy.random as rnd |
| 468 | +import numpy.linalg as lin |
| 469 | +import scipy.stats as sts |
| 470 | +import scipy.integrate as intgr |
| 471 | +import scipy.optimize as opt |
| 472 | +import matplotlib |
| 473 | +import matplotlib.pyplot as plt |
| 474 | +from mpl_toolkits.mplot3d import Axes3D |
| 475 | +from matplotlib import cm |
| 476 | +cmap1 = matplotlib.cm.get_cmap('summer') |
| 477 | +
|
| 478 | +# Download and save the data file Econ381totpts.txt |
| 479 | +url = ('https://raw.githubusercontent.com/OpenSourceEcon/CompMethods/' + |
| 480 | + 'main/data/smm/Econ381totpts.txt') |
| 481 | +data_file = requests.get(url, allow_redirects=True) |
| 482 | +open('../../../data/smm/Econ381totpts.txt', 'wb').write(data_file.content) |
| 483 | +
|
| 484 | +# Load the data as a NumPy array |
| 485 | +data = np.loadtxt('../../../data/smm/Econ381totpts.txt') |
| 486 | +``` |
| 487 | + |
| 488 | +Let random variable $y\sim N(\mu,\sigma)$ be distributed normally with mean $\mu$ and standard deviation $\sigma$ with PDF given by $\phi(y|\mu,\sigma)$ and CDF given by $\Phi(y|\mu,\sigma)$. The truncated normal distribution of random variable $x\in(a,b)$ based on $y$ but with cutoff values of $a\geq -\infty$ as a lower bound and $a < b\leq\infty$ as an upper bound has the following probability density function. |
| 489 | + |
| 490 | +$$ f(x|\mu,\sigma,a,b) = \begin{cases} 0 \quad\text{if}\quad x\leq a \\ \frac{\phi(x|\mu,\sigma)}{\Phi(b|\mu,\sigma) - \Phi(a|\mu,\sigma)}\quad\text{if}\quad a < x < b \\ 0 \quad\text{if}\quad x\geq b \end{cases} $$ |
| 491 | + |
| 492 | +The CDF of the truncated normal can be shown to be the following: |
| 493 | + |
| 494 | +$$ F(x|\mu,\sigma,a,b) = \begin{cases} 0 \quad\text{if}\quad x\leq a \\ \frac{\Phi(x|\mu,\sigma) - \Phi(a|\mu,\sigma)}{\Phi(b|\mu,\sigma) - \Phi(a|\mu,\sigma)}\quad\text{if}\quad a < x < b \\ 0 \quad\text{if}\quad x\geq b \end{cases} $$ |
| 495 | + |
| 496 | +The inverse CDF of the truncated normal takes a value $p$ between 0 and 1 and solves for the value of $x$ for which $p=F(x|\mu,\sigma,a,b)$. The expression for the inverse CDF of the truncated normal is the following: |
| 497 | + |
| 498 | +$$ x = \Phi^{-1}(z|\mu,\sigma) \quad\text{where}\quad z = p\Bigl[\Phi(b|\mu,\sigma) - \Phi(a|\mu,\sigma)\Bigr] + \Phi(a|\mu,\sigma) $$ |
| 499 | + |
| 500 | +Note that $z$ is just a transformation of $p$ such that $z\sim U\Bigl(\Phi^{-1}(a|\mu,\sigma), \Phi^{-1}(b|\mu,\sigma)\Bigr)$. |
| 501 | + |
| 502 | +The following code for `trunc_norm_pdf()` is a function that returns the probability distribution function value of random variable value $x$ given parameters $\mu$, $\sigma$, $c_{lb}$, $c_{ub}$. |
| 503 | + |
| 504 | +```{code-cell} ipython3 |
| 505 | +:tags: ["remove-output"] |
| 506 | +
|
| 507 | +def trunc_norm_pdf(xvals, mu, sigma, cut_lb, cut_ub): |
| 508 | + ''' |
| 509 | + -------------------------------------------------------------------- |
| 510 | + Generate pdf values from the normal pdf with mean mu and standard |
| 511 | + deviation sigma. If the cutoff is given, then the PDF values are |
| 512 | + inflated upward to reflect the zero probability on values above the |
| 513 | + cutoff. If there is no cutoff given, this function does the same |
| 514 | + thing as sp.stats.norm.pdf(x, loc=mu, scale=sigma). |
| 515 | + -------------------------------------------------------------------- |
| 516 | + INPUTS: |
| 517 | + xvals = (N,) vector, values of the normally distributed random |
| 518 | + variable |
| 519 | + mu = scalar, mean of the normally distributed random variable |
| 520 | + sigma = scalar > 0, standard deviation of the normally distributed |
| 521 | + random variable |
| 522 | + cut_lb = scalar or string, ='None' if no cutoff is given, otherwise |
| 523 | + is scalar lower bound value of distribution. Values below |
| 524 | + this value have zero probability |
| 525 | + cut_ub = scalar or string, ='None' if no cutoff is given, otherwise |
| 526 | + is scalar upper bound value of distribution. Values above |
| 527 | + this value have zero probability |
| 528 | +
|
| 529 | + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: None |
| 530 | +
|
| 531 | + OBJECTS CREATED WITHIN FUNCTION: |
| 532 | + prob_notcut = scalar |
| 533 | + pdf_vals = (N,) vector, normal PDF values for mu and sigma |
| 534 | + corresponding to xvals data |
| 535 | +
|
| 536 | + FILES CREATED BY THIS FUNCTION: None |
| 537 | +
|
| 538 | + RETURNS: pdf_vals |
| 539 | + -------------------------------------------------------------------- |
| 540 | + ''' |
| 541 | + if cut_ub == 'None' and cut_lb == 'None': |
| 542 | + prob_notcut = 1.0 |
| 543 | + elif cut_ub == 'None' and cut_lb != 'None': |
| 544 | + prob_notcut = 1.0 - sts.norm.cdf(cut_lb, loc=mu, scale=sigma) |
| 545 | + elif cut_ub != 'None' and cut_lb == 'None': |
| 546 | + prob_notcut = sts.norm.cdf(cut_ub, loc=mu, scale=sigma) |
| 547 | + elif cut_ub != 'None' and cut_lb != 'None': |
| 548 | + prob_notcut = (sts.norm.cdf(cut_ub, loc=mu, scale=sigma) - |
| 549 | + sts.norm.cdf(cut_lb, loc=mu, scale=sigma)) |
| 550 | +
|
| 551 | + pdf_vals = ( |
| 552 | + (1/(sigma * np.sqrt(2 * np.pi)) * |
| 553 | + np.exp( - (xvals - mu)**2 / (2 * sigma**2))) / |
| 554 | + prob_notcut |
| 555 | + ) |
| 556 | +
|
| 557 | + return pdf_vals |
| 558 | +``` |
| 559 | + |
| 560 | +The following code `trunc_norm_draws` is a function that draws $S$ simulations of $N$ observations of the random variable $x_{n,s}$ that is distributed truncated normal. This function takes as an input an $N\times S$ matrix of uniform distributed values $u_{n,s}\sim U(0,1)$. |
| 561 | + |
| 562 | +```{code-cell} ipython3 |
| 563 | +:tags: ["remove-output"] |
| 564 | +
|
| 565 | +def trunc_norm_draws(unif_vals, mu, sigma, cut_lb, cut_ub): |
| 566 | + ''' |
| 567 | + -------------------------------------------------------------------- |
| 568 | + Draw (N x S) matrix of random draws from a truncated normal |
| 569 | + distribution based on a normal distribution with mean mu and |
| 570 | + standard deviation sigma and cutoffs (cut_lb, cut_ub). These draws |
| 571 | + correspond to an (N x S) matrix of randomly generated draws from a |
| 572 | + uniform distribution U(0,1). |
| 573 | + -------------------------------------------------------------------- |
| 574 | + INPUTS: |
| 575 | + unif_vals = (N, S) matrix, (N,) vector, or scalar in (0,1), random |
| 576 | + draws from uniform U(0,1) distribution |
| 577 | + mu = scalar, mean of the nontruncated normal distribution |
| 578 | + from which the truncated normal is derived |
| 579 | + sigma = scalar > 0, standard deviation of the nontruncated |
| 580 | + normal distribution from which the truncated normal is |
| 581 | + derived |
| 582 | + cut_lb = scalar or string, ='None' if no lower bound cutoff is |
| 583 | + given, otherwise is scalar lower bound value of |
| 584 | + distribution. Values below this cutoff have zero |
| 585 | + probability |
| 586 | + cut_ub = scalar or string, ='None' if no upper bound cutoff is |
| 587 | + given, otherwise is scalar lower bound value of |
| 588 | + distribution. Values below this cutoff have zero |
| 589 | + probability |
| 590 | +
|
| 591 | + OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION: |
| 592 | + scipy.stats.norm() |
| 593 | +
|
| 594 | + OBJECTS CREATED WITHIN FUNCTION: |
| 595 | + cut_ub_cdf = scalar in [0, 1], cdf of N(mu, sigma) at upper bound |
| 596 | + cutoff of truncated normal distribution |
| 597 | + cut_lb_cdf = scalar in [0, 1], cdf of N(mu, sigma) at lower bound |
| 598 | + cutoff of truncated normal distribution |
| 599 | + unif2_vals = (N, S) matrix, (N,) vector, or scalar in (0,1), |
| 600 | + rescaled uniform derived from original. |
| 601 | + tnorm_draws = (N, S) matrix, (N,) vector, or scalar in (0,1), |
| 602 | + values drawn from truncated normal PDF with base |
| 603 | + normal distribution N(mu, sigma) and cutoffs |
| 604 | + (cut_lb, cut_ub) |
| 605 | +
|
| 606 | + FILES CREATED BY THIS FUNCTION: None |
| 607 | +
|
| 608 | + RETURNS: tnorm_draws |
| 609 | + -------------------------------------------------------------------- |
| 610 | + ''' |
| 611 | + # No cutoffs: truncated normal = normal |
| 612 | + if (cut_lb == None) & (cut_ub == None): |
| 613 | + cut_ub_cdf = 1.0 |
| 614 | + cut_lb_cdf = 0.0 |
| 615 | + # Lower bound truncation, no upper bound truncation |
| 616 | + elif (cut_lb != None) & (cut_ub == None): |
| 617 | + cut_ub_cdf = 1.0 |
| 618 | + cut_lb_cdf = sts.norm.cdf(cut_lb, loc=mu, scale=sigma) |
| 619 | + # Upper bound truncation, no lower bound truncation |
| 620 | + elif (cut_lb == None) & (cut_ub != None): |
| 621 | + cut_ub_cdf = sts.norm.cdf(cut_ub, loc=mu, scale=sigma) |
| 622 | + cut_lb_cdf = 0.0 |
| 623 | + # Lower bound and upper bound truncation |
| 624 | + elif (cut_lb != None) & (cut_ub != None): |
| 625 | + cut_ub_cdf = sts.norm.cdf(cut_ub, loc=mu, scale=sigma) |
| 626 | + cut_lb_cdf = sts.norm.cdf(cut_lb, loc=mu, scale=sigma) |
| 627 | +
|
| 628 | + unif2_vals = unif_vals * (cut_ub_cdf - cut_lb_cdf) + cut_lb_cdf |
| 629 | + tnorm_draws = sts.norm.ppf(unif2_vals, loc=mu, scale=sigma) |
| 630 | +
|
| 631 | + return tnorm_draws |
| 632 | +``` |
| 633 | + |
| 634 | +What would one simulation of 161 test scores look like from a truncated normal with mean $\mu=300$, $\sigma=30$? |
| 635 | + |
330 | 636 |
|
331 | 637 | (SecSMM_CodeExmp_BM72)= |
332 | 638 | ### Brock and Mirman (1972) estimation by SMM |
@@ -455,4 +761,6 @@ Also, use the identity matrix as your weighting matrix $\textbf{W}=\textbf{I}$ a |
455 | 761 | (SecSMMFootnotes)= |
456 | 762 | ## Footnotes |
457 | 763 |
|
458 | | -<!-- [^citation_note]: See {cite}`AuerbachEtAl:1981,AuerbachEtAl:1983`, {cite}`AuerbachKotlikoff:1983a,AuerbachKotlikoff:1983b,AuerbachKotlikoff:1983c`, and {cite}`AuerbachKotlikoff:1985`. --> |
| 764 | +The footnotes from this chapter. |
| 765 | + |
| 766 | +[^TruncNorm]: See Section {ref}`SecAppendixTruncNormal` of the Appendix for a description of the truncated normal distribution. |
0 commit comments