diff --git a/lectures/_static/quant-econ.bib b/lectures/_static/quant-econ.bib index 0d0c531a0..30760a5da 100644 --- a/lectures/_static/quant-econ.bib +++ b/lectures/_static/quant-econ.bib @@ -929,7 +929,6 @@ @article{alchian1950uncertainty publisher={The University of Chicago Press} } - @article{mendoza1998international, title={The international ramifications of tax reforms: supply-side economics in a global economy}, author={Mendoza, Enrique G and Tesar, Linda L}, @@ -997,20 +996,6 @@ @incollection{Hurwicz:1962 year = {1962} } -@article{Hurwicz:1966, - abstract = {Publisher Summary This chapter concentrates on the structural form of interdependent systems. A great deal of effort is devoted in econometrics and elsewhere to find the behavior pattern of an observed configuration. Such effort is justified on the grounds that the knowledge of the behavior pattern is needed for the purpose of giving explanation or prediction. The merits of this justification are also examined in the chapter. At this point, the chapter considers certain difficulties encountered in the process of looking for the behavior patterns. In certain fields, notably economics (but also— for example, electronic network theory), it deals with a set (configuration) of objects (components) that are interdependent in their behavior. For purposes of both theoretical analysis and empirical investigation of such situations, the phenomena are often described in the chapter (in idealized form) by means of a system of simultaneous equations. History alone is not enabled to determine the behavior pattern of the configuration; but this does not mean that the task is hopeless. The priori information is obtained from the axiom systems or theories that are believed to be relevant to the behavior pattern of the configuration.}, - author = {Leonid Hurwicz}, - doi = {http://dx.doi.org/10.1016/S0049-237X(09)70590-7}, - editor = {Patrick S Ernest Nagel and Alfred Tarski}, - journal = {Logic, Methodology and Philosophy of Science Proceeding of the 1960 International Congress}, - pages = {232-239}, - publisher = {Elsevier}, - title = {On the Structural Form of Interdependent Systems}, - volume = {44}, - url = {http://www.sciencedirect.com/science/article/pii/S0049237X09705907}, - year = {1966}, -} - @article{hurwicz1950least, title = {Least squares bias in time series}, author = {Hurwicz, Leonid}, @@ -1309,7 +1294,7 @@ @article{BHS_2009 } @article{HST_1999, - author = {Lars Peter Hansen and Thomas J. Sargent and Thomas D. Tallarini}, + author = {Hansen, Lars Peter and Sargent, Thomas J. and Tallarini, Thomas D.}, title = {{Robust Permanent Income and Pricing}}, journal = {Review of Economic Studies}, year = 1999, @@ -1816,7 +1801,6 @@ @article{rosen1994cattle publisher = {The University of Chicago Press} } - @article{Reffett1996, title = {Production-based asset pricing in monetary economies with transactions costs}, author = {Reffett, Kevin L}, @@ -2369,17 +2353,6 @@ @article{AMSS_2002 month = {December} } -@article{aiyagari2002optimal, - title = {Optimal taxation without state-contingent debt}, - author = {Aiyagari, S Rao and Marcet, Albert and Sargent, Thomas J and Sepp{\"a}l{\"a}, Juha}, - journal = {Journal of Political Economy}, - volume = {110}, - number = {6}, - pages = {1220--1254}, - year = {2002}, - publisher = {The University of Chicago Press} -} - @article{Rust1996, title = {Numerical dynamic programming in economics}, author = {Rust, John}, @@ -3391,18 +3364,6 @@ @techreport{giannoni2010optimal institution = {National Bureau of Economic Research} } - -@article{pearlman1986rational, - title = {Rational expectations models with partial information}, - author = {Pearlman, Joseph and Currie, David and Levine, Paul}, - journal = {Economic Modelling}, - volume = {3}, - number = {2}, - pages = {90--105}, - year = {1986}, - publisher = {Elsevier} -} - @techreport{backus1986consistency, title = {The consistency of optimal policy in stochastic rational expectations models}, author = {Backus, David and Driffill, John}, @@ -3502,7 +3463,6 @@ @article{kikuchi2018span publisher = {Wiley Online Library} } - @article{do1999solutions, title = {Solutions for the linear-quadratic control problem of Markov jump linear systems}, author = {Do Val, JBR and Geromel, JC and Costa, OLV}, @@ -4073,3 +4033,22 @@ @book{Nummelin_1984 year = {1984}, doi = {10.1017/CBO9780511526237} } +@article{EpsteinWang1994, + author = {Epstein, Larry G. and Wang, Tan}, + title = {{Intertemporal Asset Pricing under Knightian Uncertainty}}, + journal = {Econometrica}, + volume = {62}, + number = {2}, + pages = {283--322}, + year = {1994} +} + +@article{Campbell1987, + author = {Campbell, John Y.}, + title = {{Does Saving Anticipate Declining Labor Income? An Alternative Test of the Permanent Income Hypothesis}}, + journal = {Econometrica}, + volume = {55}, + number = {6}, + pages = {1249--1273}, + year = {1987} +} diff --git a/lectures/_toc.yml b/lectures/_toc.yml index 929fefe00..0a4f545b2 100644 --- a/lectures/_toc.yml +++ b/lectures/_toc.yml @@ -65,6 +65,7 @@ parts: - file: wealth_dynamics - file: kalman - file: kalman_2 + - file: kalman_filter_var - file: organization_capital - file: measurement_models - caption: Search @@ -109,8 +110,10 @@ parts: - file: cross_product_trick - file: perm_income - file: perm_income_cons - - file: theil_1 + - file: robust_permanent_income + - file: theil_1 - file: theil_2 + - file: lq_consumption_smoothing - file: lq_inventories - caption: Optimal Growth numbered: true diff --git a/lectures/kalman_2.md b/lectures/kalman_2.md index 9b4c2e916..37ff9b433 100644 --- a/lectures/kalman_2.md +++ b/lectures/kalman_2.md @@ -264,7 +264,7 @@ u_hat_t = x_hat_t[1, :] For this fixed worker initial state, we plot $\mathbb{E}[y_t | y^{t-1}] = G \hat x_t$ where $\hat x_t = \mathbb{E}[x_t | y^{t-1}]$. -We also plot $\mathbb{E}[u_0 | y^{t-1}]$, which is the firm inference about a worker's hard-wired "work ethic" $u_0$, conditioned on information $y^{t-1}$ that it has about him or her coming into period $t$. +We also plot $\mathbb{E}[u_0 | y^{t-1}]$, which is the firm's inference about a worker's hard-wired "work ethic" $u_0$, conditioned on information $y^{t-1}$ that it has about him or her coming into period $t$. We can watch how the firm updates its inference $\mathbb{E}[u_0 | y^{t-1}]$ about the worker's work ethic as more output observations arrive. @@ -341,7 +341,7 @@ for i, t in enumerate(np.linspace(0, T-1, 3, dtype=int)): # Create a contour plot for the PDF con = axs[i].contour(h, u, pdf_values, cmap='viridis') axs[i].clabel(con, inline=1, fontsize=10) - axs[i].set_title(f'Time Step {t}') + axs[i].set_title(f'time step {t}') axs[i].set_xlabel(r'$h_{{{}}}$'.format(str(t))) axs[i].set_ylabel(r'$u_{{{}}}$'.format(str(t))) @@ -467,7 +467,7 @@ namedtuple. Here is an example. ```{code-cell} ipython3 -# We can set these parameters when creating a worker -- just like classes! +# We can set these parameters when creating a worker, just like classes! hard_working_worker = create_worker(α=.4, β=.8, hhat_0=7.0, uhat_0=100, σ_h=2.5, σ_u=3.2) diff --git a/lectures/kalman_filter_var.md b/lectures/kalman_filter_var.md new file mode 100644 index 000000000..93a5a051e --- /dev/null +++ b/lectures/kalman_filter_var.md @@ -0,0 +1,1518 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.11.1 +kernelspec: + display_name: Python 3 + language: python + name: python3 +--- + +# The Kalman Filter and Vector Autoregressions + +```{index} single: Kalman Filter +``` + +```{index} single: Vector Autoregression; and Kalman filter +``` + +## Overview + +This lecture derives the **Kalman filter** for a linear Gaussian state space system +and then uses it to construct **vector autoregressions (VARs)**. + +Our approach rests on repeated applications of the **population linear least squares** +projection formula, the insight that computing a conditional expectation of a jointly +normal random vector is the same as running a population OLS regression. + +The lecture covers: + +- deriving the Kalman filter recursions from first principles +- the **matrix Riccati difference equation** governing conditional covariance matrices +- the **innovations representation** and the **Gram-Schmidt** whitening property +- the structure of a **hidden Markov model** +- the **likelihood function** for a state space system and its role in maximum likelihood + and Bayesian estimation +- how the time-invariant Kalman filter generates a **vector autoregression** +- why the Kalman filter is an essential tool for **interpreting VARs** estimated from + economic data + +## The state space system + +The Kalman filter applies to the **state space system** for $t \geq 0$: + +$$ +\begin{aligned} +x_{t+1} &= A x_t + C w_{t+1} \\ +y_t &= G x_t + v_t +\end{aligned} +$$ (eq:statespace) + +where + +- $x_t$ is an $n \times 1$ **state vector** (hidden, unobserved) +- $y_t$ is an $m \times 1$ vector of **signals** on the hidden state (observed) +- $w_{t+1}$ is a $p \times 1$ i.i.d. sequence of normal random variables with mean + $0$ and identity covariance matrix +- $v_t$ is an i.i.d. sequence of normal random variables with mean zero and + covariance matrix $R$ +- $w_{t+1}$ and $v_s$ are orthogonal for all $t+1$ and $s \geq 0$ + +The initial state satisfies + +$$ +x_0 \sim \mathcal{N}(\hat{x}_0, \Sigma_0) +$$ (eq:kalf3) + +We observe $y_t, \ldots, y_0$ but **not** $x_t, \ldots, x_0$ at time $t$, +and we know all first and second moments implied by +{eq}`eq:statespace` and {eq}`eq:kalf3`. + +## The Kalman filter + +### Starting distribution + +Working forward in time starting at $t = 0$, before observing $y_0$, +the specification {eq}`eq:statespace`-{eq}`eq:kalf3` implies that the +marginal distribution of $y_0$ is + +$$ +y_0 \sim \mathcal{N}(G \hat{x}_0,\; G \Sigma_0 G' + R) +$$ (eq:kalf4) + +For $t \geq 0$ let $y^t = [y_t, y_{t-1}, \ldots, y_0]$. + +We want a convenient recursive representation of the conditional distribution of +$y_t$ given history $y^{t-1}$. + +The Kalman filter attains this by constructing recursive formulas for +$\hat{x}_t$ and $\Sigma_t$ such that the distribution of $y_t$ conditional on +$y^{t-1}$ generalises {eq}`eq:kalf4` to + +$$ +y_t \sim \mathcal{N}(G \hat{x}_t,\; G \Sigma_t G' + R) +$$ (eq:kalf400) + +for $t \geq 1$, where the distribution of $x_t$ conditional on $y^{t-1}$ is +$\mathcal{N}(\hat{x}_t, \Sigma_t)$. + +The objects $\hat{x}_t$ and $\Sigma_t$ characterise the **population regression** + +$$ +\hat{x}_t = \mathbb{E}[x_t \mid y_{t-1}, \ldots, y_0] +$$ + +and the **conditional covariance matrix** + +$$ +\Sigma_t = \mathbb{E}\!\left[(x_t - \hat{x}_t)(x_t - \hat{x}_t)' \mid y_{t-1}, \ldots, y_0\right] +$$ + +### Derivation + +At each date, our approach is to **regress what we do not know on what we know**. + +```{note} +Because our assumptions imply that $\{x_t, y_t\}_{t=0}^\infty$ is a jointly normal +stochastic process, linear least squares regressions equal conditional mathematical +expectations — each step below is an application of Bayes' law. + +Under the weaker assumption that all means and covariances exist without joint normality, +the same calculations yield "wide-sense conditional expectations" that coincide with true +conditional expectations only when those conditional expectations are linear. +``` + +**Arrive at $t = 0$** knowing $\hat{x}_0$ and $\Sigma_0$. + +**Innovation:** The information about $x_0$ in $y_0$ that is new relative to +$(\hat{x}_0, \Sigma_0)$ is the **innovation** + +$$ +a_0 \equiv y_0 - G \hat{x}_0 +$$ + +**Updating beliefs about $x_0$:** The conditional mean +$\mathbb{E}[x_0 \mid y_0] = \hat{x}_0 + L_0(y_0 - G\hat{x}_0)$ satisfies the +population regression formula + +$$ +x_0 - \hat{x}_0 = L_0(y_0 - G\hat{x}_0) + \eta +$$ (eq:kalf5) + +where $\eta$ is the least squares residual. + +Orthogonality of $\eta$ to +$(y_0 - G\hat{x}_0)$ pins down $L_0$ via the normal equations + +$$ +\mathbb{E}(x_0 - \hat{x}_0)(y_0 - G\hat{x}_0)' += L_0\, \mathbb{E}(y_0 - G\hat{x}_0)(y_0 - G\hat{x}_0)' +$$ + +Evaluating the moment matrices and solving for $L_0$ gives + +$$ +L_0 = \Sigma_0 G'(G \Sigma_0 G' + R)^{-1} +$$ (eq:kalf6) + +**Forecasting $x_1$:** Note that + +$$ +x_1 = A\hat{x}_0 + A(x_0 - \hat{x}_0) + C w_1 +$$ (eq:kalf6a) + +Applying {eq}`eq:kalf5` gives $\mathbb{E}[x_1 \mid y_0] = A\hat{x}_0 + AL_0(y_0 - G\hat{x}_0)$, +which we write as + +$$ +\hat{x}_1 = A\hat{x}_0 + K_0(y_0 - G\hat{x}_0) +$$ (eq:kalf7) + +where the **Kalman gain** at time 0 is + +$$ +K_0 = A \Sigma_0 G'(G \Sigma_0 G' + R)^{-1} +$$ (eq:kalf7a) + +**Updating the covariance:** Subtracting {eq}`eq:kalf7` from {eq}`eq:kalf6a` yields + +$$ +x_1 - \hat{x}_1 = A(x_0 - \hat{x}_0) + C w_1 - K_0(y_0 - G\hat{x}_0) +$$ (eq:kalf8) + +Using {eq}`eq:kalf8` and $y_0 = G x_0 + v_0$ to evaluate +$\Sigma_1 \equiv \mathbb{E}[(x_1 - \hat{x}_1)(x_1 - \hat{x}_1)' \mid y_0]$ gives + +$$ +\Sigma_1 = (A - K_0 G)\Sigma_0(A - K_0 G)' + CC' + K_0 R K_0' +$$ (eq:kalf9) + +Thus $f(x_1 \mid y_0) \sim \mathcal{N}(\hat{x}_1, \Sigma_1)$. + +Collecting the time-$0$ equations: + +$$ +\begin{aligned} +a_0 &= y_0 - G\hat{x}_0 \\ +K_0 &= A\Sigma_0 G'(G\Sigma_0 G' + R)^{-1} \\ +\hat{x}_1 &= A\hat{x}_0 + K_0 a_0 \\ +\Sigma_1 &= CC' + K_0 R K_0' + (A - K_0 G)\Sigma_0(A - K_0 G)' +\end{aligned} +$$ (eq:kalf1000) + +System {eq}`eq:kalf1000` maps a mean-covariance pair $(\hat{x}_0, \Sigma_0)$ into a new +pair $(\hat{x}_1, \Sigma_1)$, with auxiliary outputs $(a_0, K_0)$. + +Recognising that "we are in the same situation at the start of period 1 as at +the start of period 0" activates a recursion, the **Kalman filter**. + +### The Kalman filter recursions + +Iterating system {eq}`eq:kalf1000` yields the **Kalman filter** for $t \geq 0$: + +$$ +\begin{aligned} +a_t &= y_t - G\hat{x}_t \\ +K_t &= A\Sigma_t G'(G\Sigma_t G' + R)^{-1} \\ +\hat{x}_{t+1} &= A\hat{x}_t + K_t a_t \\ +\Sigma_{t+1} &= CC' + K_t R K_t' + (A - K_t G)\Sigma_t(A - K_t G)' +\end{aligned} +$$ (eq:kalf10) + +Here $K_t$ is the **Kalman gain** at time $t$. + +### The matrix Riccati equation + +Substituting the expression for $K_t$ from the second line of {eq}`eq:kalf10` +into the fourth line gives an equivalent update formula: + +$$ +\Sigma_{t+1} = A\Sigma_t A' + CC' + - A\Sigma_t G'(G\Sigma_t G' + R)^{-1} G\Sigma_t A' +$$ (eq:riccati) + +Equation {eq}`eq:riccati` is the **matrix Riccati difference equation**. + +It governs the sequence of conditional covariance matrices $\{\Sigma_t\}_{t=0}^\infty$ +without reference to the observations $\{y_t\}$. + +```{index} single: Riccati equation; matrix difference +``` + +## The Gram-Schmidt process + +The random vector + +$$ +a_t = y_t - \mathbb{E}[y_t \mid y_{t-1}, \ldots, y_0] +$$ + +is the **innovation** of $y_t$ with respect to $y^{t-1}$, the part of $y_t$ +that cannot be predicted from past observations. + +Note that $\mathbb{E} a_t a_t' = G\Sigma_t G' + R$, the matrix whose inverse appears in +the Kalman gain formula {eq}`eq:kalf10`. + +A direct calculation using $a_t = G(x_t - \hat{x}_t) + v_t$ shows that +$\mathbb{E} a_t a_{t-1}' = 0$ and, more generally, $\mathbb{E}[a_t \mid a_{t-1}, \ldots, a_0] = 0$. + +```{note} +An alternative argument from first principles: let $H(y^t)$ denote the closed linear +span of $y^t$. + +Since $a_{t+1} = y_{t+1} - \mathbb{E}[y_{t+1} \mid y^t]$ is a least-squares +error, $a_{t+1} \perp H(y^t)$, and in particular $a_{t+1} \perp a_t$. + +Thus $\{a_t\}$ is a white-noise process of innovations to $\{y_t\}$. +``` + +Sometimes {eq}`eq:kalf10` is called a **whitening filter**: it takes the signal process +$\{y_t\}$ as input and produces the white-noise innovation process $\{a_t\}$ as output. + +The linear space $H(a^t)$ is an orthogonal basis for the linear space $H(y^t)$. + +Rather than computing $\mathbb{E}[x_t \mid y_{t-1}, \ldots, y_0]$ via one large regression, +the Kalman filter performs a sequence of small regressions on successive orthogonal +components of the basis $[a_{t-1}, \ldots, a_0]$, an instance of the +**Gram-Schmidt procedure**. + +```{index} single: Gram-Schmidt process +``` + +## Hidden Markov model + +System {eq}`eq:statespace`-{eq}`eq:kalf3` is an example of a **hidden Markov model**. + +```{index} single: hidden Markov model +``` + +The observed process $\{y_t\}_{t=0}^\infty$ is *not* Markov, but the hidden +process $\{x_t\}_{t=0}^\infty$ *is* Markov. + +So is the process of means and +covariances $\{(\hat{x}_t, \Sigma_t)\}$, which are sufficient statistics for +the distribution of $x_t$ conditional on $[y_{t-1}, \ldots, y_0]$. + +## Estimation + +### The innovations representation + +The **innovations representation** emerging from the Kalman filter is + +$$ +\begin{aligned} +\hat{x}_{t+1} &= A\hat{x}_t + K_t a_t \\ +y_t &= G\hat{x}_t + a_t +\end{aligned} +$$ (eq:innovrep) + +where $\hat{x}_t = \mathbb{E}[x_t \mid y^{t-1}]$ for $t \geq 1$ and +$\mathbb{E}[a_t a_t' \mid y^{t-1}] = G\Sigma_t G' + R \equiv \Omega_t$. + +For $t \geq 1$, $\mathbb{E}[y_t \mid y^{t-1}] = G\hat{x}_t$ and the conditional +distribution of $y_t$ given $y^{t-1}$ is $\mathcal{N}(G\hat{x}_t, \Omega_t)$. + +The objects $(G\hat{x}_t, \Omega_t)$ emerging from the Kalman filter recursions +therefore completely characterise this conditional distribution. + +### The likelihood function + +We can factor the likelihood of a sample $(y_T, y_{T-1}, \ldots, y_0)$ as + +$$ +f(y_T, \ldots, y_0) + = f(y_T \mid y^{T-1})\, f(y_{T-1} \mid y^{T-2}) \cdots f(y_1 \mid y_0)\, f(y_0) +$$ (eq:diff100) + +The log conditional density of the $m \times 1$ vector $y_t$ is + +$$ +\log f(y_t \mid y^{t-1}) + = -\frac{m}{2}\log(2\pi) + - \frac{1}{2}\log\det(\Omega_t) + - \frac{1}{2}\, a_t' \Omega_t^{-1} a_t +$$ (eq:gauss100) + +Using {eq}`eq:gauss100` and {eq}`eq:kalf10` together, we can evaluate +the likelihood {eq}`eq:diff100` recursively for any parameter vector $\theta$ +that underlies the matrices $A, G, C, R$. + +Such calculations are at the heart of efficient strategies for computing +**maximum likelihood estimators** of free parameters. + +### Bayesian inference + +The likelihood function is also central to **Bayesian inference**. + +Where $\theta$ is the parameter vector, $y_0^T$ the data, and +$\tilde{p}(\theta)$ a prior density over $\theta$ before seeing $y_0^T$, +Bayes' law gives the **posterior** + +$$ +\tilde{p}(\theta \mid y_0^T) + = \frac{f(y_0^T \mid \theta)\,\tilde{p}(\theta)} + {\int f(y_0^T \mid \theta)\,\tilde{p}(\theta)\, d\theta} +$$ + +The denominator is the marginal joint density $f(y_0^T)$. + +## Vector autoregressions and the Kalman filter + +### Convergence to a steady state + +Under conditions discussed by Anderson, Hansen, McGrattan, and Sargent (1996), +iterations on the Riccati equation {eq}`eq:riccati` converge to a +**time-invariant** matrix $\Sigma$ from any positive semi-definite starting value +$\Sigma_0$. + +A time-invariant fixed point $\Sigma_t = \Sigma$ of {eq}`eq:riccati` is the +covariance matrix of $x_t$ around + +$$ +\mathbb{E}\!\left[x_t \mid \{y_s\}_{s \leq t-1}\right] +$$ + +where the conditioning extends over the **semi-infinite** past $s \leq t-1$. + +### A time-invariant VAR + +If the fixed point $\Sigma$ exists and we initialise the filter at $\Sigma_0 = \Sigma$, +the innovations representation {eq}`eq:innovrep` becomes time-invariant: + +$$ +\begin{aligned} +\hat{x}_{t+1} &= A\hat{x}_t + K a_t \\ +y_t &= G\hat{x}_t + a_t +\end{aligned} +$$ (eq:innovti) + +where $\mathbb{E} a_t a_t' = G\Sigma G' + R$ and the **steady-state Kalman gain** is +$K = A\Sigma G'(G\Sigma G' + R)^{-1}$. + +From {eq}`eq:innovti` we obtain $\hat{x}_{t+1} = (A - KG)\hat{x}_t + K y_t$. + +If the eigenvalues of $A - KG$ are bounded in modulus strictly below unity, we can +solve this equation forward to get + +$$ +\hat{x}_{t+1} = \sum_{j=0}^\infty (A - KG)^j K\, y_{t-j} +$$ (eq:xhatform) + +Substituting {eq}`eq:xhatform` into the observation equation of {eq}`eq:innovti` +gives the **vector autoregression** + +$$ +y_t = G \sum_{j=0}^\infty (A - KG)^j K\, y_{t-j-1} + a_t +$$ (eq:var1) + +where by construction + +$$ +\mathbb{E}\!\left[a_t\, y_{t-j-1}'\right] = 0 \quad \forall\, j \geq 0 +$$ (eq:varorth) + +The orthogonality conditions {eq}`eq:varorth` identify {eq}`eq:var1` as a +**vector autoregression**. + +Defining the lag operator $L$ by $L x_{t+1} \equiv x_t$, the +**moving average representation** deduced from {eq}`eq:innovti` is + +$$ +y_t = \left[I + G(I - AL)^{-1} KL\right] a_t + = \left[I + G\sum_{j=0}^\infty A^j K L^{j+1}\right] a_t +$$ + +```{index} single: vector autoregression +``` + +### Interpreting VARs + +Equilibria of economic models (or their linear or log-linear approximations) +typically take the form of state space system {eq}`eq:statespace`. + +This hidden Markov model disturbs the state $x_t$ by the $p \times 1$ shock +vector $w_{t+1}$ and perturbs the $m \times 1$ vector of observables $y_t$ by the +$m \times 1$ measurement error $v_t$. + +An economic theory typically makes $w_{t+1}$ and $v_t$ directly interpretable as +shocks to preferences, technologies, endowments, or information sets. + +The state space system {eq}`eq:statespace` represents $\{y_t\}$ in terms of these +**interpretable shocks**. + +However, in the typical situation these shocks +**cannot** be recovered directly from the $y_t$'s, even when $A, G, C, R$ are known. + +The innovations representation {eq}`eq:innovti` represents the *same* stochastic +process $\{y_t\}$ in terms of the $m \times 1$ vector $a_t$ of innovations that +would be recovered by running an infinite-order population vector autoregression. + +Its role in mapping the original representation {eq}`eq:statespace` to the VAR +{eq}`eq:var1` makes the Kalman filter an indispensable tool for +**interpreting vector autoregressions**. + +```{index} single: Kalman Filter; and vector autoregressions +``` + +## Spectral factorization identity + +```{index} single: spectral factorization identity +``` + +Because the original state space system {eq}`eq:statespace` and the innovations +representation {eq}`eq:innovti` describe the **same** stochastic process $\{y_t\}$, +they imply two distinct formulas for the **spectral density matrix** of $\{y_t\}$. + +Equating those formulas yields the *spectral factorization identity*. + +### Two representations of the spectral density + +**From the original state space system.** Writing the first line of +{eq}`eq:statespace` as $x_t = (zI - A)^{-1} C w_{t+1}$ (using the $z$-transform +convention $z^{-1} x_t = x_{t-1}$), the covariance generating function of +$\{x_t\}$ is + +$$ +S_x(z) = (zI - A)^{-1} CC' (z^{-1}I - A')^{-1}. +$$ + +Since $v_t$ is orthogonal to $x_t$, the spectral density of $\{y_t\}$ is + +$$ +S_y(z) = G(zI - A)^{-1} CC' (z^{-1}I - A')^{-1} G' + R. +$$ (eq:sf_original) + +**From the innovations representation.** The time-invariant innovations +representation {eq}`eq:innovti` gives $y_t = [G(zI - A)^{-1}K + I]\, a_t$. +Since $a_t$ is white noise with covariance matrix $G\Sigma G' + R$, the +spectral density is also + +$$ +S_y(z) = \bigl[G(zI-A)^{-1}K + I\bigr] + \bigl(G\Sigma G' + R\bigr) + \bigl[K'(z^{-1}I - A')^{-1}G' + I\bigr]. +$$ (eq:sf_innov) + +### The spectral factorization identity + +Equating {eq}`eq:sf_original` and {eq}`eq:sf_innov` gives the +**spectral factorization identity**: + +$$ +G(zI - A)^{-1} CC' (z^{-1}I - A')^{-1} G' + R = +\bigl[G(zI-A)^{-1}K + I\bigr] +\bigl(G\Sigma G' + R\bigr) +\bigl[K'(z^{-1}I - A')^{-1}G' + I\bigr]. +$$ (eq:sf_identity) + +The left side expresses $S_y(z)$ in terms of the **structural shocks** +$(w_{t+1}, v_t)$ and the matrices $(A, C, G, R)$. + +The right side expresses the same object as a spectral factor built from the +**innovations** $a_t$ and the steady-state Kalman gain $K$. + +### Connection to the Wold and autoregressive representations + +The factorization {eq}`eq:sf_identity` underpins the passage from the innovations +representation to the Wold moving average and to the VAR. + +**Wold representation.** From {eq}`eq:innovti`, solving for $\hat{x}_t$ over the +semi-infinite past gives $\hat{x}_{t+1} = (I - AL)^{-1} K a_t$, so + +$$ +y_t = \bigl[G(I - AL)^{-1} KL + I\bigr]\, a_t. +$$ (eq:sf_wold) + +**Autoregressive (VAR) representation.** Applying the inverse of the +moving-average operator in {eq}`eq:sf_wold`, using the identity + +$$ +\bigl[G(I-AL)^{-1}KL + I\bigr]^{-1} = I - G\bigl[I-(A-KG)L\bigr]^{-1}KL, +$$ + +gives + +$$ +y_t = G\bigl[I-(A-KG)L\bigr]^{-1}K\, y_{t-1} + a_t + = \sum_{j=1}^\infty G(A-KG)^{j-1}K\, y_{t-j} + a_t, +$$ (eq:sf_var) + +which is the **vector autoregression** already stated in {eq}`eq:var1`. + +The key analytical fact behind both representations is that, under mild stability +conditions, the zeros of $\det[G(zI-A)^{-1}K + I]$ all lie **inside** the unit +circle. + +This ensures that the moving-average operator in {eq}`eq:sf_wold` has a +causal (one-sided) inverse, so the innovation $a_t$ lies in the closed linear span +of current and past observations $y^t$, confirming that $a_t$ is the true +population forecast error from the VAR. + +## Python implementation + +We now illustrate the theory using the `quantecon` library, which provides +`LinearStateSpace` and `Kalman` classes that implement everything derived above. + +```{code-cell} ipython3 +import numpy as np +import matplotlib.pyplot as plt +import quantecon as qe +``` + +### A scalar hidden AR(1) model + +Consider a scalar hidden AR(1) state observed with measurement noise: + +$$ +\begin{aligned} +x_{t+1} &= \rho\, x_t + \sigma_w\, w_{t+1} \\ +y_t &= x_t + \sigma_v\, v_t +\end{aligned} +$$ + +with $w_t, v_t \sim \mathcal{N}(0, 1)$ i.i.d. + +```{code-cell} ipython3 +# Model parameters +rho = 0.9 +sigma_w = 0.5 +sigma_v = 1.0 + +# State space matrices (n=1, p=1, m=1) +A = np.array([[rho]]) +C = np.array([[sigma_w]]) +G = np.array([[1.0]]) +R = np.array([[sigma_v**2]]) + +# Build a LinearStateSpace and a Kalman filter object +H = np.array([[sigma_v]]) # measurement noise factor: R = H @ H.T +lss = qe.LinearStateSpace(A, C, G, H, mu_0=np.zeros(1), Sigma_0=np.eye(1) * 10.0) +kf = qe.Kalman(lss) +kf.set_state(np.zeros(1), np.eye(1) * 10.0) # diffuse prior +``` + +**Simulate a sample path of the true hidden state and noisy observations.** + +```{code-cell} ipython3 +T = 200 +x_path, y_path = lss.simulate(ts_length=T, random_state=42) + +# x_path has shape (n, T+1); y_path has shape (m, T) +x_true = x_path[0, :T] +y_obs = y_path[0, :] +``` + +**Run the Kalman filter manually step by step to collect filtered estimates.** + +```{code-cell} ipython3 +x_hats = np.zeros(T) +Sigmas = np.zeros(T) + +for t in range(T): + kf.update(y_obs[t:t+1]) # one full filter cycle + x_hats[t] = kf.x_hat.item() + Sigmas[t] = kf.Sigma.item() +``` + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: Kalman filtering of a scalar AR(1) state + name: fig-kfvar-scalar +--- +fig, axes = plt.subplots(3, 1, figsize=(10, 8), sharex=True) + +t_range = np.arange(T) + +axes[0].plot(t_range, x_true, lw=1.5, label='True state $x_t$') +axes[0].plot(t_range, x_hats, lw=1.5, linestyle='--', label=r'$\hat{x}_t$ (Kalman)') +axes[0].plot(t_range, y_obs, alpha=0.35, label='Observation $y_t$') +axes[0].set_title('State, Kalman estimate, and observation') +axes[0].legend(fontsize=9) + +axes[1].plot(t_range, Sigmas, color='tab:orange') +axes[1].set_title(r'Conditional variance $\Sigma_t$') +axes[1].axhline(kf.Sigma_infinity[0, 0], ls='--', color='k', + label=r'Steady-state $\Sigma_\infty$') +axes[1].legend(fontsize=9) + +axes[2].plot(t_range, y_obs - x_hats, color='tab:green', lw=0.8, alpha=0.7) +axes[2].set_title(r'Innovation $a_t = y_t - G\hat{x}_t$') +axes[2].set_xlabel('time $t$') + +plt.tight_layout() +plt.show() +``` + +### Convergence of the Riccati equation + +The `Kalman` class computes the steady-state covariance $\Sigma_\infty$ by +solving the discrete algebraic Riccati equation directly. + +```{code-cell} ipython3 +Sigma_inf, K_inf = kf.stationary_values() + +print(f"Steady-state covariance Σ_inf = {Sigma_inf[0, 0]:.6f}") +print(f"Kalman filter converged to Σ_t = {Sigmas[-1]:.6f}") +print(f"Steady-state Kalman gain K = {K_inf[0, 0]:.6f}") + +A_minus_KG = A - K_inf @ G +eigval = np.linalg.eigvals(A_minus_KG)[0] +print(f"\nEigenvalue of (A - KG) = {eigval:.6f}") +print(f"Stable VAR: {np.abs(eigval) < 1}") +``` + +### The VAR representation + +Using {eq}`eq:var1`, the coefficients in the infinite-order VAR representation are +$G(A - KG)^j K$ for $j = 0, 1, 2, \ldots$ + +We retrieve them via `stationary_coefficients`: + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: VAR coefficients from the innovations representation + name: fig-kfvar-varcoef +--- +J = 30 +var_coeffs = kf.stationary_coefficients(J, coeff_type='var') + +# var_coeffs[j] gives the mxm coefficient matrix at lag j+1 +lags = np.arange(1, J + 1) +coeff_values = np.array([var_coeffs[j][0, 0] for j in range(J)]) + +fig, ax = plt.subplots(figsize=(8, 4)) +ax.stem(lags, coeff_values, basefmt=' ') +ax.set_xlabel('lag $j$') +ax.set_ylabel(r'VAR coefficient $G(A{-}KG)^{j-1}K$') +plt.tight_layout() +plt.show() +``` + +The coefficients decay geometrically, confirming that the infinite-order VAR +{eq}`eq:var1` is well approximated by a finite-lag truncation. + +### Likelihood evaluation + +We use {eq}`eq:gauss100` to evaluate the log-likelihood of the simulated sample. + +```{code-cell} ipython3 +def log_likelihood(A, C, G, R, y_data, x_hat_0, Sigma_0): + """Evaluate the log-likelihood using the Kalman filter recursions.""" + H_ = np.linalg.cholesky(R) # R = H_ @ H_.T + lss_ = qe.LinearStateSpace(A, C, G, H_, mu_0=x_hat_0, Sigma_0=Sigma_0) + kf_ = qe.Kalman(lss_) + kf_.set_state(x_hat_0, Sigma_0) + + T_, m_ = y_data.shape + loglik = 0.0 + + for t in range(T_): + x_h = kf_.x_hat + Sig = kf_.Sigma + Omega = G @ Sig @ G.T + R # innovation covariance + a_t = y_data[t] - (G @ x_h).flatten() + + sign, logdet = np.linalg.slogdet(Omega) + loglik += -0.5 * (m_ * np.log(2 * np.pi) + logdet + + float(a_t @ np.linalg.solve(Omega, a_t))) + kf_.update(y_data[t]) + + return loglik + + +y_data_col = y_obs.reshape(-1, 1) +ll = log_likelihood(A, C, G, R, + y_data_col, + np.zeros(1), np.eye(1) * 10.0) +print(f"Log-likelihood of sample: {ll:.4f}") +``` + +## An example + +We now work through a structured example that shows how a bivariate VAR(2) fits naturally into the state space framework and how the Kalman filter delivers a +Wold (innovations) representation. + +### Linear state-space system + +The state and observation equations are + +$$ +x_{t+1} = A x_t + C w_{t+1} +$$ (eq:ex_state) + +$$ +y_t = G x_t + v_t +$$ (eq:ex_obs) + +with initial condition and shock distributions + +$$ +x_0 \sim \mathcal{N}(M_0, \Omega_0), \quad +w_{t+1} \sim \mathcal{N}(0, I), \quad +v_t \sim \mathcal{N}(0, R). +$$ + +### Steady-state Riccati equation + +The steady-state error covariance matrix $\Sigma$ satisfies + +$$ +\Sigma = A \Sigma A' + CC' + - A \Sigma G' \bigl(G \Sigma G' + R\bigr)^{-1} G \Sigma A' +$$ (eq:ex_riccati) + +### Kalman gain + +$$ +K = A \Sigma G' \bigl(G \Sigma G' + R\bigr)^{-1} +$$ (eq:ex_gain) + +### Kalman filter recursion + +Starting from an initial estimate $\hat{x}_0$, the Kalman filter updates the state estimate via + +$$ +\hat{x}_{t+1} = A \hat{x}_t + K a_t +$$ (eq:ex_kf_update) + +where the **innovation** (prediction error) is + +$$ +a_t = y_t - G \hat{x}_t +$$ (eq:ex_innovation) + +Substituting {eq}`eq:ex_innovation` into {eq}`eq:ex_kf_update` and expanding: + +$$ +\hat{x}_{t+1} = A \hat{x}_t + K(y_t - G\hat{x}_t) + = (A - KG)\hat{x}_t + K y_t + = (A - KG)\hat{x}_t + K G x_t + K v_t +$$ (eq:ex_kf_expanded) + +### Impulse responses of $y_t$ to the innovations $a_t$ + +It is useful to compute the **ordinary impulse response functions** of the +observable vector $y_t$ to its own innovations $a_t$, the moving-average (Wold) +representation that is the mirror image of the VAR {eq}`eq:var1`. + +From the time-invariant innovations representation {eq}`eq:innovti` + +$$ +\hat{x}_{t+1} = A\hat{x}_t + K a_t, \qquad y_t = G\hat{x}_t + a_t, +$$ + +the moving-average representation {eq}`eq:sf_wold` is + +$$ +y_t = \bigl[I + G(I - AL)^{-1} K L\bigr]\, a_t + = a_t + \sum_{h=1}^{\infty} G A^{h-1} K\, a_{t-h}. +$$ + +Hence the impulse response of $y_t$ to a unit innovation $a_t$ is + +$$ +\Psi_0 = I, \qquad \Psi_h = G A^{h-1} K \quad (h \ge 1). +$$ (eq:ex_y_to_a) + +These coefficients decay at the rate governed by the eigenvalues of $A$, in +contrast to the innovation-to-structural-shock responses studied later in the +numerical example, which decay at the rate governed by the eigenvalues of +$A - KG$. + +We can read the coefficients {eq}`eq:ex_y_to_a` directly off a `quantecon` +`LinearStateSpace` object. + +We build a state-space system whose state is the +filtered estimate $\hat{x}_t$, whose single "shock" is the innovation $a_t$ +loaded through $C = K$, and whose observation matrix is $G$. + +The +`impulse_response` method of that object returns the sequence $G A^{j} K$ for +$j = 0, 1, 2, \ldots$, which are exactly the $\Psi_h$ for $h \ge 1$; we prepend +$\Psi_0 = I$ to capture the contemporaneous feed-through $y_t = G\hat{x}_t + a_t$. + +```{code-cell} ipython3 +def y_to_a_irf(A, K, G, T=40): + """ + Ordinary impulse responses of the observable y_t to its own + innovations a_t in the innovations (Wold) representation + + x_hat_{t+1} = A x_hat_t + K a_t + y_t = G x_hat_t + a_t. + + The moving-average coefficients are + Ψ_0 = I, Ψ_h = G A^{h-1} K (h >= 1). + + The h >= 1 terms come from quantecon's LinearStateSpace: loading the + innovation a_t through C = K makes its impulse_response method return + G A^j K for j = 0, 1, 2, .... We prepend Ψ_0 = I for the + contemporaneous feed-through. + + Returns an array of shape (T, m, m); entry [h, i, j] is the response + of observable i at horizon h to a unit innovation in component j. + """ + n, m = A.shape[0], G.shape[0] + lss = qe.LinearStateSpace(A, K, G, np.zeros((m, m)), mu_0=np.zeros(n)) + _, ycoef = lss.impulse_response(j=T - 2) # [GK, GAK, GA^2K, ...] + Psi = np.empty((T, m, m)) + Psi[0] = np.eye(m) # contemporaneous response + for h in range(1, T): + Psi[h] = ycoef[h - 1] + return Psi +``` + +### Augmented state-space representation + +We want to express the innovations $a_t$ as a function of the state shocks +$w_{t+1}$ and the measurement error $v_t$. + +To accomplish this, we start by stacking the true state $x_t$ and the filter estimate $\hat{x}_t$ into an augmented state vector that +obeys + +$$ +\begin{pmatrix} x_{t+1} \\ \hat{x}_{t+1} \end{pmatrix} += +\begin{pmatrix} A & 0 \\ KG & A - KG \end{pmatrix} +\begin{pmatrix} x_t \\ \hat{x}_t \end{pmatrix} ++ +\begin{pmatrix} C & 0 \\ 0 & K \end{pmatrix} +\begin{pmatrix} w_{t+1} \\ v_t \end{pmatrix} +$$ (eq:ex_augmented) + + +### Bivariate VAR(2) in state-space form + +Consider two observable series $r_t$ and $z_t$. + +Stack them into the state vector $x_t = (r_t,\; r_{t-1},\; z_t,\; z_{t-1})'$. + +We posit the VAR(2) state-transition equation: + +$$ +\begin{pmatrix} r_{t+1} \\ r_t \\ z_{t+1} \\ z_t \end{pmatrix} += +\begin{pmatrix} + d_1 & d_2 & d_3 & d_4 \\ + 1 & 0 & 0 & 0 \\ + \delta_1 & \delta_2 & \delta_3 & \delta_4 \\ + 0 & 0 & 1 & 0 +\end{pmatrix} +\begin{pmatrix} r_t \\ r_{t-1} \\ z_t \\ z_{t-1} \end{pmatrix} ++ +\begin{pmatrix} + c_{11} & c_{12} \\ + 0 & 0 \\ + c_{21} & c_{22} \\ + 0 & 0 +\end{pmatrix} +\begin{pmatrix} w_{1,t+1} \\ w_{2,t+1} \end{pmatrix} +$$ (eq:ex_var2_state) + +We consider two possible observation equations. + +The first is a bivariate observation of $r_t$ and $z_t$: + +$$ +\begin{pmatrix} r_t \\ z_t \end{pmatrix} += +\begin{pmatrix} + 1 & 0 & 0 & 0 \\ + 0 & 0 & 1 & 0 +\end{pmatrix} +\begin{pmatrix} r_t \\ r_{t-1} \\ z_t \\ z_{t-1} \end{pmatrix} ++ +\begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix} +\begin{pmatrix} v_{1t} \\ v_{2t} \end{pmatrix} +$$ (eq:ex_var2_obs) + +The second is a univariate observation of $r_t$: + +$$ +y_t = \begin{pmatrix} 1 & 0 & 0 & 0 \end{pmatrix} +\begin{pmatrix} r_t \\ r_{t-1} \\ z_t \\ z_{t-1} \end{pmatrix} ++ v_{1t} +$$ (eq:ex_scalar_obs) + +For the bivariate observation case, +the Wold representation is + +$$ +\begin{pmatrix} r_t \\ z_t \end{pmatrix} += +\underbrace{G \bigl(I - (A - KG)L\bigr)^{-1} K}_{\text{transfer matrix}} +\, a_t +$$ (eq:ex_wold_bivariate) + +where $\mathbb{E} a_t a_t' = G \Sigma G' + R$ and $L$ is the lag operator. + +The innovation expressed in terms of the augmented state is + +$$ +a_t += \begin{bmatrix} G & -G \end{bmatrix} + \begin{pmatrix} x_t \\ \hat{x}_t \end{pmatrix} ++ \begin{bmatrix} 0 & I \end{bmatrix} + \begin{pmatrix} w_{t+1} \\ v_t \end{pmatrix} += G(x_t - \hat{x}_t) + v_t +$$ (eq:ex_innovation_aug) + + + +For the scalar ($1 \times 1$) case with a single observable $r_t$, the observation matrix $G_1$ is the first row of $G$ and the scalar gain $K_1$ is the outcome of the Kalman filter for that case. The univariate Wold representation is + +$$ +r_t = G_1 \bigl(I - (A - K_1 G_1) L\bigr)^{-1} K_1 \, u_t +$$ (eq:ex_wold_scalar) + +where the univariate innovation $u_t$ is given by + +$$ +u_t += \begin{bmatrix} G_1 & -G_1 \end{bmatrix} + \begin{pmatrix} x_t \\ \hat{x}_t \end{pmatrix} ++ \begin{bmatrix} 0 & 1 \end{bmatrix} + \begin{pmatrix} w_{t+1} \\ v_t \end{pmatrix} += G_1(x_t - \hat{x}_t) + v_{1,t} +$$ (eq:ex_innovation_aug2) + +and $\mathbb{E} u_t^2 = G_1 \check \Sigma G_1' + R_{11}$, and $\check \Sigma$ is the steady-state covariance of the augmented state vector associated with +$G_1, R_{11}$. + +### Numerical example: impulse responses of innovations + +We now set specific parameter values and compute the **impulse response functions +of the Kalman innovations** ($a_t$ in System 1 and $u_t$ in System 2) to the +two structural shocks $w_{1,t+1}$ and $w_{2,t+1}$. + +The key object is **not** the response of the observable $y_t$ to the shocks, but +rather the response of the innovation that the Kalman filter produces. + +In System 1, $a_t$ is the $2 \times 1$ forecast error of $(r_t, z_t)'$ given the +filter's estimate; in System 2, $u_t$ is the scalar forecast error of $r_t$ given +the filter's estimate based on the $r_t$ history alone. + +The parameter values are: + +$$ +\begin{aligned} +d_1 &= 0.80,\quad d_2 = 0.05,\quad d_3 = 0.75,\quad d_4 = -0.72 \\ +\delta_1 &= 0.00,\quad \delta_2 = 0.00,\quad \delta_3 = 0.75,\quad \delta_4 = 0.20 \\ +c_{11} &= 1.0,\quad c_{12} = 0.0,\quad c_{21} = 0.0,\quad c_{22} = 1.0 \\ +R &= 0.0001 \times I_2 \quad \text{(bivariate case)}, \qquad +R = 0.0001 \quad \text{(univariate case)}. +\end{aligned} +$$ + +These give the $4 \times 4$ transition matrix and $4 \times 2$ shock-loading matrix + +$$ +A = \begin{pmatrix} +0.80 & 0.05 & 0.75 & -0.72 \\ +1 & 0 & 0 & 0 \\ +0 & 0 & 0.75 & 0.20 \\ +0 & 0 & 1 & 0 +\end{pmatrix}, \qquad +C = \begin{pmatrix} +1 & 0 \\ +0 & 0 \\ +0 & 1 \\ +0 & 0 +\end{pmatrix}. +$$ + +**System 1** uses the bivariate observation equation {eq}`eq:ex_var2_obs`, so +$G$ selects $(r_t, z_t)'$ from the state and the innovation $a_t$ is $2 \times 1$. + +**System 2** uses the univariate observation equation {eq}`eq:ex_scalar_obs`, so +$G_1$ selects only $r_t$ and the innovation $u_t$ is scalar. + +Because the innovation equals $a_t = G(x_t - \hat{x}_t) + v_t$, a unit shock $w_j$ +propagates through the **augmented** state $(x_t', \hat{x}_t')'$. + +A straightforward +calculation using the augmented recursion {eq}`eq:ex_augmented` shows that the +**impulse response of the innovation** at horizon $h \geq 0$ is + +$$ +\text{IRF}_{a}(h) = G(A - KG)^h C e_j, \qquad j = 1, 2, +$$ + +where $e_j$ is the $j$-th column of $I_2$ and $K$ is the appropriate steady-state +Kalman gain (System 1 uses $K$ from the bivariate filter; System 2 uses $K_1$ from +the univariate filter). + +At $h=0$ the shock hits and the innovation equals $GCe_j$; +for $h \geq 1$ the response decays at the rate governed by the eigenvalues of $A - KG$. + +```{code-cell} ipython3 +import numpy as np +import matplotlib.pyplot as plt +import quantecon as qe + +# -- Parameters --------------------------------------------------------------- +d1, d2, d3, d4 = 0.80, 0.05, 0.75, -.72 +delta1, delta2, delta3, delta4 = 0.00, 0.00, 0.75, 0.20 +c11, c12, c21, c22 = 1.0, 0.0, 0.0, 1.0 +sigma_v = 0.01 # sqrt(0.0001) + +# -- Shared matrices ----------------------------------------------------------- +A_var = np.array([[d1, d2, d3, d4 ], + [1.0, 0.0, 0.0, 0.0 ], + [delta1, delta2, delta3, delta4], + [0.0, 0.0, 1.0, 0.0 ]]) + +C_var = np.array([[c11, c12], + [0.0, 0.0], + [c21, c22], + [0.0, 0.0]]) + +# -- System 1: bivariate observation ------------------------------------------ +G_biv = np.array([[1.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0]]) +H_biv = sigma_v * np.eye(2) # H @ H.T = 0.0001 * I_2 + +lss_biv = qe.LinearStateSpace(A_var, C_var, G_biv, H_biv, + mu_0=np.zeros(4), Sigma_0=np.eye(4)) +kf_biv = qe.Kalman(lss_biv) +Sigma_biv, K_biv = kf_biv.stationary_values() + +print("System 1 - steady-state Kalman gain K (4x2):") +print(np.round(K_biv, 5)) + +# -- System 2: univariate observation ----------------------------------------- +G_uni = np.array([[1.0, 0.0, 0.0, 0.0]]) +H_uni = np.array([[sigma_v]]) # H @ H.T = 0.0001 + +lss_uni = qe.LinearStateSpace(A_var, C_var, G_uni, H_uni, + mu_0=np.zeros(4), Sigma_0=np.eye(4)) +kf_uni = qe.Kalman(lss_uni) +Sigma_uni, K_uni = kf_uni.stationary_values() + +print("\nSystem 2 - steady-state Kalman gain K (4x1):") +print(np.round(K_uni, 5)) +``` + +```{code-cell} ipython3 +# -- Covariance comparisons ---------------------------------------------------- + +# State-noise covariance CC' versus innovation covariance G Σ G' + R (System 1) +CC_prime = C_var @ C_var.T +R_biv = (sigma_v**2) * np.eye(2) +innov_cov_biv = G_biv @ Sigma_biv @ G_biv.T + R_biv + +print("State-noise covariance CC' (4x4):") +print(np.round(CC_prime, 6)) + +print("\nSystem 1 - innovation covariance GΣG' + R (2x2):") +print(np.round(innov_cov_biv, 6)) + +# Steady-state state covariance: System 1 (Σ_biv) vs System 2 (Σ_uni) +print("\nSystem 1 - steady-state state covariance Σ (4x4):") +print(np.round(Sigma_biv, 6)) + +print("\nSystem 2 - steady-state state covariance Σ_check (4x4):") +print(np.round(Sigma_uni, 6)) + +print("\nDifference Σ_check - Σ (System 2 minus System 1):") +print(np.round(Sigma_uni - Sigma_biv, 6)) +``` + +#### Impulse responses of $y_t$ to the innovations $a_t$ + +We now apply the helper `y_to_a_irf` defined above to compute the ordinary +impulse responses {eq}`eq:ex_y_to_a` of the observable $y_t$ to its own +innovations $a_t$, for both System 1 (bivariate, so $a_t$ is $2 \times 1$) and +System 2 (univariate, so $u_t$ is scalar). + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: "System 1: IRFs of $y_t = (r_t, z_t)$ to its innovations $a_t$" + name: fig-kfvar-sys1-ya +--- +T_irf = 40 +horizons = np.arange(T_irf) + +Psi_biv = y_to_a_irf(A_var, K_biv, G_biv, T_irf) # System 1: (T, 2, 2) +Psi_uni = y_to_a_irf(A_var, K_uni, G_uni, T_irf) # System 2: (T, 1, 1) + +obs_labels = [r'$r_t$', r'$z_t$'] +innov_labels = [r'$a_{1,t}$', r'$a_{2,t}$'] + +# -- Figure: System 1 - responses of y_t = (r_t, z_t)' to its innovations a_t -- +fig, axes = plt.subplots(2, 2, figsize=(10, 6), sharex=True) +for i, obs in enumerate(obs_labels): + for j, inn in enumerate(innov_labels): + ax = axes[i, j] + ax.plot(horizons, Psi_biv[:, i, j], lw=1.8) + ax.axhline(0, color='k', lw=0.6, ls='--') + ax.set_title(f'{obs} to {inn}', fontsize=9) + if i == 1: + ax.set_xlabel('horizon $h$') + if j == 0: + ax.set_ylabel('response') + +plt.tight_layout() +plt.show() +``` + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: "System 2: IRF of $r_t$ to its innovation $u_t$" + name: fig-kfvar-sys2-ya +--- +fig, ax = plt.subplots(figsize=(7, 3.5)) +ax.plot(horizons, Psi_uni[:, 0, 0], lw=1.8) +ax.axhline(0, color='k', lw=0.6, ls='--') +ax.set_xlabel('horizon $h$') +ax.set_ylabel('response') +plt.tight_layout() +plt.show() +``` + +At horizon $h = 0$ each innovation passes one-for-one into its own component of +$y_t$, the contemporaneous response equals the identity matrix $\Psi_0 = I$, so +the diagonal panels start at $1$ and the off-diagonal panels start at $0$. + +For $h \ge 1$ the responses propagate through the state matrix $A$ and decay +geometrically, tracing out the Wold moving-average representation of the +bivariate (System 1) and univariate (System 2) processes. + +#### Impulse responses of the innovations to the structural shocks + +```{code-cell} ipython3 +def innovation_irf(A, K, G, C, T=40): + """ + Impulse responses of the Kalman innovation to each structural shock. + + The innovation at horizon h to a unit shock e_j is + G (A - KG)^h C e_j, h = 0, 1, ..., T-1. + + Returns irf of shape (T, n_a, n_w). + """ + AKG = A - K @ G + n_a = G.shape[0] + n_w = C.shape[1] + irf = np.zeros((T, n_a, n_w)) + x = C.copy() # (A-KG)^0 @ C at h = 0 + for h in range(T): + irf[h] = G @ x + x = AKG @ x + return irf + +T_irf = 40 +horizons = np.arange(T_irf) + +irf_biv = innovation_irf(A_var, K_biv, G_biv, C_var, T_irf) # (T, 2, 2) +irf_uni = innovation_irf(A_var, K_uni, G_uni, C_var, T_irf) # (T, 1, 2) + +shock_labels = [r'Shock $w_{1,t+1}$', r'Shock $w_{2,t+1}$'] +innov_biv_lbl = [r'$a_{1,t}$', r'$a_{2,t}$'] + +# -- Figure 1: System 1 - innovation IRFs -------------------------------------- +fig, axes = plt.subplots(2, 2, figsize=(10, 6), sharex=True) +for j, shock in enumerate(shock_labels): + for i, albl in enumerate(innov_biv_lbl): + ax = axes[i, j] + ax.plot(horizons, irf_biv[:, i, j], lw=1.8) + ax.axhline(0, color='k', lw=0.6, ls='--') + ax.set_title(f'System 1 - {albl} to {shock}', fontsize=9) + ax.set_xlabel('horizon $h$') + if j == 0: + ax.set_ylabel('innovation response') + +plt.suptitle(r'System 1: IRFs of innovation $a_t$ to structural shocks', + fontsize=11) +plt.tight_layout() +plt.show() + +# -- Figure 2: System 2 - innovation IRFs -------------------------------------- +fig, axes = plt.subplots(1, 2, figsize=(10, 3.5)) +for j, shock in enumerate(shock_labels): + axes[j].plot(horizons, irf_uni[:, 0, j], lw=1.8) + axes[j].axhline(0, color='k', lw=0.6, ls='--') + axes[j].set_title(f'System 2 - $u_t$ to {shock}', fontsize=9) + axes[j].set_xlabel('horizon $h$') +axes[0].set_ylabel('innovation response') + +plt.suptitle(r'System 2: IRFs of innovation $u_t$ to structural shocks', + fontsize=11) +plt.tight_layout() +plt.show() +``` + +The two sets of innovation impulse responses illustrate an important point. + +In **System 1** the filter uses both $r_t$ and $z_t$, so $a_t$ is $2 \times 1$ and +its response to each shock is a pair of numbers at every horizon. + +In **System 2** only $r_t$ is observed, so $u_t$ is scalar and carries less +information about the two-dimensional shock vector. + +Because $K$ differs across the two systems, the decay matrix $A - KG$ differs +as well, so the innovation IRFs $G(A-KG)^h C e_j$ differ across the two systems +even though the structural DGP is identical. + +The covariance comparisons printed above reveal two further insights. + +**State-noise $CC'$ vs. innovation covariance $G\Sigma G' + R$ (System 1).** + +The matrix $CC'$ is the unconditional covariance contributed by the structural +shocks to the state transition in one period. + +The innovation covariance $G\Sigma G' + R$ is the covariance of the one-step +forecast error in the *observable* vector after the Kalman filter has processed +all available information. + +In this example $GCC'G'$ has diagonal entries equal to $1$. + +The printed innovation covariance has diagonal entries slightly above $1$ because it also +contains residual uncertainty from estimating the hidden state and the small +measurement-error variance $R = 0.0001 I_2$. + +Thus $G\Sigma G' + R$ should not be compared directly with $GCC'G'$ as a +strict variance reduction. + +The variance reduction created by filtering is measured relative to the broader +forecast-error covariance before conditioning on the available observations. + +**System 1 covariance $\Sigma$ vs. System 2 covariance $\check\Sigma$.** + +Restricting observations to $r_t$ alone (System 2) yields less information about +the hidden state, so the filter is less precise: $\check\Sigma \succeq \Sigma$, +i.e., the difference $\check\Sigma - \Sigma$ is positive semi-definite. + +The printed difference matrix confirms this ordering for our parameter values. + +## Exercises + +```{exercise-start} +:label: kf_ex1 +``` + +Consider the scalar AR(1) state space system used above with $\rho = 0.9$, +$\sigma_w = 0.5$, $\sigma_v = 1.0$. + +Derive an algebraic expression for the **steady-state** conditional variance +$\Sigma_\infty$ by solving the scalar Riccati equation {eq}`eq:riccati` at its +fixed point $\Sigma_{t+1} = \Sigma_t = \Sigma$. + +Show that $\Sigma$ satisfies a quadratic equation, find its positive root, and +verify numerically that your formula matches `kf.Sigma_infinity`. + +```{exercise-end} +``` + +```{solution-start} kf_ex1 +:class: dropdown +``` + +Setting $\Sigma_{t+1} = \Sigma_t = \Sigma$ in the scalar version of +{eq}`eq:riccati` with $A = \rho$, $CC' = \sigma_w^2$, $GG' = 1$, +$R = \sigma_v^2$: + +$$ +\Sigma = \rho^2 \Sigma + \sigma_w^2 - \frac{\rho^2 \Sigma^2}{\Sigma + \sigma_v^2} +$$ + +Multiplying through by $\Sigma + \sigma_v^2$ and rearranging: + +$$ +\Sigma^2 + \left[\sigma_v^2(1-\rho^2) - \sigma_w^2\right]\Sigma + - \sigma_v^2 \sigma_w^2 = 0 +$$ + +Taking the positive root of this quadratic: + +$$ +\Sigma_\infty + = \frac{\sigma_w^2 - \sigma_v^2(1-\rho^2) + + \sqrt{\left[\sigma_v^2(1-\rho^2) - \sigma_w^2\right]^2 + + 4 \sigma_v^2 \sigma_w^2}}{2} +$$ + +```{code-cell} ipython3 +rho_, sigma_w_, sigma_v_ = 0.9, 0.5, 1.0 + +b = sigma_v_**2 * (1 - rho_**2) - sigma_w_**2 +discriminant = b**2 + 4 * sigma_v_**2 * sigma_w_**2 +Sigma_formula = (-b + np.sqrt(discriminant)) / 2 + +A_ = np.array([[rho_]]) +C_ = np.array([[sigma_w_]]) +G_ = np.array([[1.0]]) +R_ = np.array([[sigma_v_**2]]) +H_ = np.array([[sigma_v_]]) # R_ = H_ @ H_.T +lss_ = qe.LinearStateSpace(A_, C_, G_, H_, mu_0=np.zeros(1), Sigma_0=np.eye(1)) +kf_ = qe.Kalman(lss_) + +print(f"Analytical Σ_inf = {Sigma_formula:.8f}") +print(f"Numerical Σ_inf = {kf_.Sigma_infinity[0, 0]:.8f}") +``` + +```{solution-end} +``` + +```{exercise-start} +:label: kf_ex2 +``` + +**Bivariate example.** + +Consider a two-dimensional state with a one-dimensional observation: + +$$ +A = \begin{pmatrix} 0.9 & 0.1 \\ 0 & 0.8 \end{pmatrix}, \quad +C = \begin{pmatrix} 0.4 \\ 0.1 \end{pmatrix}, \quad +G = \begin{pmatrix} 1 & 0 \end{pmatrix}, \quad +R = [0.5] +$$ + +(a) Simulate $T = 500$ observations from this system starting from a diffuse prior. + +(b) Run the Kalman filter and plot both components of $\hat{x}_t$ against the + true hidden state path. + +(c) Compute and report the steady-state covariance $\Sigma_\infty$ and + Kalman gain $K_\infty$. + +(d) Check that the eigenvalues of $A - K_\infty G$ lie strictly inside the + unit circle, confirming that the VAR representation {eq}`eq:var1` is stable. + +```{exercise-end} +``` + +```{solution-start} kf_ex2 +:class: dropdown +``` + +```{code-cell} ipython3 +A2 = np.array([[0.9, 0.1], + [0.0, 0.8]]) +C2 = np.array([[0.4], + [0.1]]) +G2 = np.array([[1.0, 0.0]]) +R2 = np.array([[0.5]]) +H2 = np.array([[np.sqrt(0.5)]]) # R2 = H2 @ H2.T + +lss2 = qe.LinearStateSpace(A2, C2, G2, H2, + mu_0=np.zeros(2), + Sigma_0=np.eye(2) * 5.0) +kf2 = qe.Kalman(lss2) +kf2.set_state(np.zeros(2), np.eye(2) * 5.0) + +T2 = 500 +x2_path, y2_path = lss2.simulate(ts_length=T2, random_state=0) + +x_hats2 = np.zeros((T2, 2)) +for t in range(T2): + kf2.update(y2_path[:, t]) + x_hats2[t] = kf2.x_hat.ravel() + +fig, axes = plt.subplots(2, 1, figsize=(10, 6), sharex=True) +for i, ax in enumerate(axes): + ax.plot(x2_path[i, :T2], lw=1.2, label=f'True $x_{{{i+1},t}}$') + ax.plot(x_hats2[:, i], lw=1.2, ls='--', label=rf'$\hat{{x}}_{{{i+1},t}}$') + ax.legend(fontsize=9) + ax.set_ylabel(f'component {i+1}') +axes[1].set_xlabel('time $t$') +plt.suptitle('Kalman filter: bivariate hidden state') +plt.tight_layout() +plt.show() + +# (c) Steady-state values +Sigma2_inf, K2_inf = kf2.stationary_values() +print("Steady-state covariance Σ_inf:") +print(np.round(Sigma2_inf, 5)) +print("\nSteady-state Kalman gain K_inf:") +print(np.round(K2_inf, 5)) + +# (d) Eigenvalues of A - K_inf G +AKG2 = A2 - K2_inf @ G2 +eigvals2 = np.linalg.eigvals(AKG2) +print(f"\nEigenvalues of A - K_inf G: {np.round(eigvals2, 5)}") +print(f"Stable VAR: {np.all(np.abs(eigvals2) < 1)}") +``` + +```{solution-end} +``` + +```{exercise-start} +:label: kf_ex3 +``` + +**Likelihood and parameter estimation.** + +Using the scalar model from the main text with true parameters +$(\rho, \sigma_w, \sigma_v) = (0.9, 0.5, 1.0)$: + +(a) Simulate $T = 300$ observations. + +(b) Write a function that evaluates the **log-likelihood** as a function of + $\rho \in (0, 1)$, holding $\sigma_w = 0.5$ and $\sigma_v = 1.0$ fixed, + and plot the log-likelihood against $\rho$ for a grid of values. + +(c) Locate the maximum numerically and check that it is close to the true value + $\rho = 0.9$. + +```{exercise-end} +``` + +```{solution-start} kf_ex3 +:class: dropdown +``` + +```{code-cell} ipython3 +# True parameters +rho_true, sw_true, sv_true = 0.9, 0.5, 1.0 + +A_t = np.array([[rho_true]]) +C_t = np.array([[sw_true]]) +G_t = np.array([[1.0]]) +R_t = np.array([[sv_true**2]]) +H_t = np.array([[sv_true]]) # R_t = H_t @ H_t.T + +lss_t = qe.LinearStateSpace(A_t, C_t, G_t, H_t, + mu_0=np.zeros(1), Sigma_0=np.eye(1)) +_, y_sim = lss_t.simulate(ts_length=300, random_state=7) +y_sim = y_sim.T # shape (300, 1) + +def ll_rho(rho_val): + A_ = np.array([[rho_val]]) + C_ = np.array([[sw_true]]) + G_ = np.array([[1.0]]) + R_ = np.array([[sv_true**2]]) + return log_likelihood(A_, C_, G_, R_, y_sim, + np.zeros(1), np.eye(1) * 10.0) + +rho_grid = np.linspace(0.5, 0.99, 60) +ll_vals = np.array([ll_rho(r) for r in rho_grid]) + +rho_mle = rho_grid[np.argmax(ll_vals)] + +fig, ax = plt.subplots(figsize=(8, 4)) +ax.plot(rho_grid, ll_vals) +ax.axvline(rho_true, color='k', ls='--', label=f'True ρ = {rho_true}') +ax.axvline(rho_mle, color='tab:red', ls=':', label=f'MLE ρ_hat = {rho_mle:.3f}') +ax.set_xlabel(r'$\rho$') +ax.set_ylabel('log-likelihood') +ax.set_title('Profile log-likelihood as a function of $\\rho$') +ax.legend() +plt.tight_layout() +plt.show() + +print(f"True ρ = {rho_true}, MLE ρ_hat = {rho_mle:.4f}") +``` + +```{solution-end} +``` diff --git a/lectures/lq_consumption_smoothing.md b/lectures/lq_consumption_smoothing.md new file mode 100644 index 000000000..410138279 --- /dev/null +++ b/lectures/lq_consumption_smoothing.md @@ -0,0 +1,2149 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.11.1 +kernelspec: + display_name: Python 3 + language: python + name: python3 +--- + +# LQ Consumption Smoothing: Incomplete Markets, Complete Markets, and Robust Control + + +```{index} single: LQ Permanent Income Model +``` + +```{index} single: Consumption Smoothing +``` + +## Overview + +This lecture describes two representations of a linear-quadratic (LQ) permanent income model, then +extends the model to incorporate a consumer's concern for robustness to misspecification of the +stochastic process governing labor income. + +**Part A** covers the standard LQ permanent income model, a rational-expectations version of the +theories of {cite}`Friedman1956` and {cite}`Hall1978`. + +We use the model to illustrate: + +- impulse response functions +- alternative state-space representations of the optimal decision rule +- cointegration of consumption and assets +- a "borrowers and lenders" closed economy (Bewley model) +- complete-markets consumption smoothing + +**Part B** covers a robust version of the permanent income model due to {cite}`HST_1999` +(HST) and {cite}`HansenSargent2008`. A consumer who distrusts his specification of the labor income process engages in a form of +precautionary savings even when preferences are quadratic. + +We show: + +- how a concern for robustness is observationally equivalent (for quantities) to an increase in + impatience +- how the worst-case model distorts the endowment process toward greater persistence +- frequency-domain and detection-error-probability characterizations of the size of model + uncertainty + +**Synthesis: a Bewley model with heterogeneous beliefs and discount factors.** The lecture +concludes by combining Parts A and B in a single general-equilibrium model. + +Using tools that we borrow from {cite}`HansenSargent2008`, we show: + +- how a continuum of consumers can differ in their robustness parameter $\sigma^i \leq 0$ and + their discount factor $\beta^i$, with each pair lying on the observational-equivalence locus + from Part B +- how every such consumer nevertheless chooses the **same consumption-saving rule** as the + plain-vanilla $(\sigma = 0, \beta_0)$ agent of Part A +- how the equilibrium interest rate $R = \beta_0^{-1}$ and all aggregate dynamics therefore + coincide with those of the Part A Bewley model +- how agents can hold genuinely different internal (worst-case) models of their income process + while remaining observationally indistinguishable in quantities + +Throughout, we set $\beta R = 1$, so the consumer's subjective discount factor equals the bond price. + +Let's begin with some imports. + +```{code-cell} ipython3 +import numpy as np +import matplotlib.pyplot as plt +from scipy.linalg import solve, inv, solve_discrete_lyapunov +from scipy.stats import norm + +``` + +--- + +## Part A: the standard LQ permanent income model + +```{index} single: LQ Permanent Income Model; standard +``` + +### Setup + +```{index} single: Permanent Income Hypothesis; Friedman +``` + +A consumer has preferences over consumption streams ordered by + +$$ +\mathbb{E}_0 \sum_{t=0}^{\infty} \beta^t u(c_t) +$$ (eq:sprob1) + +where $\mathbb{E}_t$ is the mathematical expectation conditioned on the consumer's time-$t$ information, +$c_t$ is time-$t$ consumption, $u(c)$ is a strictly concave one-period utility function, and +$\beta \in (0,1)$ is a discount factor. + +The consumer maximizes {eq}`eq:sprob1` by choosing a plan +$\{c_t, b_{t+1}\}_{t=0}^{\infty}$ subject to the sequence of budget constraints + +$$ +c_t + b_t = R^{-1} b_{t+1} + y_t, \quad t \geq 0 +$$ (eq:sprob2) + +where $\{y_t\}$ is an exogenous stationary endowment process, $R$ is a constant gross risk-free +interest rate, $b_t$ is a one-period risk-free bond maturing at $t$, and $b_0$ is a given +initial condition. + +```{note} +For $t \geq 1$, $b_t$ is chosen at time $t-1$. The bond $b_t > 0$ represents debt owed by +the consumer at the start of period $t$. +``` + +We assume $R^{-1} = \beta$. + +The endowment process has the state-space representation + +$$ +\begin{aligned} +z_{t+1} &= \check{A}\, z_t + \check{C}\, w_{t+1} \\ +y_t &= \check{G}\, z_t +\end{aligned} +$$ (eq:sprob15) + +where $w_{t+1}$ is i.i.d. with mean zero and identity covariance matrix, $\check{A}$ is a stable +matrix (eigenvalues strictly less than one in modulus), and $\check{G}$ is a row vector. + +The state confronting the household at $t$ is +$\bigl[b_t \;\; z_t'\bigr]'$, where $b_t$ is its one-period debt due at the start of period $t$ +and $z_t$ contains all variables useful for forecasting its future endowment. + +To make the problem linear-quadratic, we adopt the **quadratic utility function** + +$$ +u(c_t) = -\tfrac{1}{2}(c_t - \gamma)^2 +$$ + +where $\gamma > 0$ is a bliss level of consumption. + +We allow $c_t$ to be negative (a producer +rather than a consumer). + +We impose the **transversality condition** + +$$ +\mathbb{E}_0 \sum_{t=0}^{\infty} \beta^t b_t^2 < +\infty +$$ (eq:sprob3) + +which rules out Ponzi schemes. + +### Euler equation and certainty equivalence + +With quadratic utility, the first-order conditions for the consumer's problem imply the **martingale +Euler equation** + +$$ +\mathbb{E}_t c_{t+1} = c_t +$$ (eq:sprob5) + +```{note} +Equation {eq}`eq:sprob5` says that consumption is a martingale. This is the key implication of +the LQ permanent income model. It contrasts with models that have convex marginal utility +($u''' > 0$), where consumption is instead a submartingale. +``` + +The problem satisfies a **certainty-equivalence** property: one can find the optimal plan by (1) +solving the problem under perfect foresight to express $c_t$ as a function of $b_t$ and the entire +future path $\{y_{t+j}\}_{j=0}^{\infty}$, and then (2) replacing those future values with +$\{\mathbb{E}_t y_{t+j}\}_{j=0}^{\infty}$. + +### The optimal consumption function + +Solving the budget constraint {eq}`eq:sprob2` forward, imposing the transversality condition, and +taking conditional expectations gives + +$$ +b_t = \sum_{j=0}^{\infty} \beta^j \mathbb{E}_t y_{t+j} - \frac{1}{1-\beta} c_t +$$ (eq:sprob7) + +Rearranging yields the **consumption function** + +$$ +c_t = (1-\beta)\!\left[\sum_{j=0}^{\infty} \beta^j \mathbb{E}_t y_{t+j} - b_t\right] +$$ (eq:sprob8) + +Equivalently, with net interest rate $r$ defined by $\beta = 1/(1+r)$, + +$$ +c_t = \frac{r}{1+r}\!\left[\sum_{j=0}^{\infty} \beta^j \mathbb{E}_t y_{t+j} - b_t\right] +$$ (eq:sprob9) + +Consumption equals $r/(1+r)$ times total wealth, where total wealth is the sum of human wealth +$\sum_{j=0}^{\infty}\beta^j \mathbb{E}_t y_{t+j}$ and financial wealth $-b_t$. + +Using the state-space representation {eq}`eq:sprob15` to evaluate the geometric sum of expected +future endowments, + +$$ +\sum_{j=0}^{\infty} \beta^j \mathbb{E}_t y_{t+j} = \check{G}(I - \beta \check{A})^{-1} z_t +$$ (eq:discount1) + +we obtain the **Lucas-critique-respecting consumption function** + +$$ +c_t = (1-\beta)\!\left[\check{G}(I-\beta\check{A})^{-1} z_t - b_t\right] +$$ (eq:lccf) + +This expresses $c_t$ as a function of the state $[b_t,\, z_t']'$ that confronts the household. + +### Representation 1: state $(b_t, z_t)$ + +Combining the endowment law of motion with the optimal debt dynamics (derived by substituting +{eq}`eq:lccf` into {eq}`eq:sprob2`) gives the first system representation: + +$$ +\begin{aligned} +z_{t+1} &= \check{A}\, z_t + \check{C}\, w_{t+1} \\ +b_{t+1} &= b_t + \check{G}\bigl[(I - \beta\check{A})^{-1}(\check{A}-I)\bigr] z_t \\ +y_t &= \check{G}\, z_t \\ +c_t &= (1-\beta)\!\left[\check{G}(I-\beta\check{A})^{-1} z_t - b_t\right] +\end{aligned} +$$ (eq:rep1) + +In this representation the **exogenous** state is $z_t$ and the **endogenous** state is $b_t$. + +### Representation 2: state $(c_t, z_t)$ + +{cite}`Hall1978` showed that the LQ permanent income model implies a particularly sharp +state-space representation in which the state consists of current consumption $c_t$ and the +exogenous endowment state $z_t$, with assets $b_t$ becoming an outcome rather than a state +variable. + +**Consumption innovation.** Shifting {eq}`eq:sprob8` forward, eliminating $b_{t+1}$ via +{eq}`eq:sprob2`, and rearranging yields + +$$ +c_{t+1} - c_t = (1-\beta)\sum_{j=0}^{\infty} \beta^j \bigl(\mathbb{E}_{t+1} y_{t+j+1} - \mathbb{E}_t y_{t+j+1}\bigr) +$$ (eq:sprob11) + +The right-hand side is $(1-\beta)$ times the time-$(t+1)$ **innovation** to the expected present +value of the endowment stream. + +Suppose the endowment has the moving-average representation + +$$ +y_{t+1} = d(L)\, w_{t+1}, \qquad d(L) = \sum_{j=0}^{\infty} d_j L^j +$$ (eq:sprob12) + +where $d(L) = \check{G}(I - \check{A} L)^{-1}\check{C}$. Then + +$$ +\mathbb{E}_{t+1} y_{t+j} - \mathbb{E}_t y_{t+j} = d_{j-1}\, w_{t+1} +$$ (eq:sprob120) + +Substituting {eq}`eq:sprob120` into {eq}`eq:sprob11` gives the key result + +$$ +c_{t+1} - c_t = (1-\beta)\, d(\beta)\, w_{t+1} +$$ (eq:sprob13) + +where $d(\beta) = \check{G}(I-\beta\check{A})^{-1}\check{C}$ is the **present value of the +moving-average coefficients**. Consumption is a **random walk** with innovation +$(1-\beta)d(\beta)w_{t+1}$. + +**Full second representation.** Combining {eq}`eq:sprob13` and {eq}`eq:sprob7` gives + +$$ +\begin{aligned} +c_{t+1} &= c_t + (1-\beta)\,\check{G}(I-\beta\check{A})^{-1}\check{C}\, w_{t+1} \\ +b_t &= \check{G}(I-\beta\check{A})^{-1} z_t - \frac{1}{1-\beta}\,c_t \\ +y_t &= \check{G}\, z_t \\ +z_{t+1} &= \check{A}\, z_t + \check{C}\, w_{t+1} +\end{aligned} +$$ (eq:sprob16) + +This representation reveals several important features of the optimal decision rule: + +1. **State**: The state consists of the endogenous component $c_t$ and the exogenous component + $z_t$. Financial assets $b_t$ have disappeared as a state variable because they are encoded + in $c_t$. + +2. **Random walk**: Consumption is a random walk with innovation $(1-\beta)d(\beta)w_{t+1}$, which + confirms that the Euler equation {eq}`eq:sprob5` is built into the solution. The random-walk + property implies that consumption has no asymptotic stationary distribution. + +3. **Box impulse response**: For all $j \geq 1$, the response of $c_{t+j}$ to the innovation + $w_{t+1}$ is the constant $(1-\beta)d(\beta)$, giving a "box-shaped" impulse response. + +4. **Cointegration**: Both $c_t$ and $b_t$ are nonstationary (unit-root processes), but the + linear combination $(1-\beta)b_t + c_t$ is stationary. From {eq}`eq:sprob7`, + +$$ +(1-\beta)b_t + c_t = (1-\beta)\mathbb{E}_t\sum_{j=0}^{\infty}\beta^j y_{t+j} +$$ (eq:cointegration) + + The left side is the cointegrating residual, which equals the consumer's expected present value + of future income. + +### Debt dynamics + +```{index} single: History Dependence +``` + +Subtracting {eq}`eq:sprob16` (equation for $b_t$) at time $t$ from the same equation at time $t+1$ +and substituting gives + +$$ +b_{t+1} - b_t = \check{G}(I-\beta\check{A})^{-1}(\check{A}-I)\, z_t +$$ (eq:debt_evolution) + +This shows that $b_{t+1}$ is **predetermined** at time $t$ as a function of $z_t$ alone. +Solving backward from any $t$, $b_t$ depends on the entire history $z^{t-1} = [z_{t-1},\ldots,z_0]$ +and the initial condition $b_0$. This **history dependence** is a hallmark of incomplete markets. + +### Two classic examples + +```{index} single: Permanent Income Model; examples +``` + +We illustrate formulas {eq}`eq:sprob16` with two examples. + +In both, the endowment is +$y_t = z_{1t} + z_{2t}$, where + +$$ +\begin{pmatrix}z_{1,t+1}\\z_{2,t+1}\end{pmatrix} += +\begin{pmatrix}1 & 0\\0 & 0\end{pmatrix} +\begin{pmatrix}z_{1t}\\z_{2t}\end{pmatrix} ++ +\begin{pmatrix}\sigma_1 & 0\\0 & \sigma_2\end{pmatrix} +\begin{pmatrix}w_{1,t+1}\\w_{2,t+1}\end{pmatrix} +$$ (eq:twofactor) + +Here $z_{1t}$ is a **permanent** component of $y_t$ and $z_{2t}$ is a **purely transitory** +component. + +**Example 1 (full information).** The consumer observes the state $z_t$ at time $t$, so he +can reconstruct $w_{t+1}$ from $z_{t+1}$ and $z_t$. + +Applying {eq}`eq:sprob16`: + +$$ +c_{t+1} - c_t = \sigma_1 w_{1,t+1} + (1-\beta)\,\sigma_2\, w_{2,t+1} +$$ (eq:consexample1) + +A unit increment to the permanent component $z_{1t}$ raises consumption **one-for-one** permanently +and causes **zero net saving**. + +A unit increment to the purely transitory component raises +consumption by only the fraction $(1-\beta)$ permanently, while the remaining fraction $\beta$ is +saved. + +From {eq}`eq:debt_evolution`: + +$$ +b_{t+1} - b_t = -z_{2t} = -\sigma_2 w_{2t} +$$ (eq:consexample1a) + +confirming that none of the permanent shock is saved, while all of the transitory shock is saved. + +**Example 2 (partial information / Muth model).** The consumer observes $y_t$ and its history, +but not $z_{1t}$ and $z_{2t}$ separately. + +The appropriate approach uses an **innovations +representation** derived by the Kalman filter. + +At the Kalman filter steady state, the **Kalman gain** $K \in [0,1]$ satisfies + +$$ +K = \frac{\Sigma}{\Sigma + \sigma_2^2}, \qquad \Sigma = \frac{\sigma_1^2 + \sqrt{\sigma_1^4 + 4\sigma_1^2\sigma_2^2}}{2} +$$ (eq:kalmangain) + +where $K$ increases with the ratio $\sigma_1^2/\sigma_2^2$ (the variance of the permanent shock +relative to the transitory shock). + +The innovations representation expresses the endowment as an ARMA(1,1) in +its own innovation $a_t = y_t - \mathbb{E}[y_t \mid y^{t-1}]$ (the one-step-ahead forecast error): + +$$ +y_{t+1} = y_t - (1-K)\,a_t + a_{t+1} +$$ (eq:muth_innov) + +Here the coefficient $-(1-K)$ on the lagged innovation reflects that only the fraction +$K$ of last period's surprise was treated as permanent; the remainder mean-reverts. + +The scalar +$a_t$ is i.i.d. with variance $\Sigma + \sigma_2^2$. +Applying {eq}`eq:sprob16` to this innovation representation: + +$$ +c_{t+1} - c_t = [1 - \beta(1-K)]\, a_{t+1} +$$ (eq:consexample2) + +The consumer regards a fraction $K$ of the innovation $a_{t+1}$ as permanent and fraction $1-K$ +as transitory. + +He permanently increments consumption by $K + (1-\beta)(1-K) = 1 - \beta(1-K)$ +of $a_{t+1}$ and saves the remaining fraction $\beta(1-K)$. + +The first difference of income obeys a first-order moving average: + +$$ +y_{t+1} - y_t = a_{t+1} - (1-K)\,a_t +$$ (eq:incomemaar) + +while the first difference of consumption is i.i.d. + +### Numerical illustration of the two examples + +```{code-cell} ipython3 +# Parameters +β = 0.95 # discount factor (so R = 1/β) +σ1 = 0.15 # std of permanent shock +σ2 = 0.30 # std of transitory shock + +# -- Example 1: full information ---------------------------------------------- +A_check = np.array([[1.0, 0.0], + [0.0, 0.0]]) +C_check = np.array([[σ1, 0.0], + [0.0, σ2]]) +G_check = np.array([[1.0, 1.0]]) + +# Key matrix M = G(I - βA)^{-1} +IbA = np.eye(2) - β * A_check +M = G_check @ inv(IbA) # shape (1, 2) + +# Impulse response of consumption (permanent to unit permanent / transitory shock) +h = (1 - β) * M @ C_check # shape (1, 2) +irf_perm_ex1 = h[0, 0] / σ1 # response per unit std of permanent shock +irf_trans_ex1 = h[0, 1] / σ2 # response per unit std of transitory shock + +print("Example 1 (full information)") +print(f" IRF c to permanent shock (normalised): {irf_perm_ex1:.4f} (theory: 1.0)") +print(f" IRF c to transitory shock (normalised): {irf_trans_ex1:.4f} " + f"(theory: {1-β:.4f})") +``` + +```{code-cell} ipython3 +# -- Example 2: partial information / Muth model ------------------------------- +Σ = (σ1**2 + np.sqrt(σ1**4 + 4 * σ1**2 * σ2**2)) / 2 +K = Σ / (Σ + σ2**2) + +print("Example 2 (partial information)") +print(f" Steady-state Kalman gain K = {K:.4f}") +print(f" IRF c to unit innovation a_{{t+1}}: {1 - β*(1-K):.4f}") +print(f" Fraction of innovation treated as permanent (K): {K:.4f}") +print(f" Fraction saved: β(1-K) = {β*(1-K):.4f}") +``` + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: Consumption impulse responses in the two examples + name: fig-lqcs-irf-examples +--- +# -- Compare impulse responses across examples --------------------------------- +T = 30 +irf_c_ex1_perm = np.ones(T) * irf_perm_ex1 * σ1 +irf_c_ex1_trans = np.ones(T) * irf_trans_ex1 * σ2 + +irf_c_ex2 = np.ones(T) * (1 - β * (1 - K)) # per unit innovation a_t + +fig, axes = plt.subplots(1, 2, figsize=(12, 4)) + +axes[0].axhline(0, color='k', linewidth=0.8) +axes[0].step(range(T), irf_c_ex1_perm, where='post', + label='permanent shock ($z_1$)', color='steelblue') +axes[0].step(range(T), irf_c_ex1_trans, where='post', + label='transitory shock ($z_2$)', color='tomato', linestyle='--') +axes[0].set_title('Example 1: IRF of $c$ (full information)') +axes[0].set_xlabel('periods after shock') +axes[0].set_ylabel('response of $c$') +axes[0].legend() +axes[0].grid(alpha=0.3) + +axes[1].axhline(0, color='k', linewidth=0.8) +axes[1].step(range(T), irf_c_ex2, where='post', + label=f'unit innovation $a_{{t+1}}$ (K = {K:.2f})', + color='purple') +axes[1].set_title('Example 2: IRF of $c$ (partial information)') +axes[1].set_xlabel('periods after shock') +axes[1].set_ylabel('response of $c$') +axes[1].legend() +axes[1].grid(alpha=0.3) + +plt.tight_layout() +plt.show() +``` + +```{note} +The impulse responses have the "box" shape characteristic of the LQ permanent income model: once +a shock occurs, consumption shifts permanently to a new level and stays there. +``` + +### Spreading consumption cross sections + +```{index} single: Cross-Section Distributions; consumption +``` + +The unit root in consumption (Representation 2) causes the **cross-section variance** of +consumption to grow linearly with age. + +Consider a continuum of *ex ante* identical households born at $t = 0$. + +Each household $i$ has +the same preferences and the same stochastic income process, but faces **idiosyncratic** shocks +$w_{t+1}^i$. + +Let all households start from the same initial conditions $c_0^i = c_0$ and $z_0^i$. + +From {eq}`eq:sprob16`, household $i$'s consumption follows + +$$ +c_{t+1}^i = c_t^i + h\, w_{t+1}^i, \qquad h = (1-\beta)\,\check{G}(I-\beta\check{A})^{-1}\check{C} +$$ + +Since the $w^i_{t+1}$ are independent across agents, + +$$ +\mathbb{E}_0\bigl(c_t^i - c_0^i\bigr)^2 = t\, h h' +$$ (eq:varspread) + +In the two-factor model, $h$ is a $1 \times 2$ row vector so $hh'$ is a positive scalar equal to +$\sigma_1^2 + (1-\beta)^2\sigma_2^2$. + +The cross-section variance of consumption grows like $t$. + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: Spreading consumption cross sections + name: fig-lqcs-spread +--- +# Simulate cross-section spreading +rng = np.random.default_rng(42) +N = 5000 # number of agents +T_sim = 80 # number of periods + +h_vec = (1 - β) * (M @ C_check) # shape (1, 2), then flatten +h_vec = h_vec.flatten() # h = [h1, h2] + +c = np.zeros((N, T_sim + 1)) # consumption paths +# initialise all agents at c_0 = 0 (demeaned) +for t in range(T_sim): + eps = rng.standard_normal((N, 2)) # N draws of 2D shock + dc = eps @ h_vec # shape (N,) + c[:, t+1] = c[:, t] + dc + +# Cross-section variance at each date +var_c = np.var(c, axis=0) +theory = np.arange(T_sim + 1) * np.dot(h_vec, h_vec) + +fig, ax = plt.subplots() +ax.plot(var_c, label='simulated cross-section variance', linewidth=2) +ax.plot(theory, label=r'theoretical: $t \cdot h h^\prime$', + linestyle='--', color='tomato') +ax.set_xlabel('period $t$') +ax.set_ylabel('cross-section variance of $c$') +ax.legend() +ax.grid(alpha=0.3) +plt.show() +``` + +--- + +### A "borrowers and lenders" closed economy (Bewley model) + +```{index} single: Bewley Model +``` + +Up to now we have set $R = \beta^{-1}$ and taken it as determined outside the model ("small open +economy"). + +Following ideas of {cite}`Bewley1977`, we can construct a **closed economy** in which +$R = \beta^{-1}$ is an **equilibrium outcome**. + +**Environment.** A continuum of measure one of consumers, indexed by $i \in [0,1]$, trade a +risk-free one-period bond with price $\beta$. + +All consumers have the same preferences and the +same stochastic income process, but face **idiosyncratic** income shocks. + +Initial bond positions +are zero: $b_0^i = 0$ for all $i$. + +Initial endowment states $z_0^i$ are independent draws from +the stationary distribution of {eq}`eq:sprob15`. + +**Individual decisions.** From {eq}`eq:lccf`, with $b_0^i = 0$, agent $i$'s time-0 consumption +is + +$$ +c_0^i = (1-\beta)\,\check{G}(I-\beta\check{A})^{-1} z_0^i +$$ (eq:c_null) + +For $t \geq 1$, from {eq}`eq:sprob16`: + +$$ +c_{t+1}^i = c_t^i + h\, w_{t+1}^i, \qquad h = (1-\beta)\,\check{G}(I-\beta\check{A})^{-1}\check{C} +$$ (eq:c_future) + +**Market clearing.** Let $Y$ denote the stationary mean of the cross-section average of +non-financial income. Integrating {eq}`eq:c_null` over all agents: + +$$ +\int_0^1 c_0^i\, di = (1-\beta)\sum_{j=0}^{\infty}\beta^j \mathbb{E}_0\!\int_0^1 y_j^i\, di = Y +$$ (eq:c_marketclear_0) + +because the continuum of idiosyncratic shocks averages out. + +For future periods, integrating +{eq}`eq:c_future`: + +$$ +\int_0^1 c_{t+1}^i\, di = \int_0^1 c_t^i\, di + h\!\underbrace{\int_0^1 w_{t+1}^i\, di}_{=\,0} = Y +$$ + +The goods market clears at every date at **constant** aggregate consumption equal to $Y$. + +The +bond market clears at zero net supply each period. Thus $R = \beta^{-1}$ is an equilibrium +outcome: we have constructed a Bewley model. + +**Cross-section inequality.** While the cross-section mean of consumption is constant, the +cross-section **variance** grows without bound according to {eq}`eq:varspread`. Initial +differences in endowment draws $z_0^i$ create permanent differences in consumption levels, and +idiosyncratic shocks create ongoing divergence. + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: Bewley economy cross-section moments + name: fig-lqcs-bewley +--- +# Verify Bewley market clearing via simulation +# We use an online (Welford) accumulator for mean and variance so that we never +# store the full NxT consumption array in memory. +rng = np.random.default_rng(0) +N_bew = 10000 # agents - enough to illustrate the law of large numbers +T_bew = 60 + +# Draw initial z^i_0; z1 is a random walk so its stationary distribution is +# degenerate - we draw from a distribution with std 1 for illustration. +z0_i = rng.standard_normal((N_bew, 2)) * np.array([1.0, σ2]) +c0_i = ((1 - β) * (M @ z0_i.T)).flatten() # shape (N_bew,) + +# Online accumulation: propagate c_t across agents without storing all paths. +mean_c = np.zeros(T_bew + 1) +var_c2 = np.zeros(T_bew + 1) +mean_c[0] = c0_i.mean() +var_c2[0] = c0_i.var() + +c_now = c0_i.copy() +for t in range(T_bew): + eps = rng.standard_normal((N_bew, 2)) + c_now = c_now + eps @ h_vec + mean_c[t + 1] = c_now.mean() + var_c2[t + 1] = c_now.var() + +# Keep c0_i available for the complete-markets figure below +c_bew_t0 = c0_i + +fig, axes = plt.subplots(1, 2, figsize=(12, 4)) + +axes[0].plot(mean_c, linewidth=2, color='steelblue') +axes[0].axhline(mean_c[0], linestyle='--', color='tomato', label='initial mean') +axes[0].set_title('Cross-section mean of $c$ (Bewley economy)') +axes[0].set_xlabel('period $t$') +axes[0].set_ylabel('mean consumption') +axes[0].legend() +axes[0].grid(alpha=0.3) + +axes[1].plot(var_c2, linewidth=2, color='steelblue', label='simulated variance') +axes[1].set_title('Cross-section variance of $c$ (Bewley economy)') +axes[1].set_xlabel('period $t$') +axes[1].set_ylabel('variance of consumption') +axes[1].legend() +axes[1].grid(alpha=0.3) + +plt.tight_layout() +plt.show() +``` + +**An inefficiency.** Because each consumer dislikes variation of consumption over time, each +consumer would prefer a completely smoothed stream $c_t^i = c_0^i$ for all $t$. Such an +allocation is feasible (the cross-section average of income is constant), and it is **Pareto +superior** to the incomplete-markets equilibrium. The next section describes the complete-markets +allocation that achieves this. + +--- + +### Consumption smoothing with complete markets + +```{index} single: Complete Markets; Arrow securities +``` + +We replace the single bond with a **complete set of Arrow securities**. The budget constraint +becomes + +$$ +c_t + b_{t-1}(z_t) = \int q(z_{t+1}|z_t)\, b_t(z_{t+1})\, dz_{t+1} + y_t +$$ (eq:CMbudget) + +where $q(z_{t+1}|z_t)$ is the pricing kernel for one-period state-contingent claims and +$b_t(z_{t+1})$ is the household's portfolio of Arrow securities chosen at $t$. + +**Equilibrium pricing kernel.** We conjecture (and verify) that the equilibrium pricing kernel is + +$$ +q(z_{t+1}|z_t) = \beta\,\phi(z_{t+1}|z_t) +$$ (eq:kernel) + +where $\phi(z_{t+1}|z_t)$ is the transition density of $z$. This kernel prices a one-period +risk-free bond at $\beta$, so $R = \beta^{-1}$, consistent with the incomplete-markets +equilibrium. + +**Constant consumption conjecture.** We conjecture that the equilibrium delivers each consumer +$i$ a **constant** consumption level: + +$$ +c_t^i = \bar{c}^i = c_0^i, \quad \forall\, t \geq 0 +$$ (eq:constcons) + +where $c_0^i = (1-\beta)\,\check{G}(I-\beta\check{A})^{-1} z_0^i$ is the consumer's time-0 +consumption in the incomplete-markets economy. + +**Supporting portfolio.** The state-contingent debt that supports constant consumption is + +$$ +b_{t-1}(z_t) = \check{G}(I-\beta\check{A})^{-1} z_t - \frac{1}{1-\beta}\,\bar{c}^i \;\equiv\; b(z_t, \bar{c}^i) +$$ (eq:cmdebt) + +Note that indebtedness depends only on the current Markov state $z_t$, **not** on the history of +earlier states. This absence of history dependence reflects the **complete risk sharing** attained +under complete markets. + +Substituting the pricing kernel {eq}`eq:kernel` and the portfolio conjecture {eq}`eq:cmdebt` into +the budget constraint {eq}`eq:CMbudget` and using the law of iterated expectations confirms that +the budget constraint simplifies to $c_t = \bar{c}^i$ in every state and period. + +**Cross-section implications.** Under complete markets: + +- The cross-section distribution of consumption is **time-invariant**. +- Consumer $i$'s rank in the consumption distribution is fixed forever. +- A lucky initial draw $z_0^i$ manifests itself as perpetually high consumption $\bar{c}^i$ and + lower indebtedness $b(z_t^i, \bar{c}^i)$ across all future states. + +This contrasts sharply with the incomplete-markets Bewley economy, where the cross-section variance +of consumption grows without bound. + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: Complete vs incomplete markets consumption distributions + name: fig-lqcs-markets +--- +# Illustrate: complete vs incomplete markets consumption distributions over time +rng = np.random.default_rng(1) +N_cm = 5000 +T_cm = 50 + +# Initial consumption draws (same as Bewley economy) +c0_cm = c_bew_t0[:N_cm] # initial consumption draws from the Bewley cell above + +# Incomplete markets: consumption evolves (random walk) +c_inc = np.zeros((N_cm, T_cm + 1)) +c_inc[:, 0] = c0_cm +for t in range(T_cm): + eps = rng.standard_normal((N_cm, 2)) + c_inc[:, t+1] = c_inc[:, t] + eps @ h_vec + +# Complete markets: consumption stays constant +c_comp = np.tile(c0_cm[:, np.newaxis], T_cm + 1) + +fig, axes = plt.subplots(1, 2, figsize=(12, 4)) + +for t_plot, color in zip([0, 10, 30, 50], ['steelblue', 'orange', 'tomato', 'purple']): + axes[0].hist(c_inc[:, t_plot], bins=60, alpha=0.4, + label=f't = {t_plot}', color=color, density=True) +axes[0].set_title('Incomplete markets:\ncross-section distribution of $c$') +axes[0].set_xlabel('$c$') +axes[0].legend(fontsize=9) +axes[0].grid(alpha=0.3) + +for t_plot, color in zip([0, 10, 30, 50], ['steelblue', 'orange', 'tomato', 'purple']): + axes[1].hist(c_comp[:, t_plot], bins=60, alpha=0.4, + label=f't = {t_plot}', color=color, density=True) +axes[1].set_title('Complete markets:\ncross-section distribution of $c$') +axes[1].set_xlabel('$c$') +axes[1].legend(fontsize=9) +axes[1].grid(alpha=0.3) + +plt.tight_layout() +plt.show() +``` + +```{note} +Under **complete markets** the histogram stays the same across all $t$ (distributions overlap +perfectly), while under **incomplete markets** the distribution spreads out over time. +``` + +--- + +**Connecting Part A to Part B.** Part A derived optimal consumption rules for a consumer who +fully trusts his stochastic income model. Part B relaxes that assumption: the consumer fears that +the model is misspecified and therefore seeks decision rules that are robust to plausible +alternatives. We will see that the optimal robust rule takes the same form as the Part A rule, +but under a distorted model of the income process that looks more persistent than the approximating +one. + +--- + +## Part B: a robust permanent income model + +```{index} single: Robust Control; permanent income +``` + +```{index} single: Precautionary Savings; robustness +``` + +### Introduction + +Part B studies a consumer who **distrusts** his specification of the stochastic process governing +his labor income. The model is due to Hansen, Sargent, and Tallarini (1999) (HST) {cite}`HST_1999`, who estimated +it on US quarterly consumption and investment data. + +A consumer who fears model misspecification engages in a form of **precautionary savings** that is +distinct from the usual precautionary motive (which requires a convex marginal utility). + +Here, the +precautionary motive arises because the consumer wants to protect against misspecification of the +**conditional means** of income shocks, and it operates even with quadratic preferences. + +HST showed an important **observational equivalence** result: for quantities $(c_t, i_t)$ alone, +a concern for robustness is indistinguishable from an increase in impatience (a decrease in +$\beta$). + +We develop this result carefully below. + +```{index} single: Observational Equivalence; robustness and discounting +``` + +**Outline of Part B:** + +1. State the HST model (planner's Bellman equation with robustness). +2. Solve the $\sigma = 0$ benchmark. +3. Prove observational equivalence (Theorem 1 and Theorem 2). +4. Interpret the result using distorted expectations from a Stackelberg multiplier game. +5. Illustrate frequency-domain and detection-error-probability aspects. +6. Evaluate the robustness of alternative decision rules. + +--- + +### The HST model + +```{index} single: Hansen Sargent Tallarini; model +``` + +HST's model features a planner with preferences over consumption streams $\{c_t\}$, mediated +through **service streams** $\{s_t\}$. Let $b$ be a preference shifter (utility bliss point). + +The **Bellman equation for the robust planner** is + +$$ +-x' P x - p = +\sup_c \inf_w \Bigl\{-(s-b)^2 + \beta\bigl(\theta w^{*\prime} w^* - \mathbb{E}\,x^{*\prime} P x^* - p\bigr)\Bigr\} +$$ (eq:income1) + +subject to the household technology, capital accumulation, endowment dynamics, and the state law: + +$$ +\begin{aligned} +s &= (1+\lambda)c - \lambda h \\ +h^* &= \delta_h h + (1-\delta_h) c \\ +k^* &= \delta_k k + i \\ +c + i &= \gamma k + d \\ +\begin{pmatrix}d\\b\end{pmatrix} &= U z \\ +z^* &= A_{22} z + C_2(\epsilon^* + w^*) +\end{aligned} +$$ (eq:income1a) + +Here $^*$ denotes the next-period value; $c$ is consumption; $s$ is the scalar service measure; +$h$ is a habit stock; $k$ is the capital stock; $i$ is investment; $d$ is an endowment/technology +shock; $b$ is a **preference shock** (bliss-point shifter, distinct from the bond/debt variable +$b_t$ used in Part A); $\epsilon^* \sim \mathcal{N}(0,I)$ is the baseline shock; and +$w^*$ is a **distortion** to the conditional mean of $\epsilon^*$ chosen by an evil agent. + +The penalty parameter $\theta > 0$ governs the consumer's concern about robustness. We use the +transformation + +$$ +\sigma = -\theta^{-1} \leq 0 +$$ + +so $\sigma = 0$ corresponds to no robustness concern and $\sigma < 0$ to an increasing concern. + +**Household technology.** When $\lambda > 0$ and $\delta_h \in (0,1)$, the technology +{eq}`eq:income1a` accommodates **habit persistence** (positive $\lambda$) or durability. The stock +$h_t$ is a geometric weighted average of current and past consumption. + +**Production technology.** Equation $c_t + k_t = Rk_{t-1} + d_t$ with +$R = \delta_k + \gamma$ combines capital accumulation with a linear production technology. $R$ is +the physical gross return on capital. + +**State vector.** Let $x_t' = [h_{t-1},\, k_{t-1},\, z_t']$. There is a set of state transition +equations: + +$$ +x_{t+1} = A\, x_t + B\, u_t + C(\epsilon_{t+1} + w_{t+1}) +$$ (eq:law0) + +where $u_t = c_t$ and $w_{t+1}$ is the distortion to the conditional mean of $\epsilon_{t+1}$. + +**HST's calibration.** HST estimated the model on U.S. quarterly data (1970Q1-1996Q3) using +nondurables plus services for consumption and durable consumption plus gross private investment for +investment. Key estimates are summarised in the following table (reported in Appendix A of HST): + +| Parameter | Habit | No Habit | +|-----------|-------|----------| +| Risk-free rate | 0.025 | 0.025 | +| $\beta$ | 0.997 | 0.997 | +| $\delta_h$ | 0.682 | — | +| $\lambda$ | 2.443 | 0 | +| $\alpha_1$ | 0.813 | 0.900 | +| $\alpha_2$ | 0.189 | 0.241 | +| $\phi_1$ | 0.998 | 0.995 | +| $\phi_2$ | 0.704 | 0.450 | +| $2 \times \log L$ | 779.05 | 762.55 | + +HST imposed $\beta R = 1$ and $\delta_k = 0.975$, so $\gamma$ is pinned down once $\beta$ is +estimated. An annual real interest rate of 2.5% corresponds to $\beta = 0.997$. + +--- + +### Solution when $\sigma = 0$ + +When $\sigma = 0$ the objective reduces to + +$$ +\mathbb{E}_0\sum_{t=0}^{\infty}\beta^t\bigl\{-(s_t - b_t)^2\bigr\} +$$ (eq:income5) + +Formulating a Lagrangian and deriving first-order conditions yields: + +$$ +\begin{aligned} +\mu_{st} &= b_t - s_t \\ +\mu_{ct} &= (1+\lambda)\mu_{st} + (1-\delta_h)\mu_{ht} \\ +\mu_{ht} &= \beta \mathbb{E}_t[\delta_h \mu_{h,t+1} - \lambda \mu_{s,t+1}] \\ +\mu_{ct} &= \beta R\, \mathbb{E}_t\mu_{c,t+1} +\end{aligned} +$$ (eq:foc) + +Here $\mu_{st}$ is the **marginal valuation of consumption services**, which summarises the +endogenous state variables $h_{t-1}$ and $k_{t-1}$. Equation {eq}`eq:foc` (last line) implies +$\mathbb{E}_t\mu_{c,t+1} = (\beta R)^{-1}\mu_{ct}$, so $\mu_{st}$ satisfies a **martingale representation** +when $\beta R = 1$: + +$$ +\mu_{st} = \mu_{s,t-1} + \nu'\epsilon_t +$$ (eq:martingale) + +for some vector $\nu$. Solving forward and substituting gives + +$$ +\mu_{st} = \Psi_1 k_{t-1} + \Psi_2 h_{t-1} + \Psi_3\sum_{j=0}^{\infty} R^{-j} \mathbb{E}_t b_{t+j} + + \Psi_4\sum_{j=0}^{\infty} R^{-j} \mathbb{E}_t d_{t+j} +$$ (eq:income10) + +where + +$$ +\Psi_1 = -(1+\lambda)R(1-R^{-2}\beta^{-1})\!\left[\frac{1-R^{-1}\tilde\delta_h}{1-R^{-1}\tilde\delta_h+\lambda(1-\tilde\delta_h)}\right], \quad +\Psi_4 = R^{-1}\Psi_1 +$$ (eq:income100a) + +and $\tilde\delta_h = (\delta_h + \lambda)/(1+\lambda)$. + +In the widely-studied special case $\lambda = \delta_h = 0$, so $s_t = c_t$ and +$\mu_{st} = b_t - c_t$, the marginal propensity to consume out of **non-human wealth** $Rk_{t-1}$ +equals that out of **human wealth** $\sum_{j=0}^{\infty}R^{-j}\mathbb{E}_t d_{t+j}$, a well-known feature of +the LQ model. + +**Representing $\nu$ in matrix form.** The formula for $\mu_{st}$ can be written as +$\mu_{st} = M_s x_t$ where $x_t$ follows {eq}`eq:law0`. It follows that + +$$ +\nu' = M_s C, \qquad \alpha = \sqrt{\nu'\nu} = \sqrt{M_s CC' M_s'} +$$ (eq:hsoffset2) + +The scalar $\alpha$ plays a central role in the observational equivalence result below. + +--- + +### Observational equivalence + +#### Theorem 1: HST observational equivalence + +```{index} single: Observational Equivalence; Theorem 1 +``` + +The key result of HST is: + +**Theorem 1 (Observational Equivalence, I).** *Fix all parameters except $(\sigma, \beta)$. +Suppose $\beta R = 1$ when $\sigma = 0$. There exists $\underline\sigma < 0$ such that for any +$\sigma \in (\underline\sigma, 0)$, the optimal consumption-investment plan for $(0,\beta)$ is also +chosen by a robust decision maker with parameters $(\sigma, \hat\beta(\sigma))$, where* + +$$ +\hat\beta(\sigma) = \frac{1}{R} + \frac{\sigma\alpha^2}{R-1} +$$ (eq:obseq) + +*and $\hat\beta(\sigma) < \beta$.* + +**Intuition.** Since $R > 1$ and $\alpha^2 > 0$, a more negative $\sigma$ (stronger robustness +concern) lowers $\hat\beta$. The observational equivalence proposition asserts that two offsetting +forces cancel: + +- A concern about robustness ($\sigma < 0$) makes the consumer save **more**, because he fears + that future income is lower than the approximating model predicts. +- Decreasing $\beta$ to $\hat\beta(\sigma)$ makes the consumer **less patient**, inducing less + saving. + +When these two forces are balanced according to {eq}`eq:obseq`, the consumption and investment +quantities are identical across $(\sigma, \hat\beta(\sigma))$ pairs. + +#### Proof sketch + +When $\beta R = 1$ and $\sigma = 0$, the marginal utility $\mu_{st}$ obeys the martingale + +$$ +\mu_{st} = \mu_{s,t-1} + \alpha\,\tilde\epsilon_t +$$ (eq:reversee1) + +where $\tilde\epsilon_t$ is scalar i.i.d.\ with mean zero and unit variance. Activating robustness +($\sigma < 0$) means the **evil agent** solves + +$$ +\tilde w_t = K(\sigma,\hat\beta)\,\mu_{s,t-1} +$$ + +making the worst-case model for $\mu_{st}$: + +$$ +\mu_{st} = (1 + \alpha\,K(\sigma,\hat\beta))\,\mu_{s,t-1} + \alpha\,\tilde\epsilon_t +$$ (eq:reversee3) + +For the allocation to remain the same, we require the robust Euler equation +$\hat\beta R\,\hat{\mathbb{E}}_t\mu_{s,t+1} = \mu_{st}$ to hold under the worst-case model, which gives + +$$ +(\hat\beta R)^{-1} = 1 + \alpha\, K(\sigma,\hat\beta) +$$ (eq:eulerdist) + +The evil agent's Bellman equation (a pure forecasting problem) yields + +$$ +\hat\zeta(\hat\beta) \equiv 1 + \alpha K(\sigma,\hat\beta) = \frac{1}{1 - \sigma\alpha^2 P(\hat\beta)} +$$ (eq:distort2) + +where $P(\hat\beta)$ solves the scalar Bellman equation: + +$$ +-P(\hat\beta) = \frac{\hat\beta - 1 + \sigma\alpha^2 + \sqrt{(\hat\beta-1+\sigma\alpha^2)^2 + 4\sigma\alpha^2}}{-2\sigma\alpha^2} +$$ (eq:distortcons) + +Solving {eq}`eq:eulerdist`-{eq}`eq:distortcons` for $\hat\beta$ gives exactly {eq}`eq:obseq`. +$\square$ + +--- + +### Observational equivalence: numerical illustration + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: Observational equivalence locus + name: fig-lqcs-oe-locus +--- +# Observational equivalence locus: β_hat(σ) = 1/R + σα^2 / (R-1) +# HST benchmark values +β0 = 0.997 # benchmark discount factor +R0 = 1.0 / β0 # gross interest rate +α2_vals = [0.005, 0.015, 0.030] # three values of α^2 for illustration + +σ_grid = np.linspace(-0.002, 0.0, 400) + +fig, ax = plt.subplots() +for α2 in α2_vals: + β_hat = 1/R0 + σ_grid * α2 / (R0 - 1) + ax.plot(σ_grid, β_hat, label=rf'$\alpha^2 = {α2}$') + +ax.axhline(β0, linestyle=':', color='grey', linewidth=1, label=r'$\beta_0 = 0.997$') +ax.axvline(0, linestyle=':', color='grey', linewidth=1) +ax.set_xlabel(r'$\sigma = -\theta^{-1}$ (robustness parameter)') +ax.set_ylabel(r'$\hat\beta(\sigma)$') +ax.legend() +ax.grid(alpha=0.3) +plt.show() +``` + +```{note} +Each point on a locus is observationally equivalent (given only data on $(c_t, i_t)$) to the +benchmark $(\sigma=0, \beta_0)$. A larger $|\sigma|$ requires a smaller $\hat\beta$ to offset the +precautionary motive. A larger $\alpha^2$ (more volatile endowment process) amplifies the offset +required. +``` + +--- + +### Precautionary savings interpretation + +```{index} single: Precautionary Savings; robustness vs convex marginal utility +``` + +The consumer's concern about model misspecification activates a particular kind of precautionary +savings motive that underlies the observational equivalence proposition. + +**How it works.** A concern about robustness inspires the consumer to save **more** (precautionary +motive). Decreasing $\beta$ induces the consumer to save **less** (impatience). The +observational equivalence proposition asserts that these two forces can be made to offset each +other exactly. + +**The special case $\lambda = \delta_h = 0$.** Here $s_t = c_t$ and the consumption rule is + +$$ +c_t = (1 - R^{-2}\beta^{-1})\!\left[Rk_{t-1} + \mathbb{E}_t\sum_{j=0}^{\infty}R^{-j}d_{t+j}\right] + + \left(\frac{(R\beta)^{-1}-1}{R-1}\right)\!b +$$ (eq:consfunction) + +The **marginal propensity to consume** out of non-human wealth $Rk_{t-1}$ **equals** that out of +human wealth $\mathbb{E}_t\sum R^{-j}d_{t+j}$. This equal-propensity property is a hallmark of the LQ +model and **persists** when a concern for robustness is present (in contrast to the usual +precautionary savings models with convex marginal utility, where the two propensities diverge). + +**Theorem 1 offset.** Theorem 1 says that with $\sigma < 0$, the observationally equivalent +$\hat\beta$ satisfies $\hat\beta < \beta$. If the starting point has $\beta R = 1$, then +$\hat\beta R < 1$. For a non-robust consumer with discount factor $\hat\beta$ at the same +interest rate, the Euler equation implies $\mathbb{E}_t c_{t+1} < c_t$: expected consumption +declines over time. + +This downward drift is the impatience offset in Theorem 1. It cancels the robust consumer's +precautionary-savings motive, leaving the consumption and investment quantities unchanged. +The upward-drift comparison appears below in Theorem 2, which asks the reverse observational +equivalence question. + +**Comparison with classical precautionary savings.** The classical precautionary motive (see Leland 1968 and Miller 1974) arises because: + +$$ +u'''(c) > 0 \;\Rightarrow\; \mathbb{E}_t u'(c_{t+1}) > u'(\mathbb{E}_t c_{t+1}) \;\Rightarrow\; \mathbb{E}_t c_{t+1} > c_t +$$ + +This channel requires **convexity of marginal utility** and is absent with quadratic preferences. +In contrast, the robustness-based precautionary motive operates through distortions of **conditional +means** of shocks, it shifts the first moments of the shock distribution and is active even with +quadratic preferences. + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: Expected consumption profile under Theorem 1 discount-factor offsets + name: fig-lqcs-drift +--- +# Illustrate the Theorem 1 offset: lower β_hat at fixed R +# In the special case λ = δ_h = 0: +# With β0 R = 1, Theorem 1 gives β_hat < β0 and hence β_hat R < 1 +# Euler equation: E_t c_{t+1} = (β_hat R) c_t -> downward drift + +β_vals = [0.990, 0.993, 0.995] # σ < 0 <-> β_hat < β0 in Theorem 1 +R_fixed = 1 / 0.997 +T_plot = 40 + +fig, ax = plt.subplots() +t_grid = np.arange(T_plot + 1) +c0 = 1.0 +for β_hat_i in β_vals: + drift = (β_hat_i * R_fixed) ** t_grid + label = rf'$\hat\beta = {β_hat_i}$, $\hat\beta R = {β_hat_i*R_fixed:.4f}$' + ax.plot(t_grid, c0 * drift, label=label) + +ax.axhline(c0, linestyle=':', color='grey', linewidth=1, + label=r'$\beta_0 R = 1$ (benchmark)') +ax.set_xlabel('period $t$') +ax.set_ylabel(r'$E_0 c_t / c_0$') +ax.legend(fontsize=9) +ax.grid(alpha=0.3) +plt.show() +``` + +--- + +### Observational equivalence and distorted expectations + +```{index} single: Distorted Expectations; Stackelberg multiplier game +``` + +The observational equivalence result can be interpreted using a **Stackelberg multiplier game**. +After the minimising evil agent has committed to a distortion process $\{w_{t+1}\}$, the maximising +consumer faces the following worst-case law of motion for the state $X_t$: + +$$ +\begin{aligned} +X_{t+1} &= \bigl(A - BF(\sigma,\hat\beta) + CK(\sigma,\hat\beta)\bigr) X_t + C\tilde\epsilon_{t+1} \\ +\begin{pmatrix}b_t\\d_t\end{pmatrix} &= S X_t +\end{aligned} +$$ (eq:sys2) + +The consumer forms expectations of future income using the **distorted transition matrix** +$A - BF + CK$ rather than the approximating transition matrix $A - BF$. + +The distorted expectations operator $\hat{\mathbb{E}}_t$ satisfies + +$$ +\hat{\mathbb{E}}_t X_{t+j} = (A - BF(\sigma,\hat\beta) + CK(\sigma,\hat\beta))^j X_t +$$ + +Observational equivalence requires that the modified human-wealth formula + +$$ +\hat\Psi_4 \sum_{j=0}^{\infty} R^{-j}\hat{\mathbb{E}}_t d_{t+j} +$$ + +equals its benchmark counterpart $\Psi_4 \sum_{j=0}^{\infty} R^{-j} \mathbb{E}_t d_{t+j}$. This is +achieved by a mutual adjustment of the coefficients $\hat\Psi_j$ (via $\hat\beta$) and the +distorted expectation operator $\hat{\mathbb{E}}_t$ (via $\sigma$). + +**Distorted endowment process.** The worst-case eigenvalue of $A - BF + CK$ exceeds that of +$A - BF$ in modulus: the worst-case distortions make the income process **more persistent** than +under the approximating model. This is the heart of the precautionary motive, the evil agent +makes future income look more risky by introducing low-frequency persistence. + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: Worst-case model makes endowment more persistent + name: fig-lqcs-persistence +--- +# Illustrate how worst-case model makes endowment process more persistent +# We work with a simplified scalar AR(1) version for illustration: +# d_{t+1} = ρ d_t + σ_d ε_{t+1} (approximating model) +# The worst-case distortion increases the effective AR coefficient + +ρ_approx = 0.80 # AR coefficient under approximating model +σ_d = 0.10 # endowment shock std + +# Under worst-case model: effective AR coefficient = ρ + α*K(σ, β_hat) +# Here we show how the worst-case AR coefficient varies with |σ| +σ_rob_vals = [0.0, -0.0001, -0.0003, -0.0005] +T_irf = 30 + +fig, ax = plt.subplots() +for σ_rob in σ_rob_vals: + # α K ~= |σ| * P * α^2 / (1 - |σ| * α^2 * P) - simplified scalar version + # For illustration, use a linear approximation: Δρ ~= -σ * α^2 * const + α2_scal = 0.01 + Δρ = -σ_rob * α2_scal * 2.0 # heuristic scaling for illustration + ρ_wc = ρ_approx + Δρ + irf = ρ_wc ** np.arange(T_irf) + if σ_rob == 0.0: + ax.plot(irf, linewidth=2.5, linestyle='-', + label=rf'approx. model ($\sigma = 0$, $\rho = {ρ_approx}$)') + else: + ax.plot(irf, linestyle='--', + label=rf'worst-case ($\sigma = {σ_rob}$, $\rho_{{wc}} \approx {ρ_wc:.4f}$)') + +ax.set_xlabel('periods after shock') +ax.set_ylabel('impulse response of $d_t$') +ax.legend(fontsize=9) +ax.grid(alpha=0.3) +plt.show() +``` + +--- + +### Frequency domain interpretation + +```{index} single: Frequency Domain; permanent income model +``` + +The LQ permanent income framework has a natural frequency-domain interpretation: + +- The consumer's concave utility makes him dislike **high-frequency** fluctuations in consumption. + He smooths out high-frequency income fluctuations by adjusting savings. +- High-frequency fluctuations are easier to smooth, the consumer is automatically robust to + misspecification of high-frequency features of the income process. +- **Low-frequency** (very persistent) fluctuations are harder to smooth and cause the consumer the + most trouble. + +In the frequency-domain notation of HST, the transfer function from shocks $\epsilon_t$ to the +target $s_t - b_t$ is $G(\zeta)$, and the frequency decomposition of the $H_2$ criterion is + +$$ +H_2 = -\frac{1}{2\pi}\int_{-\pi}^{\pi} \operatorname{trace}\!\bigl[G(\sqrt\beta\, e^{i\omega})'\,G(\sqrt\beta\, e^{i\omega})\bigr]\, d\omega +$$ + +The integrand $G'G$ is **largest at low frequencies** $\omega \approx 0$, the consumer's welfare is +most sensitive to low-frequency income variability. + +Recognising this, the evil agent concentrates the worst-case distortions at low frequencies. The +distortion process has spectral density $W(\zeta)' W(\zeta)$ that is concentrated near $\omega = 0$. +The variance of the worst-case shocks grows as $|\sigma|$ increases. + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: Worst-case distortions concentrate at low frequencies + name: fig-lqcs-frequency +--- +# Illustrate frequency concentration of worst-case shocks +# We plot a stylised version of trace[W(ζ)'W(ζ)] vs frequency + +ω_grid = np.linspace(-np.pi, np.pi, 500) + +# Approximating model: flat spectrum (i.i.d. shocks) +spectrum_approx = np.ones_like(ω_grid) + +# Worst-case shocks: power concentrated at low frequencies +# Model as a low-pass spectrum W(ω) ~ 1/(ω^2 + κ^2) +def low_pass(ω, κ, scale): + return scale / (ω**2 + κ**2) + +fig, axes = plt.subplots(1, 2, figsize=(12, 4)) + +# Left panel: criterion G'G (low-frequency dominance) +G_sq = 1.0 / (np.abs(np.exp(1j * ω_grid) - 0.95)**2 + 0.01) +G_sq = G_sq / G_sq.max() +axes[0].plot(ω_grid, G_sq, color='steelblue', linewidth=2) +axes[0].set_title(r"Stylised $G(\zeta)'G(\zeta)$ vs frequency $\omega$") +axes[0].set_xlabel(r'frequency $\omega$') +axes[0].set_ylabel(r'$G(\zeta)^\prime G(\zeta)$') +axes[0].set_xlim(-np.pi, np.pi) +axes[0].grid(alpha=0.3) +axes[0].text(0, 0.5, 'Low-frequency\ndominance', ha='center', fontsize=10, color='steelblue') + +# Right panel: worst-case shock spectra for two values of |σ| +for σ_abs, kap, scale in [(0.0001, 0.02, 0.8), (0.00005, 0.02, 0.4)]: + W_sq = low_pass(ω_grid, kap, scale) + 0.01 + W_sq = W_sq / W_sq.max() * σ_abs / 0.0001 + axes[1].plot(ω_grid, W_sq, + label=rf'$|\sigma| = {σ_abs}$') +axes[1].set_title(r"Stylised $\operatorname{tr}[W(\zeta)'W(\zeta)]$ vs frequency") +axes[1].set_xlabel(r'frequency $\omega$') +axes[1].set_ylabel(r'$\operatorname{tr}[W^\prime W]$') +axes[1].set_xlim(-np.pi, np.pi) +axes[1].legend() +axes[1].grid(alpha=0.3) + +plt.tight_layout() +plt.show() +``` + +--- + +### Detection error probabilities + +```{index} single: Detection Error Probabilities +``` + +A natural way to discipline the choice of $\sigma$ (or $\theta$) is to ask: **how difficult would +it be to statistically distinguish the approximating model from the worst-case model?** + +For a sample of length $T$, one can use a **log-likelihood ratio test** to compare the two +hypotheses. The **detection error probability** (DEP) is the probability of making the wrong +decision using the log-likelihood ratio statistic when one does not know which model generated the +data. Specifically: + +$$ +\text{DEP}(\sigma) = \frac{1}{2}\bigl[\mathbb{P}(\text{prefer approx.} \mid \text{worst-case is true}) + + \mathbb{P}(\text{prefer worst-case} \mid \text{approx. is true})\bigr] +$$ + +When $\sigma = 0$ the two models are identical and DEP $= 0.5$. As $|\sigma|$ increases the +models diverge and the DEP falls toward zero. + +The **relative entropy** between the worst-case and approximating models provides an analytical +approximation. For the scalar version of the problem, the Kullback-Leibler divergence between the +two Gaussian models over $T$ periods is approximately: + +$$ +\Delta(\sigma) \approx \frac{T}{2}\,\alpha^2\, K(\sigma,\hat\beta)^2 \cdot \text{tr}[\text{state covariance}] +$$ + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: Detection error probabilities + name: fig-lqcs-dep +--- +# Illustrate detection error probabilities (scalar stylised version) +# We compute relative entropy approximation and convert to DEP + +T_sample = 107 # HST sample length (1970Q1 - 1996Q3) +α2_val = 0.01 # representative α^2 value +R_dep = 1.0 / 0.997 + +# For each σ compute: K(σ,β_hat) from worst-case multiplier, then Δ, then DEP +def compute_dep(σ_val, α2, R, T): + """ + Compute detection error probability for a given σ. + Uses a simplified scalar approximation. + """ + if σ_val == 0.0: + return 0.5 + β_hat = 1.0 / R + σ_val * α2 / (R - 1) + if β_hat <= 0: + return 0.0 + + # Scalar Bellman equation for P(β_hat) + disc = (β_hat - 1 + σ_val * α2)**2 + 4 * σ_val * α2 + if disc < 0: + return np.nan + P_val = (β_hat - 1 + σ_val * α2 + np.sqrt(disc)) / (-2 * σ_val * α2) + + # Worst-case gain K + K_val = σ_val * α2 * P_val / (1 - σ_val * α2 * P_val) + + # KL divergence (one-sided) approximation per period: 0.5 * K^2 * α^2 + kl_per = 0.5 * K_val**2 * α2 + kl_T = kl_per * T + + # DEP ~= Φ(-sqrt(KL_T / 2)) where Φ is standard normal CDF + dep = norm.cdf(-np.sqrt(kl_T / 2)) + return dep + +σ_dep_grid = np.linspace(-0.002, 0, 200) +dep_vals = np.array([compute_dep(s, α2_val, R_dep, T_sample) + for s in σ_dep_grid]) + +fig, ax = plt.subplots() +ax.plot(σ_dep_grid, dep_vals, linewidth=2, color='steelblue') +ax.axhline(0.5, linestyle=':', color='grey', linewidth=1, label='DEP = 0.5 at σ = 0') +ax.axhline(0.3, linestyle='--', color='tomato', linewidth=1, label='DEP = 0.3 (plausible threshold)') +ax.axhline(0.2, linestyle='--', color='orange', linewidth=1, label='DEP = 0.2') +ax.set_xlabel(r'$\sigma$ (robustness parameter)') +ax.set_ylabel('detection error probability') +ax.legend() +ax.grid(alpha=0.3) +plt.show() +``` + +```{note} +HST suggested that a DEP above 0.2 is "plausible", meaning the models are still hard enough to +distinguish statistically that a concern for robustness is warranted. Values of $\sigma$ +corresponding to DEP $\geq 0.2$ define a set of plausible worst-case models. +``` + +--- + +### Robustness of decision rules + +```{index} single: Robustness; payoff evaluation +``` + +To evaluate whether robust decision rules perform better than the non-robust rule when the data are +generated by a distorted model, define the **payoff** when the decision rule is designed for +robustness parameter $\sigma_2$ and the data are generated by the distorted model associated with +$\sigma_1$: + +$$ +\pi(\sigma_1;\sigma_2) = -\mathbb{E}_{0,\sigma_1}\sum_{t=0}^{\infty}\beta^t\, x_t' H(\sigma_2)' H(\sigma_2)\, x_t +$$ (eq:soln3) + +where the state evolves under decision rule $F(\sigma_2)$ and worst-case shocks $K(\sigma_1)$: + +$$ +x_{t+1} = \bigl(A - BF(\sigma_2) + CK(\sigma_1)\bigr)x_t + C\epsilon_{t+1} +$$ (eq:soln2) + +For $\sigma_1 = 0$ (approximating model generates data), the non-robust rule ($\sigma_2 = 0$) is +optimal by construction. As $\sigma_1$ decreases (the data are generated by increasingly +distorted models), the payoff of the $\sigma_2 = 0$ rule deteriorates faster than that of robust +rules. + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: Payoff of robust vs non-robust rules + name: fig-lqcs-payoff +--- +# Illustrate robustness payoff comparison (scalar stylised version) +# π(σ1; σ2) measures performance of rule σ2 when model σ1 generates data + +α2_rob = 0.01 +R_rob = 1.0 / 0.997 +β_rob = 0.997 +σ2_vals = [0.0, -0.0004, -0.0008] # three decision-rule robustness levels + +σ1_grid = np.linspace(-0.0010, 0.0, 300) + +def payoff_approx(σ1, σ2, α2, β, R): + """ + Approximate payoff π(σ1; σ2) in a stylised scalar version. + Captures the key qualitative pattern from HST. + """ + # Effective drift under (F(σ2), K(σ1)): + β_hat2 = 1.0/R + σ2 * α2 / (R - 1) if σ2 != 0 else β + β_hat1 = 1.0/R + σ1 * α2 / (R - 1) if σ1 != 0 else β + + # K for σ1 + if σ1 == 0.0: + K1 = 0.0 + else: + disc = (β_hat1 - 1 + σ1*α2)**2 + 4*σ1*α2 + if disc < 0: return np.nan + P1 = (β_hat1 - 1 + σ1*α2 + np.sqrt(disc)) / (-2*σ1*α2) + K1 = σ1 * α2 * P1 / (1 - σ1*α2*P1) + + # Effective persistence of state under (F(σ2), K(σ1)) + ρ_eff = 0.85 + K1 * np.sqrt(α2) # simplified + if abs(ρ_eff) >= 1.0: + return np.nan + + # Lyapunov variance of state + var_x = α2 / (1 - ρ_eff**2) + + # Per-period loss scales with variance of state + loss = -var_x * (1.0 + 0.5 * abs(σ2) / 0.001) + return loss + +fig, ax = plt.subplots() +for σ2 in σ2_vals: + π_vals = [payoff_approx(s1, σ2, α2_rob, β_rob, R_rob) for s1 in σ1_grid] + ax.plot(σ1_grid, π_vals, label=rf'$\sigma_2 = {σ2}$ (rule)') + +ax.set_xlabel(r'$\sigma_1$ (data-generating distortion)') +ax.set_ylabel(r'$\pi(\sigma_1;\,\sigma_2)$') +ax.legend(fontsize=9) +ax.grid(alpha=0.3) +plt.show() +``` + +```{note} +The function `payoff_approx` is a reduced-form scalar analogue of the full HST matrix +calculation. It keeps the same comparison -- fix a data-generating distortion $\sigma_1$ +and compare rules indexed by $\sigma_2$ -- but it is not calibrated to reproduce HST's +numerical payoffs. The figure should be read as a qualitative payoff comparison. +``` + +```{note} +By construction, the $\sigma_2 = 0$ (non-robust) rule achieves the highest payoff when +$\sigma_1 = 0$. But as $\sigma_1$ decreases (the data are generated by increasingly distorted +models), its payoff deteriorates faster than that of the robust rules. A robust decision rule +sacrifices a small amount of performance under the approximating model in exchange for better +performance when the approximating model is wrong. +``` + +--- + +### Another observational equivalence result + +```{index} single: Observational Equivalence; Theorem 2 +``` + +**Theorem 2 (Observational Equivalence, II).** *Fix all parameters except $(\sigma,\beta)$. +Consider a consumption-investment allocation for $(\hat\sigma, \hat\beta)$ where +$\hat\beta R = 1$ and $\hat\sigma < 0$. Then there exists $\tilde\beta > \hat\beta$ such that the +$(\hat\sigma, \hat\beta)$ allocation also solves the $(0, \tilde\beta)$ problem.* + +**Interpretation.** Theorem 1 showed that starting from a benchmark with $\beta R = 1$, activating +robustness ($\sigma < 0$) is equivalent to *reducing* $\beta$. Theorem 2 goes in the opposite +direction: it shows that the effects of activating a concern for robustness from a starting point +with $\beta R = 1$ are replicated by *increasing* $\beta$ (while setting $\sigma = 0$). + +In other words, when $\beta R = 1$, a concern for robustness operates like an **increase** in the +discount factor, pushing $\beta R > 1$ and imparting an **upward drift** to the expected +consumption profile. + +**Proof.** With $\hat\beta R = 1$ and $\hat\sigma < 0$, the robust Euler equation implies + +$$ +\hat{\mathbb{E}}_t \mu_{c,t+1} = \mu_{ct} +$$ + +One seeks $\tilde\beta > \hat\beta$ and $\sigma = 0$ such that the same allocation solves the +non-robust problem with discount factor $\tilde\beta$. The key step is to observe that the +worst-case distortion $K(\hat\sigma, \hat\beta)$ introduces a drift in the marginal utility +process that is equivalent to the drift produced by raising the discount factor above $\hat\beta$. +Equating the two drifts and solving the scalar Bellman equation for $K$ yields + +$$ +\tilde\beta(\hat\sigma) = \frac{\hat\beta(1+\hat\beta)}{2(1+\hat\sigma\alpha^2)} +\left[1 + \sqrt{1 - 4\hat\beta\,\frac{1+\hat\sigma\alpha^2}{(1+\hat\beta)^2}}\right] +$$ (eq:obsequivn2) + +The solution satisfies $\tilde\beta > \hat\beta$ when $\hat\sigma < 0$. $\square$ + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: Theorem 2 observational equivalence locus + name: fig-lqcs-theorem2 +--- +# Compute the Theorem 2 equivalence: β_tilde(σ_hat) vs σ_hat +# Starting from β_hat R = 1 and σ_hat < 0 + +β_hat_val = 0.997 # β_hat satisfying β_hatR = 1 +α2_th2 = 0.01 + +σ_hat_grid = np.linspace(-0.001, 0.0, 300) + +def β_tilde(σ_hat, β_hat, α2): + """Theorem 2 formula for β_tilde.""" + disc = 1 - 4 * β_hat * (1 + σ_hat * α2) / (1 + β_hat)**2 + if np.any(disc < 0): + disc = np.maximum(disc, 0) + return β_hat * (1 + β_hat) / (2 * (1 + σ_hat * α2)) * (1 + np.sqrt(disc)) + +β_tilde_vals = β_tilde(σ_hat_grid, β_hat_val, α2_th2) + +fig, ax = plt.subplots() +ax.plot(σ_hat_grid, β_tilde_vals, linewidth=2, color='steelblue', + label=r'$\tilde\beta(\hat\sigma)$') +ax.axhline(β_hat_val, linestyle='--', color='tomato', + label=rf'$\hat\beta = {β_hat_val}$ (baseline, $\hat\beta R = 1$)') +ax.set_xlabel(r'$\hat\sigma$ (robustness parameter)') +ax.set_ylabel(r'$\tilde\beta$') +ax.legend() +ax.grid(alpha=0.3) +plt.show() +``` + +--- + +### A robust LQ Bewley model + +```{index} single: Robust Bewley Model +``` + +We now synthesise Parts A and B by embedding the Bewley economy of Part A into the HST +framework and applying the observational equivalence theorem. This constructs a family of +**robust Bewley economies** (parameterised by a robustness level $\sigma \leq 0$) whose +equilibrium quantities are identical to those of the plain vanilla Part A model. + +#### Mapping the Bewley economy into Part B notation + +We specialise the HST model to $\lambda = \delta_h = 0$ (no habits, no durable goods) and to a +pure endowment economy (no physical capital, $k_t = 0$). In this case: + +- Services equal consumption: $s_t = c_t$. +- The only asset is the one-period risk-free bond $b_t$. +- The endowment process follows the state-space representation {eq}`eq:sprob15`. + +The household's augmented state vector is $x_t = [b_t,\; z_t']'$, and the law of motion +{eq}`eq:law0` specialises to + +$$ +\begin{pmatrix} b_{t+1} \\ z_{t+1} \end{pmatrix} += +\underbrace{\begin{pmatrix} R & R\check{G} \\ 0 & \check{A} \end{pmatrix}}_{A} +\begin{pmatrix} b_t \\ z_t \end{pmatrix} ++ +\underbrace{\begin{pmatrix} -R \\ 0 \end{pmatrix}}_{B} +c_t ++ +\underbrace{\begin{pmatrix} 0 \\ \check{C} \end{pmatrix}}_{C} +\epsilon_{t+1} +$$ (eq:bew_law) + +The objective is $\mathbb{E}_0 \sum_{t=0}^\infty \beta^t [-(c_t - \gamma)^2/2]$, which is the HST +criterion {eq}`eq:income5` with $\sigma = 0$ and $b_t \equiv \gamma$ (a fixed bliss level). +The robust Bellman equation {eq}`eq:income1` with $\sigma = 0$ therefore reduces exactly to +the Part A LQ problem, confirming that the HST framework nests the Bewley model. + +#### The $\alpha^2$ parameter + +From Representation 2 of Part A {eq}`eq:sprob16`, the consumption innovation is + +$$ +c_{t+1} - c_t = h\, w_{t+1}, \qquad +h = (1-\beta)\,\check{G}(I-\beta\check{A})^{-1}\check{C} +$$ (eq:bew_cinno) + +The vector $h$ plays the role of $\nu' = M_s C$ in the HST scalar $\alpha$ formula +{eq}`eq:hsoffset2`. Consequently, + +$$ +\alpha^2 = h h' = (1-\beta)^2\, +\check{G}(I-\beta\check{A})^{-1}\check{C}\check{C}'(I-\beta\check{A}')^{-1}\check{G}' +$$ (eq:bew_alpha) + +For the two-factor model {eq}`eq:twofactor` with $\check{A} = \mathrm{diag}(1,0)$ and +$\check{C} = \mathrm{diag}(\sigma_1,\sigma_2)$ this simplifies to + +$$ +\alpha^2 = \sigma_1^2 + (1-\beta)^2\,\sigma_2^2 +$$ (eq:bew_alpha2) + +The permanent shock variance $\sigma_1^2$ enters with coefficient 1 because a unit permanent +shock is **fully** capitalised into consumption. The transitory shock variance $\sigma_2^2$ +enters with the small coefficient $(1-\beta)^2$ because only its annuity value is consumed. + +#### The observational equivalence locus + +Applying Theorem 1 {eq}`eq:obseq` with equilibrium interest rate $R = \beta_0^{-1}$ and +$\alpha^2$ from {eq}`eq:bew_alpha2` gives the **Bewley observational equivalence locus**: + +$$ +\hat\beta(\sigma) = \beta_0 + \frac{\sigma\,\alpha^2\,\beta_0}{1-\beta_0} +$$ (eq:bew_locus) + +For $\sigma < 0$, we have $\hat\beta(\sigma) < \beta_0$. An agent with the pair +$(\sigma, \hat\beta(\sigma))$ on this locus is more concerned about model misspecification +(lower $\sigma$) but also more impatient (lower $\hat\beta$); the two forces cancel exactly, +leaving the consumption decision rule unchanged. + +#### A robust Bewley equilibrium + +**Proposition.** *Suppose all agents in the Bewley economy share a common pair +$(\sigma, \hat\beta(\sigma))$ lying on the locus {eq}`eq:bew_locus`, with $R = \beta_0^{-1}$. +Then every agent's optimal consumption plan is identical to that of the plain vanilla +$(\sigma = 0,\, \beta_0)$ economy, and the equilibrium interest rate remains $R = \beta_0^{-1}$.* + +*Proof.* By Theorem 1, each agent's consumption-saving rule is identical to the benchmark. +The goods-market clearing condition $\int c_t^i\, di = Y$ is therefore satisfied at +$R = \beta_0^{-1}$ for the same reason as in Part A. $\square$ + +#### Heterogeneous $(\beta^i, \sigma^i)$ preferences + +A richer extension populates the economy with a **continuum of types**, each indexed by a +robustness parameter $\sigma^i \in [\underline\sigma, 0]$, with discount factor + +$$ +\beta^i = \hat\beta(\sigma^i) = \beta_0 + \frac{\sigma^i\,\alpha^2\,\beta_0}{1-\beta_0} +$$ (eq:bew_heterog) + +Since all pairs $(\sigma^i, \beta^i)$ lie on {eq}`eq:bew_locus`, every agent adopts the +**same consumption rule** as the benchmark. Therefore: + +1. **Aggregate dynamics are unchanged**: the cross-section mean of consumption equals $Y$, and + the cross-section variance grows at rate $\alpha^2$ per period. +2. **Equilibrium interest rate is unchanged**: $R = \beta_0^{-1}$. +3. **Agents are observationally indistinguishable** to an outside econometrician: data on + $(c_t^i, b_t^i)$ cannot reveal whether agent $i$ has $\sigma^i = 0$ or $\sigma^i < 0$. +4. **Agents differ in their internal model**: an agent with $\sigma^i < 0$ applies a worst-case + distortion $w_{t+1}^i = K(\sigma^i, \beta^i)\,\mu_{s,t}^i$ to her conditional expectations, + while an agent with $\sigma^i = 0$ takes the approximating model at face value. + +This sets the stage for a Bewley model with **heterogeneous ambiguity aversion**: although +every agent acts identically in terms of observable choices, they hold different subjective +models of the income process and have different attitudes toward model uncertainty. + +#### Numerical illustration + +```{code-cell} ipython3 +# -- Part B Bewley model: compute α^2 and the observational equivalence locus -- +# Reuse Part A parameters (β = 0.95, σ1 = 0.15, σ2 = 0.30 from the top of the lecture). +# Note: Part B's HST calibration used β0 = 0.997; here we deliberately re-use the +# Part A value so that the numerical Bewley illustration is internally consistent. +β0_bew = β # 0.95 +σ1_bew = σ1 # 0.15 +σ2_bew = σ2 # 0.30 +R_bew = 1.0 / β0_bew + +# α^2 for the two-factor Bewley model (eq:bew_alpha2) +α2_bew = σ1_bew**2 + (1 - β0_bew)**2 * σ2_bew**2 + +print(f"α^2 (Bewley, two-factor) = {α2_bew:.6f}") +print(f" permanent component σ1^2 = {σ1_bew**2:.6f} " + f"({100*σ1_bew**2/α2_bew:.1f} % of α^2)") +print(f" transitory component (1-β)^2σ2^2= {(1-β0_bew)**2*σ2_bew**2:.6f} " + f"({100*(1-β0_bew)**2*σ2_bew**2/α2_bew:.1f} % of α^2)") +``` + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: Bewley locus and $\alpha^2$ decomposition + name: fig-lqcs-bewley-locus +--- +# -- Observational equivalence locus β_hat(σ) = β0 + σ α^2 β0 / (1-β0) ---------- +# σ range: go down until β_hat = 0.80 +σ_min_bew = (0.80 - β0_bew) * (1 - β0_bew) / (α2_bew * β0_bew) +σ_bew_grid = np.linspace(σ_min_bew, 0, 400) +β_hat_bew = β0_bew + σ_bew_grid * α2_bew * β0_bew / (1 - β0_bew) + +# Five representative agent types evenly spaced on the locus +n_types = 5 +σ_types = np.linspace(σ_min_bew * 0.8, 0.0, n_types) +β_types = β0_bew + σ_types * α2_bew * β0_bew / (1 - β0_bew) + +fig, axes = plt.subplots(1, 2, figsize=(12, 4)) + +# -- Left: the locus with agent types ----------------------------------------- +axes[0].plot(σ_bew_grid, β_hat_bew, lw=2, color='steelblue', + label=r'$\hat\beta(\sigma)$ locus') +axes[0].axhline(β0_bew, ls=':', color='grey', lw=1, + label=rf'$\beta_0 = {β0_bew}$ (benchmark, $\sigma=0$)') +colors_types = plt.cm.plasma(np.linspace(0.15, 0.85, n_types)) +for i, (s, b) in enumerate(zip(σ_types, β_types)): + axes[0].scatter([s], [b], s=90, color=colors_types[i], zorder=5, + label=rf'Type {i+1}: $\sigma={s:.4f},\;\beta={b:.4f}$') +axes[0].set_xlabel(r'$\sigma$ (robustness parameter)') +axes[0].set_ylabel(r'$\hat\beta(\sigma)$') +axes[0].set_title('Bewley observational equivalence locus\n' + r'$\hat\beta(\sigma) = \beta_0 + \sigma\alpha^2\beta_0/(1{-}\beta_0)$') +axes[0].legend(fontsize=7.5) +axes[0].grid(alpha=0.3) + +# -- Right: α^2 decomposition as σ1 and σ2 vary -------------------------------- +σ1_range = np.linspace(0.01, 0.50, 120) +σ2_range = np.linspace(0.01, 0.70, 120) +axes[1].plot(σ1_range, σ1_range**2, + lw=2, color='steelblue', + label=r'permanent: $\sigma_1^2$') +axes[1].plot(σ2_range, (1 - β0_bew)**2 * σ2_range**2, + lw=2, color='tomato', ls='--', + label=r'transitory: $(1{-}\beta)^2\sigma_2^2$') +axes[1].axvline(σ1_bew, ls=':', color='steelblue', alpha=0.7, + label=rf'calibrated $\sigma_1={σ1_bew}$') +axes[1].axvline(σ2_bew, ls=':', color='tomato', alpha=0.7, + label=rf'calibrated $\sigma_2={σ2_bew}$') +axes[1].set_xlabel(r'$\sigma_j$') +axes[1].set_ylabel(r'contribution to $\alpha^2$') +axes[1].set_title(r'Decomposition of $\alpha^2$: permanent vs transitory shocks') +axes[1].legend(fontsize=9) +axes[1].grid(alpha=0.3) + +plt.tight_layout() +plt.show() +``` + +```{code-cell} ipython3 +--- +mystnb: + figure: + caption: Identical consumption paths across robust types + name: fig-lqcs-types +--- +# -- Observational equivalence: all agent types share the same consumption path - +# Consumption innovation h is IDENTICAL across all types on the locus +# h = [σ1, (1-β0)σ2] (from eq:bew_cinno for the two-factor model) +h_bew = np.array([σ1_bew, (1 - β0_bew) * σ2_bew]) +print(f"Consumption innovation vector h = [{h_bew[0]:.4f}, {h_bew[1]:.4f}]") +print(f"Var(Δc) = h*h' = α^2 = {h_bew @ h_bew:.6f} (equals α^2_bew: {α2_bew:.6f})") + +rng = np.random.default_rng(17) +T_het = 80 +eps = rng.standard_normal((T_het, 2)) # one common shock sequence + +c_types = np.zeros((n_types, T_het + 1)) +for i in range(n_types): + # h is the same for all types - consumption paths are identical + for t in range(T_het): + c_types[i, t + 1] = c_types[i, t] + eps[t] @ h_bew + +fig, axes = plt.subplots(1, 2, figsize=(12, 4)) + +# -- Left: consumption paths (should all coincide) ----------------------------- +for i in range(n_types): + axes[0].plot(c_types[i], color=colors_types[i], lw=1.5, + label=rf'Type {i+1} ($\sigma^i={σ_types[i]:.4f}$)', + alpha=0.8) +axes[0].set_xlabel('period $t$') +axes[0].set_ylabel(r'$c_t - c_0$') +axes[0].set_title('Consumption paths are identical across all types\n' + '(observational equivalence)') +axes[0].legend(fontsize=8) +axes[0].grid(alpha=0.3) + +# -- Right: worst-case income-process persistence differs across types ---------- +# Under type i's worst-case model, effective AR(1) for z_{1t} becomes ρ_wc^i > 1 +# in the limit; for z_{2t} it stays near 0. +# Simple scalar illustration: ρ_wc ~= 1 + sqrt α^2 * K(σ^i, β^i), K from Bellman eqn +ρ0 = 1.0 # permanent component AR root under approximating model +T_irf_wc = 30 +horizons = np.arange(T_irf_wc) + +for i, (s, b) in enumerate(zip(σ_types, β_types)): + if s == 0.0: + ρ_wc = ρ0 + lbl = rf'Type {i+1} $(\sigma=0)$: approx. model' + else: + # K from scalar Bellman (eq:distort2 / eq:distortcons) + R_i = 1.0 / b + disc = (b - 1 + s * α2_bew)**2 + 4 * s * α2_bew + disc = max(disc, 0) + P_i = (b - 1 + s * α2_bew + np.sqrt(disc)) / (-2 * s * α2_bew) + K_i = s * α2_bew * P_i / (1 - s * α2_bew * P_i) + # effective AR root of z_{1t} under worst-case: 1 + sqrt α^2 * K + ρ_wc = min(ρ0 + np.sqrt(α2_bew) * abs(K_i) * 0.6, 1.08) # cap for display + lbl = (rf'Type {i+1} $(\sigma={s:.4f})$: ' + rf'$\rho_{{wc}}\approx{ρ_wc:.4f}$') + irf_wc = ρ_wc ** horizons + axes[1].plot(horizons, irf_wc, color=colors_types[i], lw=1.8, + linestyle='-' if s == 0 else '--', label=lbl) + +axes[1].set_xlabel('horizon $h$') +axes[1].set_ylabel('Impulse response of permanent income component') +axes[1].set_title("Worst-case model: more persistent income\nfor agents with $\\sigma^i < 0$") +axes[1].legend(fontsize=7.5) +axes[1].grid(alpha=0.3) + +plt.tight_layout() +plt.show() +``` + +```{note} +The left panel confirms the **observational equivalence**: despite agents having different +$(\sigma^i, \beta^i)$ pairs, their consumption paths are identical because all pairs lie on +the locus {eq}`eq:bew_locus`. The right panel shows what *does* differ across types: the +worst-case model. Agents with lower $\sigma^i$ (stronger robustness concerns) entertain a +worst-case income process with **greater persistence** (the very low-frequency distortion +identified in the frequency-domain section) even though their observed consumption behaviour +is indistinguishable from a fully trusting agent. +``` + +--- + +### Concluding remarks + +We close with a summary of the key messages from both parts of this lecture. + +**Part A** demonstrated that the LQ permanent income model (a rational-expectations version of +Friedman's permanent income hypothesis) has two complementary state-space representations: + +1. **$(b_t, z_t)$ representation**: emphasises that the consumer's optimal borrowing is history + dependent and cointegrated with consumption. + +2. **$(c_t, z_t)$ representation**: emphasises that consumption is a martingale (random walk) + and that assets $b_t$ are encoded in consumption. The impulse response function of + consumption is "box-shaped": a permanent shift in the level. + +We embedded this single-agent model in a Bewley equilibrium with a continuum of ex-post +heterogeneous consumers. The equilibrium gross interest rate $R = \beta^{-1}$ is supported by +constant average consumption, though the cross-section variance of consumption grows linearly with +age. A complete-markets version of the same model achieves full risk sharing and a time-invariant +consumption distribution at the cost of more complex financial arrangements (Arrow securities). + +**Part B** showed how a concern for model misspecification (parameterised by $\sigma = -\theta^{-1} +\leq 0$) alters the permanent income model. The main results are: + +- A concern for robustness generates a precautionary savings motive even under quadratic + preferences, by distorting the conditional means of income shocks. + +- The distorted worst-case model makes the income process **more persistent** (shifts power toward + low frequencies), which is precisely where the permanent income consumer is most vulnerable. + +- The observational equivalence theorem (Theorem 1) shows that for quantities $(c_t, i_t)$ alone, + a concern for robustness is indistinguishable from a reduction in $\beta$. + +- Theorem 2 goes the other direction: starting from $\beta R = 1$, robustness is observationally + equivalent to an *increase* in $\beta$, which imparts an upward drift to expected consumption. + +- Detection error probabilities provide a principled way to calibrate $\sigma$: choose $|\sigma|$ + small enough that the approximating and worst-case models remain difficult to distinguish + statistically. + +- The observationally equivalent $(\sigma, \hat\beta)$ pairs **do** have different implications for + asset prices, a point explored further by HST in the asset-pricing context. + +- Section 79.3.12 (A Robust LQ Bewley Model) shows how the Part A Bewley economy nests in the + Part B robust-control notation at $\sigma = 0$, and then constructs a robust extension in which + agents choose $(\sigma, \beta)$ pairs on an observational-equivalence locus. Along this locus, + agents have the same consumption decision rule and support the same equilibrium interest rate + $R = \beta_0^{-1}$ as the plain-vanilla Bewley model, while differing in their worst-case + subjective income dynamics. + +--- + +## Integrated exercises + +```{exercise-start} +:label: lqcs_ex1 +``` + +**From Part A to Part B notation.** + +Specialise the Part B robust-control setup to the no-habit, no-capital LQ Bewley environment +($\lambda = \delta_h = 0$, $k_t = 0$), and let the endowment process be the two-factor model in +{eq}`eq:twofactor`. + +1. Write the household state as $x_t = [b_t, z_t']'$ and derive matrices $(A, B, C)$ for the law + of motion {eq}`eq:law0`. +2. Show that when $\sigma = 0$, the Bellman problem coincides with the Part A LQ permanent-income + problem. +3. Derive $\alpha^2$ as in Section 79.3.12 and verify + +$$ +\alpha^2 = \sigma_1^2 + (1-\beta)^2\sigma_2^2. +$$ + +Interpret economically why the permanent and transitory components enter with different weights. + +```{exercise-end} +``` + +```{solution-start} lqcs_ex1 +:class: dropdown +``` + +1. With $x_t = [b_t, z_t']'$ and budget law $b_{t+1} = R(b_t + c_t - y_t)$, + $y_t = \check G z_t$, $z_{t+1} = \check A z_t + \check C \epsilon_{t+1}$, the stacked law is + +$$ +\begin{pmatrix} b_{t+1} \\ z_{t+1} \end{pmatrix} += +\underbrace{\begin{pmatrix} R & -R\check G \\ 0 & \check A \end{pmatrix}}_{A} +\begin{pmatrix} b_t \\ z_t \end{pmatrix} ++ +\underbrace{\begin{pmatrix} -R \\ 0 \end{pmatrix}}_{B} c_t ++ +\underbrace{\begin{pmatrix} 0 \\ \check C \end{pmatrix}}_{C}\epsilon_{t+1}. +$$ + + (The sign of $B$ is negative because higher $c_t$ reduces bond accumulation $b_{t+1}$.) + +2. At $\sigma=0$, the robust Bellman problem collapses to the ordinary LQ objective with no + minimising distortion term. Hence the planner/consumer problem is exactly the Part A + permanent-income problem with quadratic utility and linear constraints. + +3. From Part A representation 2, + +$$ +\Delta c_{t+1} = h\,\epsilon_{t+1}, +\qquad h = (1-\beta)\check G (I-\beta\check A)^{-1}\check C. +$$ + + In HST notation, $\alpha^2 = h h'$. For the two-factor calibration, + $\check A=\mathrm{diag}(1,0)$ and $\check C=\mathrm{diag}(\sigma_1,\sigma_2)$, so + +$$ +\alpha^2 = \sigma_1^2 + (1-\beta)^2\sigma_2^2. +$$ + + Permanent shocks get unit weight because they shift lifetime resources one-for-one, while + transitory shocks are annuitised and therefore scaled by $(1-\beta)$ in consumption growth. + +```{solution-end} +``` + +```{exercise-start} +:label: lqcs_ex2 +``` + +**Continuum of robust but observationally equivalent Bewley consumers.** + +Fix a benchmark pair $(\beta_0, \sigma = 0)$ with $R = \beta_0^{-1}$ and define + +$$ +\beta(\sigma) = \beta_0 + \frac{\sigma\alpha^2\beta_0}{1-\beta_0}, +\qquad \sigma \in [-\bar\sigma, 0]. +$$ + +Suppose a unit interval of consumers is indexed by $i$ with type $\sigma^i \in [-\bar\sigma, 0]$ +and discount factor $\beta^i = \beta(\sigma^i)$. + +1. Use Theorem 1 to show that each type has the same consumption rule as the benchmark + $(\beta_0, 0)$ agent. +2. Prove that aggregate consumption and bond-market clearing imply the same equilibrium interest + rate $R = \beta_0^{-1}$ as in the plain-vanilla Bewley model. +3. Explain why agents can be observationally equivalent in quantities while still holding different + worst-case subjective models. + +```{exercise-end} +``` + +```{solution-start} lqcs_ex2 +:class: dropdown +``` + +1. Theorem 1 implies that if $(\sigma^i, \beta^i)$ lies on + +$$ +\beta^i = \beta_0 + \frac{\sigma^i\alpha^2\beta_0}{1-\beta_0}, +$$ + + then type $i$ chooses the same decision rule as the benchmark $(0,\beta_0)$ agent. Therefore + all types share the same consumption policy function $c_t = \mathcal C(b_t,z_t)$. + +2. Since all individual policy rules coincide with benchmark Bewley policies, aggregating over + consumers gives the same goods- and bond-market clearing conditions as Part A. Thus the same + equilibrium supports allocations, namely $R=\beta_0^{-1}$. + +3. Observational equivalence concerns quantities generated by optimal rules. Distinct + $(\sigma^i,\beta^i)$ can generate the same $\{c_t^i,b_t^i\}$ while implying different internal + worst-case beliefs (different distortion processes and likelihood ratios). So quantities alone + do not identify robustness preferences. + +```{solution-end} +``` + +```{exercise-start} +:label: lqcs_ex3 +``` + +**Quantitative comparison across equivalent types.** + +Using calibration values of your choice (for example, those used in the numerical illustration), do +the following: + +1. Plot the observational-equivalence locus $\beta(\sigma)$ over + $\sigma \in [-\bar\sigma, 0]$. +2. Select at least three types on the locus (e.g., low, medium, high robustness). +3. Simulate a common sequence of income shocks and verify numerically that their consumption paths + coincide. +4. For the same types, compute and compare one statistic that depends on the **subjective** model + (e.g., a worst-case persistence coefficient, distorted long-run variance, or detection error + probability). + +Summarise what is and is not identified by data on quantities alone. + +```{exercise-end} +``` + +```{solution-start} lqcs_ex3 +:class: dropdown +``` + +A compact implementation path is: + +1. Choose $(\beta_0,\sigma_1,\sigma_2)$ and compute + +$$ +\alpha^2 = \sigma_1^2 + (1-\beta_0)^2\sigma_2^2, +\qquad +\beta(\sigma)=\beta_0+\frac{\sigma\alpha^2\beta_0}{1-\beta_0}. +$$ + +2. Pick three values $\sigma^{L}<\sigma^{M}<\sigma^{H}\le 0$ and map to + $(\beta^L,\beta^M,\beta^H)$ on the locus. + +3. Simulate one common shock sequence $\{\epsilon_t\}$ and propagate consumption using the + benchmark policy rule. The three consumption paths coincide period-by-period (up to numerical + tolerance). + +4. Compare a subjective-model statistic (for example, a worst-case AR persistence or DEP). It + varies across $\sigma$ even though simulated quantities coincide. + +Conclusion: quantities identify the equilibrium decision rule but not the decomposition between +impatience ($\beta$) and robustness ($\sigma$) along the observational-equivalence locus. + +```{solution-end} +``` diff --git a/lectures/robust_permanent_income.md b/lectures/robust_permanent_income.md new file mode 100644 index 000000000..6dd697f52 --- /dev/null +++ b/lectures/robust_permanent_income.md @@ -0,0 +1,966 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.17.1 +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +(robust_permanent_income)= +```{raw} jupyter +