-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdoes-iv-always-identify-late.html
executable file
·417 lines (369 loc) · 23 KB
/
does-iv-always-identify-late.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<!-- Legacy-browser and mobile rendering hints -->
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="HandheldFriendly" content="True">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="robots" content="">
<!-- Stylesheets: web fonts, theme, syntax highlighting, icon fonts, site overrides -->
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Source+Code+Pro:ital,wght@0,400;0,700;1,400&family=Source+Sans+Pro:ital,wght@0,300;0,400;0,700;1,400&display=swap">
<link rel="stylesheet" href="./theme/stylesheet/style.min.css">
<link id="pygments-light-theme" rel="stylesheet" href="./theme/pygments/github.min.css">
<link rel="stylesheet" href="./theme/font-awesome/css/fontawesome.css">
<link rel="stylesheet" href="./theme/font-awesome/css/brands.css">
<link rel="stylesheet" href="./theme/font-awesome/css/solid.css">
<link rel="stylesheet" href="./static/custom.css">
<!-- Favicon, declared under both the legacy and the standard rel value -->
<link rel="shortcut icon" href="/images/favicon.ico" type="image/x-icon">
<link rel="icon" href="/images/favicon.ico" type="image/x-icon">
<!-- Google Analytics -->
<script type="text/javascript">
// Google Analytics (analytics.js) asynchronous loader.
// The IIFE installs a global `ga` function that queues its arguments on `ga.q`,
// stamps the load time on `ga.l`, then creates an async script element for the
// library and inserts it before the first script element already in the document.
// The library URL is pinned to https: a protocol-relative URL would inherit the
// page scheme and so fail under file:// or load over plain http.
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
// Queue the tracker creation for this property, then a pageview hit.
ga('create', 'UA-85196661-1', 'auto');
ga('send', 'pageview');
</script>
<!-- End Google Analytics -->
<!-- Browser chrome colours: Chrome, Firefox OS and Opera -->
<meta name="theme-color" content="#333333">
<!-- Windows Phone -->
<meta name="msapplication-navbutton-color" content="#333333">
<!-- iOS Safari -->
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
<!-- Microsoft EDGE -->
<meta name="msapplication-TileColor" content="#333333">
<!-- Page author and summary; the literal ampersand in the summary is escaped as &amp; -->
<meta name="author" content="János K. Divényi" />
<meta name="description" content="Instrumental variables are often used in causal analysis when randomized control trials are out of option. However, it is not always emphasized that the instrumental variable estimator -- even if the instrument is valid and relevant -- does not necessarily identify the average treatment effect (ATE) or the average treatment effect on the treated (ATET); most often, it only identifies the local average treatment effect (LATE): the average treatment effect on the complier subpopulation (see Angrist&amp;Imbens, 1994)." />
<meta name="keywords" content="">
<!-- Open Graph metadata used for link previews -->
<meta property="og:site_name" content="Divenyi"/>
<meta property="og:title" content="Does IV always identify LATE?"/>
<meta property="og:description" content="Instrumental variables are often used in causal analysis when randomized control trials are out of option. However, it is not always emphasized that the instrumental variable estimator -- even if the instrument is valid and relevant -- does not necessarily identify the average treatment effect (ATE) or the average treatment effect on the treated (ATET); most often, it only identifies the local average treatment effect (LATE): the average treatment effect on the complier subpopulation (see Angrist&amp;Imbens, 1994)."/>
<meta property="og:locale" content="en_US"/>
<!-- NOTE(review): og:url and og:image are relative here; Open Graph consumers expect absolute URLs. Confirm the site URL setting in the generator. -->
<meta property="og:url" content="./does-iv-always-identify-late.html"/>
<meta property="og:type" content="article"/>
<!-- Timestamps use the ISO 8601 form with a T separator -->
<meta property="article:published_time" content="2015-03-02T00:00:00+01:00"/>
<!-- NOTE(review): modified_time is emitted empty; presumably the article has no modification date set. -->
<meta property="article:modified_time" content=""/>
<meta property="article:author" content="./author/janos-k-divenyi.html">
<meta property="article:section" content="blog"/>
<meta property="og:image" content="../images/divenyi_circle_small.png">
<title>Divenyi – Does IV always identify LATE?</title>
</head>
<body class="light-theme">
<!-- Site sidebar: avatar, author name, tagline, page navigation and social profile links -->
<aside>
<div>
<a href=".">
<img src="../images/divenyi_circle_small.png" alt="János K. Divényi" title="János K. Divényi">
</a>
<h1>
<a href=".">János K. Divényi</a>
</h1>
<p>data scientist | PhD in economics</p>
<nav>
<ul class="list">
<li>
<a target="_self"
href="./pages/about-me.html#about-me">
about me
</a>
</li>
<li>
<a target="_self"
href="./pages/research.html#research">
research
</a>
</li>
<li>
<a target="_self"
href="./pages/talks.html#talks">
talks
</a>
</li>
<li>
<a target="_self"
href="./pages/teaching.html#teaching">
teaching
</a>
</li>
</ul>
</nav>
<!-- Icon-only links: aria-label supplies the accessible name, the icon itself is hidden
from assistive technology, and rel="noopener noreferrer" isolates the new tab from this page. -->
<ul class="social">
<li>
<a class="sc-linkedin" href="http://hu.linkedin.com/in/janosdivenyi" target="_blank" rel="noopener noreferrer" aria-label="LinkedIn">
<i class="fab fa-linkedin" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="sc-twitter" href="https://twitter.com/divenyijanos" target="_blank" rel="noopener noreferrer" aria-label="Twitter">
<i class="fab fa-twitter" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="sc-github" href="https://github.com/divenyijanos" target="_blank" rel="noopener noreferrer" aria-label="GitHub">
<i class="fab fa-github" aria-hidden="true"></i>
</a>
</li>
<li>
<a class="sc-stack-overflow" href="https://stackoverflow.com/users/3409615/janosdivenyi" target="_blank" rel="noopener noreferrer" aria-label="Stack Overflow">
<i class="fab fa-stack-overflow" aria-hidden="true"></i>
</a>
</li>
</ul>
</div>
</aside>
<main>
<article class="single">
<header>
<h1 id="does-iv-always-identify-late">Does IV always identify LATE?</h1>
<p>
Posted on Mon 02 March 2015 in <a href="./category/blog.html">blog</a>
</p>
</header>
<div>
<!-- Latex macros for math -->
<p><span class="math">\(\newcommand{\E}{\mathbb{E}}\)</span>
<span class="math">\(\newcommand{\P}{\mathbb{P}}\)</span></p>
<h2>Local average treatment effect (LATE)</h2>
<p><a href="http://en.wikipedia.org/wiki/Instrumental_variable">Instrumental variables</a> are
often used in causal analysis when randomized controlled trials are not an option.
However, it is not always emphasized that <strong>the instrumental variable
estimator</strong> -- even if the instrument is valid and relevant -- does not
necessarily identify the average treatment effect (ATE) or the average treatment
effect on the treated (ATET); most often, it <strong>only identifies the local average
treatment effect (LATE)</strong>: the average treatment effect on the complier
subpopulation (see <a href="http://www.jstor.org/stable/2951620">Imbens &amp; Angrist, 1994</a>).</p>
<p>Who are the <strong>compliers</strong>? Unfortunately, we do not know. These are the <strong>people
who are induced into the treatment by the instrumental variable</strong>. But from the
data it is <strong>impossible to tell who</strong> these people really are.</p>
<p>Let us take the standard example of a job training program for the unemployed.
The treatment is binary (participating in the program or not) and participants
are assigned to the treatment randomly. However, assignment does not fit
perfectly with actual participation: there are people who take up the treatment
even when not assigned and other people who are assigned but do not take the
treatment. That is, there is some selection into the treatment. In this case --
when certain assumptions hold -- one can <strong>use the assignment variable as
instrument to identify LATE</strong>, the average treatment effect on the complier
population. In other words, we can get the treatment effect on those who take up
the treatment because they are assigned and would not take the treatment had
they not been assigned. This is a useful result even if we are unable to tell
who these people are. But we should keep in mind that this result <strong>only applies
to a subpopulation</strong> and that this <strong>subpopulation is likely to change</strong> when
using another instrument.</p>
<p>Why is this fact often neglected? Because there is a tradition of assuming a
<strong>homogeneous treatment effect</strong>, that is, that the effect of the
program is the same for everyone. In this case, the estimated effect for the
complier subpopulation is the same as the effect for the whole population. But
most often we cannot be sure that this assumption holds. If there is nothing
which can rule out a <strong>heterogeneous treatment effect</strong>, LATE usually differs
from ATE.</p>
<h2>Statistical intuition</h2>
<p>What is the <strong>statistical intuition</strong> behind this result? In causal analysis we
generally compare outcomes of certain groups who differ in terms of the extent
they get a treatment. Taking the previous example we would like to compare the
outcomes of the group who get the treatment to the group who do not. We relate
the variation in the outcome to the variation in the treatment. In standard
regression framework, we relate variation in <span class="math">\(y\)</span> to variation in <span class="math">\(x\)</span>, and see
whether there is some correlation between them. However, as <a href="http://xkcd.com/552/">correlation does
not imply causation</a> we should <strong>make sure that the
correlation that we measure is only due to <span class="math">\(x\)</span>, and not due to a third
variable.</strong></p>
<p>Assume that our previous example is about a job training program for unemployed.
Even if the program is randomized, people may -- at least to some extent -- have
the choice whether they take the treatment. In such cases we usually observe
that a third (unobservable) variable (let us call it ability) drives part of
the variation in both <span class="math">\(x\)</span> and <span class="math">\(y\)</span>: more able people are more likely to take
the treatment and also more likely to find a job, irrespective of any program.
Therefore, a simple correlation would show a larger "effect" as it would take up
the correlation generated by the third variable as well, not just the
correlation which goes from <span class="math">\(x\)</span> to <span class="math">\(y\)</span>.</p>
<p>An <strong>instrumental variable enables us to use only a part of variation</strong> in <span class="math">\(x\)</span>
which is independent of the third variable and compare only this part of
variation to the variation in <span class="math">\(y\)</span>. This way we can get a correlation which
indeed measures the part of correlation between <span class="math">\(y\)</span> and <span class="math">\(x\)</span> that is due only to
<span class="math">\(x\)</span>. In other words, we can get the <strong>causal effect</strong> of <span class="math">\(x\)</span> on <span class="math">\(y\)</span>. This is how
two-stage least squares estimation works. We just
need a variable which is correlated with <span class="math">\(x\)</span> but independent of the unobserved factors that affect <span class="math">\(y\)</span>. That is, a
variable which influences <span class="math">\(y\)</span> only through <span class="math">\(x\)</span>. In the case of the job training
program, assignment was truly random and is therefore independent of those unobserved factors. Using the assignment
variable as instrument we can keep the variation in the actual participation in
the treatment which is due to the assignment (and therefore, random) and partial
out the rest which could be due to a third variable (which is also correlated
with the outcome). Relating this part of the variation in the treatment
to the variation in the outcome we can get the effect of the treatment on the
outcome -- but be careful how to interpret this effect.</p>
<p>As <strong>only part of the variation</strong> in <span class="math">\(x\)</span> (treatment) was used, we <strong>get the
effect only for this part of the population</strong>. What is the population which is
defined by the variation in <span class="math">\(x\)</span> due to the instrument? The complier subpopulation.
Those who participated in the treatment because they were assigned.</p>
<p>I used a simple example of a program to illustrate my points. However, this
reasoning applies to every setup when instrumental variables are used.
<a href="http://davidcard.berkeley.edu/papers/geo_var_schooling.pdf">Card (1995)</a>
tries to estimate the returns to schooling and uses proximity to college as an
instrument. To use this instrument we should assume that proximity to a college
only influences future wages through schooling as those who live closer to a
college are more likely to go to a college. Let us maintain this assumption.
Then the IV estimator identifies LATE: the returns to schooling on the complier
population. That is the effect on those who went to college because they were
living close to a college (and would not have gone to college had they not lived
close).</p>
<h2>Program evaluation language</h2>
<p>Thinking about causal effects is easier when we use the <strong>potential outcome
framework</strong> by <a href="http://en.wikipedia.org/wiki/Donald_Rubin">Donald Rubin</a>. It
builds on the idea that the outcome of each individual depends on her treatment
status: if she gets the treatment she will realize an outcome <span class="math">\(Y(1)\)</span>, whereas
if she does not get the treatment she will realize a potentially different
outcome <span class="math">\(Y(0)\)</span>. These are called potential outcomes as by definition only one
of them will be realized. A person either gets the treatment (and realizes her
potential outcome with the treatment), or does not get the treatment (and
realizes her potential outcome without the treatment).</p>
<p>The <strong>same logic can be applied to the treatment status</strong> which is influenced by the instrument. Considering the assignment to treatment used as the instrument, one can be either assigned or not. For an assigned person we can observe whether she participates, but do not know what she would do had she not been assigned. Her potential treatment status given the different values of the instrument can be denoted by <span class="math">\(D(0)\)</span> and <span class="math">\(D(1)\)</span>. In this framework, the compliers are those for whom <span class="math">\(D(1) - D(0) = 1\)</span>: those who participate because of the assignment, that is, they participate if assigned <span class="math">\((D(1) = 1)\)</span>, but do not participate if not assigned <span class="math">\((D(0) = 0)\)</span>. In principle, there could be three other types of people: always-takers who take the treatment irrespective of their assignment <span class="math">\((D(1) = D(0) = 1)\)</span>, never-takers who do not take the treatment irrespective of their assignment <span class="math">\((D(1) = D(0) = 0)\)</span> and defiers who participate when not assigned but do not participate when assigned. Now we can formally give the <strong>assumptions which are needed for the identification of LATE</strong> (I denote the instrument by <span class="math">\(Z\)</span>).</p>
<ol>
<li><span class="math">\(Z \perp D(0), D(1), Y(0), Y(1)\)</span> -- The instrument is independent of the
potential outcomes ("as good as random"). The instrument is valid.</li>
<li><span class="math">\(\P[D=1 | Z=1] \neq \P[D=1 | Z = 0]\)</span> -- The instrument does influence the
treatment status, that is there are at least some compliers. The instrument is
relevant.</li>
<li><span class="math">\(D(1) \geq D(0)\)</span> -- There are no defiers.</li>
</ol>
<p>The first two assumptions are standard IV assumptions. The third one is new but
usually a much less restrictive assumption than the homogeneity of the treatment
effect. If these assumptions hold, one can derive that the classical IV
estimator identifies LATE. In our training example where the instrument is
binary, the resulting estimator collapses to the so-called <strong>Wald estimator</strong>:</p>
<div class="math">$$LATE = \E[Y(1) - Y(0)|D(1) - D(0) = 1] =
\frac{\E[Y|Z=1] - \E[Y|Z=0]}{\P[D=1 | Z=1] - \P[D=1 | Z=0]}$$</div>
<p>Intuitively, this formula says the following. There are two subgroups of people:
those who were assigned <span class="math">\((Z = 1)\)</span> and those who were not <span class="math">\((Z = 0)\)</span>. As the
assignment is random with respect to the potential outcomes, two results follow: (1)
both subgroups have the same distribution of types regarding their potential
treatments (compliers, always-takers and never-takers), and (2) both subgroups
have the same distribution of potential outcomes. Therefore, if we see any
difference between the outcomes of the subgroups, this difference could be only
due to the fact that in the assigned group compliers realized <span class="math">\(Y(1)\)</span> and in the
non-assigned group compliers realized <span class="math">\(Y(0)\)</span>. That is <strong>the difference in the
average outcomes shows the effect of the program on the compliers</strong>. One
should just divide by the share of compliers to get LATE.</p>
<p>The <strong>program evaluation language extends naturally to other setups</strong> as well.
Let us consider the example of returns to schooling. <span class="math">\(Y(1)\)</span> expresses the
potential wage of the individual if she goes to college and <span class="math">\(Y(0)\)</span> stands for
the potential wage of the individual if she does not go to college. <span class="math">\(D(1)\)</span>
says whether the individual goes to college if she lives close to a college
and <span class="math">\(D(0)\)</span> expresses whether the individual goes to college if she lives far
from a college. The IV estimator gives us the returns to schooling for those
who went to college because of living close to a college.</p>
<h2>Long story short</h2>
<ul>
<li>The classical IV estimator most often identifies LATE: the average treatment effect on
the complier population. The complier population consists of those who take up the
treatment because of the instrument. </li>
<li>If the treatment effect is heterogeneous, LATE usually differs from ATE or ATET.</li>
<li>Statistical intuition: When using IV, we partial out variation in the
treatment variable <span class="math">\(x\)</span> which is not due to the instrument <span class="math">\(z\)</span>. So any
correlation between the outcome <span class="math">\(y\)</span> and the partialled out <span class="math">\(x\)</span> shows the
correlation for those units where <span class="math">\(x\)</span> varies because of <span class="math">\(z\)</span>. </li>
<li>The potential outcomes framework makes thinking about causality easier. </li>
<li>As the instrument is random regarding the potential outcomes, the subgroups
defined by the instrument should be the same in terms of their potential outcome
distributions.</li>
<li>If we see any difference in the average outcomes of the subgroups, this could
be due only to different take up rates, showing the effect of the program.
One should only scale by the difference of the take up rates to get LATE.</li>
</ul>
<script type="text/javascript">if (!document.getElementById('mathjaxscript_pelican_#%@#$@#')) {
// MathJax bootstrap. The guard above skips the whole block when an element with
// this id already exists, so the loader is injected at most once per page.
// Display defaults that are interpolated into the MathJax.Hub.Config call below.
var align = "center",
indent = "0em",
linebreak = "false";
// Constant-false condition: these overrides for screens narrower than 768px never run.
if (false) {
align = (screen.width < 768) ? "left" : align;
indent = (screen.width < 768) ? "0em" : indent;
linebreak = (screen.width < 768) ? 'true' : linebreak;
}
// Script element that loads MathJax 2.7.3 from cdnjs with the TeX-AMS-MML_HTMLorMML config.
var mathjaxscript = document.createElement('script');
mathjaxscript.id = 'mathjaxscript_pelican_#%@#$@#';
mathjaxscript.type = 'text/javascript';
mathjaxscript.src = 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.3/latest.js?config=TeX-AMS-MML_HTMLorMML';
// Second script element of type text/x-mathjax-config that carries the inline configuration.
var configscript = document.createElement('script');
configscript.type = 'text/x-mathjax-config';
// The configuration source is assigned through innerHTML when window.opera is set, otherwise through text.
configscript[(window.opera ? "innerHTML" : "text")] =
"MathJax.Hub.Config({" +
" config: ['MMLorHTML.js']," +
" TeX: { extensions: ['AMSmath.js','AMSsymbols.js','noErrors.js','noUndefined.js'], equationNumbers: { autoNumber: 'none' } }," +
" jax: ['input/TeX','input/MathML','output/HTML-CSS']," +
" extensions: ['tex2jax.js','mml2jax.js','MathMenu.js','MathZoom.js']," +
" displayAlign: '"+ align +"'," +
" displayIndent: '"+ indent +"'," +
" showMathMenu: true," +
" messageStyle: 'normal'," +
// Delimiters: \( ... \) for inline math and $$ ... $$ for display math.
" tex2jax: { " +
" inlineMath: [ ['\\\\(','\\\\)'] ], " +
" displayMath: [ ['$$','$$'] ]," +
" processEscapes: true," +
" preview: 'TeX'," +
" }, " +
" 'HTML-CSS': { " +
" availableFonts: ['STIX', 'TeX']," +
" preferredFont: 'STIX'," +
" styles: { '.MathJax_Display, .MathJax .mo, .MathJax .mi, .MathJax .mn': {color: 'black ! important'} }," +
" linebreaks: { automatic: "+ linebreak +", width: '90% container' }," +
" }, " +
"}); " +
// 'default' !== 'default' is always false, so the font-variant startup hooks below are never registered.
"if ('default' !== 'default') {" +
"MathJax.Hub.Register.StartupHook('HTML-CSS Jax Ready',function () {" +
"var VARIANT = MathJax.OutputJax['HTML-CSS'].FONTDATA.VARIANT;" +
"VARIANT['normal'].fonts.unshift('MathJax_default');" +
"VARIANT['bold'].fonts.unshift('MathJax_default-bold');" +
"VARIANT['italic'].fonts.unshift('MathJax_default-italic');" +
"VARIANT['-tex-mathit'].fonts.unshift('MathJax_default-italic');" +
"});" +
"MathJax.Hub.Register.StartupHook('SVG Jax Ready',function () {" +
"var VARIANT = MathJax.OutputJax.SVG.FONTDATA.VARIANT;" +
"VARIANT['normal'].fonts.unshift('MathJax_default');" +
"VARIANT['bold'].fonts.unshift('MathJax_default-bold');" +
"VARIANT['italic'].fonts.unshift('MathJax_default-italic');" +
"VARIANT['-tex-mathit'].fonts.unshift('MathJax_default-italic');" +
"});" +
"}";
// Append the configuration script first, then the loader, to body (or to head when body is unavailable).
(document.body || document.getElementsByTagName('head')[0]).appendChild(configscript);
(document.body || document.getElementsByTagName('head')[0]).appendChild(mathjaxscript);
}
</script>
</div>
<div class="tag-cloud">
<p>
</p>
</div>
<!-- Disqus -->
<div id="disqus_thread"></div>
<script type="text/javascript">
// Disqus comment embed: builds an async script element for this forum's
// embed.js and appends it to head (or to body when no head element is found).
// The URL is pinned to https: a protocol-relative URL would inherit the page
// scheme and so fail under file:// or load over plain http.
var disqus_shortname = 'divenyijanos';
(function() {
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
dsq.src = 'https://' + disqus_shortname + '.disqus.com/embed.js';
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
})();
</script>
<noscript>
Please enable JavaScript to view comments.
</noscript>
<!-- End Disqus -->
</article>
<!-- Page footer: copyright line and generator credits. The credit links open in a
new tab, so rel="noopener noreferrer" isolates that tab from this page. -->
<footer>
<!-- NOTE(review): the copyright line has no holder or year; presumably a site setting is unset. -->
<p>© </p>
<p>
Built with <a href="http://getpelican.com" target="_blank" rel="noopener noreferrer">Pelican</a> using <a href="http://bit.ly/flex-pelican" target="_blank" rel="noopener noreferrer">Flex</a> theme
</p> </footer>
</main>
<!-- schema.org structured data describing the site as a Blog. The name value is
trimmed of stray surrounding whitespace. NOTE(review): url and image are relative;
confirm whether absolute URLs are intended. -->
<script type="application/ld+json">
{
"@context" : "http://schema.org",
"@type" : "Blog",
"name": "Divenyi",
"url" : ".",
"image": "../images/divenyi_circle_small.png",
"description": ""
}
</script>
</body>
</html>