does-iv-always-identify-late.html


<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <meta http-equiv="X-UA-Compatible" content="IE=edge" />
  <meta name="HandheldFriendly" content="True" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <meta name="robots" content="" />

  <link href="https://fonts.googleapis.com/css2?family=Source+Code+Pro:ital,wght@0,400;0,700;1,400&family=Source+Sans+Pro:ital,wght@0,300;0,400;0,700;1,400&display=swap" rel="stylesheet">

    <link rel="stylesheet" type="text/css" href="./theme/stylesheet/style.min.css">


    <link id="pygments-light-theme" rel="stylesheet" type="text/css"
          href="./theme/pygments/github.min.css">


  <link rel="stylesheet" type="text/css" href="./theme/font-awesome/css/fontawesome.css">
  <link rel="stylesheet" type="text/css" href="./theme/font-awesome/css/brands.css">
  <link rel="stylesheet" type="text/css" href="./theme/font-awesome/css/solid.css">

    <link href="./static/custom.css" rel="stylesheet">


    <link rel="shortcut icon" href="/images/favicon.ico" type="image/x-icon">
    <link rel="icon" href="/images/favicon.ico" type="image/x-icon">

<!-- Google Analytics -->
<script type="text/javascript">
  // Google Analytics (analytics.js) bootstrap.
  // Registers a stub `ga` function that queues calls until the library has
  // loaded, then injects the library script asynchronously before the first
  // <script> element of the document.
  (function (win, doc, tagName, src, name) {
    win['GoogleAnalyticsObject'] = name;
    // Keep an already-defined command function; otherwise queue the arguments.
    win[name] = win[name] || function () {
      (win[name].q = win[name].q || []).push(arguments);
    };
    // Timestamp (ms) of when the snippet ran.
    win[name].l = 1 * new Date();

    var loader = doc.createElement(tagName);
    var firstScript = doc.getElementsByTagName(tagName)[0];
    loader.async = 1;
    loader.src = src;
    firstScript.parentNode.insertBefore(loader, firstScript);
    // Explicit https: a protocol-relative URL would resolve to file:// when the
    // page is opened from disk and to plain http on non-TLS hosts.
  })(window, document, 'script', 'https://www.google-analytics.com/analytics.js', 'ga');

  ga('create', 'UA-85196661-1', 'auto');
  ga('send', 'pageview');
</script>
<!-- End Google Analytics -->
    <!-- Chrome, Firefox OS and Opera -->
    <meta name="theme-color" content="#333333">
    <!-- Windows Phone -->
    <meta name="msapplication-navbutton-color" content="#333333">
    <!-- iOS Safari -->
    <meta name="apple-mobile-web-app-capable" content="yes">
    <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
    <!-- Microsoft EDGE -->
    <meta name="msapplication-TileColor" content="#333333">

<meta name="author" content="János K. Divényi" />
<meta name="description" content="Instrumental variables are often used in causal analysis when randomized control trials are out of option. However, it is not always emphasized that the instrumental variable estimator -- even if the instrument is valid and relevant -- does not necessarily identify the average treatment effect (ATE) or the average treatment effect on the treated (ATET); most often, it only identifies the local average treatment effect (LATE): the average treatment effect on the complier subpopulation (see Angrist&amp;Imbens, 1994)." />
<meta name="keywords" content="">


<meta property="og:site_name" content="Divenyi"/>
<meta property="og:title" content="Does IV always identify LATE?"/>
<meta property="og:description" content="Instrumental variables are often used in causal analysis when randomized control trials are out of option. However, it is not always emphasized that the instrumental variable estimator -- even if the instrument is valid and relevant -- does not necessarily identify the average treatment effect (ATE) or the average treatment effect on the treated (ATET); most often, it only identifies the local average treatment effect (LATE): the average treatment effect on the complier subpopulation (see Angrist&amp;Imbens, 1994)."/>
<meta property="og:locale" content="en_US"/>
<meta property="og:url" content="./does-iv-always-identify-late.html"/>
<meta property="og:type" content="article"/>
<meta property="article:published_time" content="2015-03-02 00:00:00+01:00"/>
<meta property="article:modified_time" content=""/>
<meta property="article:author" content="./author/janos-k-divenyi.html">
<meta property="article:section" content="blog"/>
<meta property="og:image" content="../images/divenyi_circle_small.png">

  <title>Divenyi &ndash; Does IV always identify LATE?</title>

</head>
<body class="light-theme">
  <aside>
    <div>
      <a href=".">
        <img src="../images/divenyi_circle_small.png" alt="János K. Divényi" title="János K. Divényi">
      </a>

      <h1>
        <a href=".">János K. Divényi</a>
      </h1>

<p>data scientist | PhD in economics</p>

      <nav>
        <ul class="list">


              <li>
                <a target="_self"
                   href="./pages/about-me.html#about-me">
                  about me
                </a>
              </li>
              <li>
                <a target="_self"
                   href="./pages/research.html#research">
                  research
                </a>
              </li>
              <li>
                <a target="_self"
                   href="./pages/talks.html#talks">
                  talks
                </a>
              </li>
              <li>
                <a target="_self"
                   href="./pages/teaching.html#teaching">
                  teaching
                </a>
              </li>

        </ul>
      </nav>

      <!-- Social profile links. The icons are decorative, so each link carries
           its accessible name in aria-label; links open in a new tab. -->
      <ul class="social">
          <li>
            <a class="sc-linkedin" href="https://hu.linkedin.com/in/janosdivenyi" target="_blank" rel="noopener noreferrer" aria-label="LinkedIn">
              <i class="fab fa-linkedin" aria-hidden="true"></i>
            </a>
          </li>
          <li>
            <a class="sc-twitter" href="https://twitter.com/divenyijanos" target="_blank" rel="noopener noreferrer" aria-label="Twitter">
              <i class="fab fa-twitter" aria-hidden="true"></i>
            </a>
          </li>
          <li>
            <a class="sc-github" href="https://github.com/divenyijanos" target="_blank" rel="noopener noreferrer" aria-label="GitHub">
              <i class="fab fa-github" aria-hidden="true"></i>
            </a>
          </li>
          <li>
            <a class="sc-stack-overflow" href="https://stackoverflow.com/users/3409615/janosdivenyi" target="_blank" rel="noopener noreferrer" aria-label="Stack Overflow">
              <i class="fab fa-stack-overflow" aria-hidden="true"></i>
            </a>
          </li>
      </ul>
    </div>

  </aside>
  <main>


<article class="single">
  <header>
      
    <h1 id="does-iv-always-identify-late">Does IV always identify LATE?</h1>
    <p>
      Posted on Mon 02 March 2015 in <a href="./category/blog.html">blog</a>

    </p>
  </header>


  <div>
    <!-- Latex macros for math -->
<p><span class="math">\(\newcommand{\E}{\mathbb{E}}\)</span>
<span class="math">\(\newcommand{\P}{\mathbb{P}}\)</span></p>
<h2>Local average treatment effect (LATE)</h2>
<p><a href="http://en.wikipedia.org/wiki/Instrumental_variable">Instrumental variables</a> are
often used in causal analysis when randomized control trials are not an option.
However, it is not always emphasized that <strong>the instrumental variable
estimator</strong> -- even if the instrument is valid and relevant --  does not
necessarily identify the average treatment effect (ATE) or the average treatment
effect on the treated (ATET); most often, it <strong>only identifies the local average
treatment effect (LATE)</strong>: the average treatment effect on the complier
subpopulation (see <a href="http://www.jstor.org/stable/2951620">Angrist&amp;Imbens, 1994</a>).</p>
<p>Who are the <strong>compliers</strong>? Unfortunately, we do not know. These are the <strong>people
who are induced into the treatment by the instrumental variable</strong>. But from the
data it is <strong>impossible to tell who</strong> these people really are.</p>
<p>Let us take the standard example of a job training program for the unemployed.
The treatment is binary (participating in the program or not) and participants
are assigned to the treatment randomly. However, assignment does not fit
perfectly with actual participation: there are people who take up the treatment
even when non-assigned and other people who are assigned but do not take the
treatment. That is, there is some selection into the treatment. In this case --
when certain assumptions hold -- one can <strong>use the assignment variable as
instrument to identify LATE</strong>, the average treatment effect on the complier
population. In other words, we can get the treatment effect on those who take up
the treatment because they are assigned and would not take the treatment had
they not been assigned. This is a useful result even if we are unable to tell
who these people are. But we should keep in mind that this result <strong>only applies
to a subpopulation</strong> and that this <strong>subpopulation is likely to change</strong> when
using another instrument.</p>
<p>Why is this fact often neglected? Because there is a tradition of assuming a
<strong>homogeneous treatment effect</strong>, that the effect of the
program is the same for everyone. In this case, the estimated effect for the
complier  subpopulation is the same as the effect for the whole population. But
most often we cannot be sure that this assumption holds. If there is nothing 
which can rule out a <strong>heterogeneous treatment effect</strong>, LATE usually differs
from ATE.</p>
<h2>Statistical intuition</h2>
<p>What is the <strong>statistical intuition</strong> behind this result? In causal analysis we
generally compare outcomes of certain groups who differ in terms of the extent
they get a treatment. Taking the previous example we would like to  compare the
outcomes of the group who get the treatment to the group who do not. We relate
the variation in the outcome to the variation in the treatment.  In standard
regression framework, we relate variation in <span class="math">\(y\)</span> to variation in <span class="math">\(x\)</span>,  and see
whether there is some correlation between them. However, as <a href="http://xkcd.com/552/">correlation does
not imply causation</a>, we should <strong>make sure that the
correlation we measure is only due to <span class="math">\(x\)</span>, and not due to a third
variable.</strong></p>
<p>Assume that our previous example is about a job training program for the unemployed.
Even if the program is randomized, people may -- at least to some extent -- have
the choice whether they take the treatment. In such cases we usually observe
that a third (unobservable) variable (let us call it ability), drives part of
the variation in both <span class="math">\(x\)</span> and <span class="math">\(y\)</span>: more able people are more likely to
take the treatment and also more likely to find a job, irrespective of any program.
Therefore, a simple correlation would show a larger "effect" as it would pick up
the correlation generated by the third variable as well, not just the
correlation which goes from <span class="math">\(x\)</span> to <span class="math">\(y\)</span>.</p>
<p>An <strong>instrumental variable enables us to use only a part of variation</strong> in <span class="math">\(x\)</span>
which is independent of the third variable and compare only this part of
variation to the variation in <span class="math">\(y\)</span>. This way we can get a correlation which
indeed measures the part of correlation between <span class="math">\(y\)</span> and <span class="math">\(x\)</span> that is due only to
<span class="math">\(x\)</span>. In other words, we can get the <strong>causal effect</strong> of <span class="math">\(x\)</span> on <span class="math">\(y\)</span>. This is how
two stage least squares estimation works. We just
need a variable which is independent of <span class="math">\(y\)</span> and correlated with <span class="math">\(x\)</span>. That is, a
variable which influences <span class="math">\(y\)</span> only through <span class="math">\(x\)</span>. In case of the job training
program, assignment was truly random so independent of <span class="math">\(y\)</span>. Using the assignment
variable as instrument  we can keep the variation in the actual participation in
the treatment which is due to the assignment (and therefore, random) and partial
out the rest which could be due to a third variable (which is also correlated
with the outcome). Relating this part of the variation in the treatment
to the variation in the outcome we can get the effect of the treatment on the
outcome -- but be careful how to interpret this effect.</p>
<p>As <strong>only part of the variation</strong> in <span class="math">\(x\)</span> (treatment) was used, we <strong>get the
effect only for this part of the population</strong>. What is the population which is
defined by the variation in <span class="math">\(x\)</span> due to the instrument? The complier subpopulation.
Those who participated in the treatment because they were assigned.</p>
<p>I used a simple example of a program to illustrate my points. However, this
reasoning applies to every setup when instrumental variables are used. 
<a href="http://davidcard.berkeley.edu/papers/geo_var_schooling.pdf">Card (1995)</a>
tries to estimate the returns to schooling and uses proximity to college as an
instrument. To use this instrument we should assume that proximity to a college
only influences future wages through schooling as those who live closer to a
college are more likely to go to a college. Let us maintain this assumption.
Then the IV estimator identifies LATE: the returns to schooling on the complier
population. That is the effect on those who went to college because they were 
living close to a college (and would not have gone to college had they not lived
close).</p>
<h2>Program evaluation language</h2>
<p>Thinking about causal effects is easier when we use the <strong>potential outcome 
framework</strong> by <a href="http://en.wikipedia.org/wiki/Donald_Rubin">Donald Rubin</a>. It 
builds on the idea that the outcome of each individual depends on her treatment
status: if she gets the treatment she will realize an outcome <span class="math">\(Y(1)\)</span>, whereas
if she does not get the treatment she will realize a potentially different 
outcome <span class="math">\(Y(0)\)</span>. These are called potential outcomes as by definition only one
of them will be realized. A person either gets the treatment (and realizes her
potential outcome with the treatment), or does not get the treatment (and 
realizes her potential outcome without the treatment).</p>
<p>The <strong>same logic can be applied to the treatment status</strong> which is influenced by the instrument. Considering the assignment to treatment used as an instrument, one can be either assigned or not. For an assigned person we can observe whether she participates, but do not know what she would do had she not been assigned. Her potential treatment status given the different values of the instrument can be denoted by <span class="math">\(D(0)\)</span> and <span class="math">\(D(1)\)</span>. In this framework, the compliers are those for whom <span class="math">\(D(1) - D(0) = 1\)</span>: those who participate because of the assignment. They participate if assigned <span class="math">\((D(1) = 1)\)</span>, but do not participate if not assigned <span class="math">\((D(0) = 0)\)</span>. In principle, there could be three other types of people: always-takers who take the treatment irrespective of their assignment <span class="math">\((D(1) = D(0) = 1)\)</span>, never-takers who do not take the treatment irrespective of their assignment <span class="math">\((D(1) = D(0) = 0)\)</span> and defiers who participate when not assigned but do not participate when assigned. Now we can formally give the <strong>assumptions which are needed for the identification of LATE</strong> (I denote the instrument by <span class="math">\(Z\)</span>).</p>
<ol>
<li><span class="math">\(Z \perp D(0), D(1), Y(0), Y(1)\)</span> -- The instrument is independent of the
potential outcomes ("as good as random"). The instrument is valid.</li>
<li><span class="math">\(\P[D=1 | Z=1] \neq \P[D=1 | Z = 0]\)</span> -- The instrument does influence the
treatment status, that is there are at least some compliers. The instrument is
relevant.</li>
<li><span class="math">\(D(1) \geq D(0)\)</span> -- There are no defiers.</li>
</ol>
<p>The first two assumptions are standard IV assumptions. The third one is new but
usually a much less restrictive assumption than the homogeneity of the treatment
effect. If these assumptions hold, one can derive that the classical IV
estimator identifies LATE. In our training example where the instrument is
binary, the resulting estimator collapses to the so-called <strong>Wald estimator</strong>:</p>
<div class="math">$$LATE = \E[Y(1) - Y(0)|D(1) - D(0) = 1] = 
\frac{\E[Y|Z=1] - \E[Y|Z=0]}{\P[D=1 | Z=1] - \P[D=1 | Z=0]}$$</div>
<p>Intuitively, this formula says the following. There are two subgroups of people:
those who were assigned <span class="math">\((Z = 1)\)</span> and those who were not <span class="math">\((Z = 0)\)</span>. As the
assignment is random regarding the potential outcomes two results follow: (1)
both subgroups have the same distribution of types regarding their potential
treatments (compliers, always-takers and never-takers), and (2) both subgroups
have the same distribution of potential outcomes. Therefore, if we see any
difference between the outcomes of the subgroups, this difference could be only
due to the fact that in the assigned group compliers realized <span class="math">\(Y(1)\)</span> and in the
non-assigned group compliers realized <span class="math">\(Y(0)\)</span>. That is <strong>the difference in the 
average outcomes shows the effect of the program on the compliers</strong>. One
should just divide by the share of compliers to get LATE.</p>
<p>The <strong>program evaluation language extends naturally to other setups</strong> as well.
Let us consider the example of returns to schooling. <span class="math">\(Y(1)\)</span> expresses the 
potential wage of the individual if she goes to college and <span class="math">\(Y(0)\)</span> stands for
the potential wage of the individual if she does not go to college. <span class="math">\(D(1)\)</span> 
says whether the individual goes to college if she lives close to a college
and <span class="math">\(D(0)\)</span> expresses whether the individual goes to college if she lives far
from a college. The IV estimator gives us the returns to schooling for those
who went to college because of living close to a college.</p>
<h2>Long story short</h2>
<ul>
<li>The classical IV estimator most often identifies LATE: the average treatment effect on
the complier population. The complier population consists of those who take up the
treatment because of the instrument. </li>
<li>If treatment effect is heterogeneous, LATE differs from ATE or ATET.</li>
<li>Statistical intuition: When using IV, we partial out variation in the 
treatment variable <span class="math">\(x\)</span> which is not due to the instrument <span class="math">\(z\)</span>. So any
correlation between the outcome <span class="math">\(y\)</span> and the partialled out <span class="math">\(x\)</span> shows the 
correlation for those units where <span class="math">\(x\)</span> varies because of <span class="math">\(z\)</span>. </li>
<li>The potential outcomes framework makes thinking about causality easier. </li>
<li>As the instrument is random regarding the potential outcomes, the subgroups 
defined by the instrument should be the same in terms of their potential outcome
distributions.</li>
<li>If we see any difference in the average outcomes of the subgroups, this could
be due only to different take up rates, showing the effect of the program. 
One should only scale by the difference of the take up rates to get LATE.</li>
</ul>
<script type="text/javascript">if (!document.getElementById('mathjaxscript_pelican_#%@#$@#')) {
    // Load MathJax at most once: everything below is skipped when an element
    // with this id (the one given to the loader script further down) already
    // exists in the document.

    // Display-math layout settings; they are interpolated into the MathJax
    // configuration text built below.
    var align = "center",
        indent = "0em",
        linebreak = "false";

    // The condition is the literal `false`, so these small-screen (< 768px)
    // overrides never run.
    if (false) {
        align = (screen.width < 768) ? "left" : align;
        indent = (screen.width < 768) ? "0em" : indent;
        linebreak = (screen.width < 768) ? 'true' : linebreak;
    }

    // Script element that fetches MathJax from the cdnjs CDN.
    var mathjaxscript = document.createElement('script');
    mathjaxscript.id = 'mathjaxscript_pelican_#%@#$@#';
    mathjaxscript.type = 'text/javascript';
    mathjaxscript.src = 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.3/latest.js?config=TeX-AMS-MML_HTMLorMML';

    // Inline configuration element of type 'text/x-mathjax-config'; its text is
    // the MathJax.Hub.Config(...) call assembled from the string pieces below.
    var configscript = document.createElement('script');
    configscript.type = 'text/x-mathjax-config';
    // The text is assigned through `innerHTML` when window.opera is set and
    // through `text` otherwise.
    configscript[(window.opera ? "innerHTML" : "text")] =
        "MathJax.Hub.Config({" +
        "    config: ['MMLorHTML.js']," +
        "    TeX: { extensions: ['AMSmath.js','AMSsymbols.js','noErrors.js','noUndefined.js'], equationNumbers: { autoNumber: 'none' } }," +
        "    jax: ['input/TeX','input/MathML','output/HTML-CSS']," +
        "    extensions: ['tex2jax.js','mml2jax.js','MathMenu.js','MathZoom.js']," +
        "    displayAlign: '"+ align +"'," +
        "    displayIndent: '"+ indent +"'," +
        "    showMathMenu: true," +
        "    messageStyle: 'normal'," +
        "    tex2jax: { " +
        // The backslashes are escaped twice because this is a string literal
        // nested inside a string literal: the resulting delimiters are
        // \( ... \) for inline math and $$ ... $$ for display math.
        "        inlineMath: [ ['\\\\(','\\\\)'] ], " +
        "        displayMath: [ ['$$','$$'] ]," +
        "        processEscapes: true," +
        "        preview: 'TeX'," +
        "    }, " +
        "    'HTML-CSS': { " +
        "        availableFonts: ['STIX', 'TeX']," +
        "        preferredFont: 'STIX'," +
        "        styles: { '.MathJax_Display, .MathJax .mo, .MathJax .mi, .MathJax .mn': {color: 'black ! important'} }," +
        "        linebreaks: { automatic: "+ linebreak +", width: '90% container' }," +
        "    }, " +
        "}); " +
        // 'default' !== 'default' is always false, so the font-variant hooks
        // below are part of the configuration text but are never executed.
        "if ('default' !== 'default') {" +
            "MathJax.Hub.Register.StartupHook('HTML-CSS Jax Ready',function () {" +
                "var VARIANT = MathJax.OutputJax['HTML-CSS'].FONTDATA.VARIANT;" +
                "VARIANT['normal'].fonts.unshift('MathJax_default');" +
                "VARIANT['bold'].fonts.unshift('MathJax_default-bold');" +
                "VARIANT['italic'].fonts.unshift('MathJax_default-italic');" +
                "VARIANT['-tex-mathit'].fonts.unshift('MathJax_default-italic');" +
            "});" +
            "MathJax.Hub.Register.StartupHook('SVG Jax Ready',function () {" +
                "var VARIANT = MathJax.OutputJax.SVG.FONTDATA.VARIANT;" +
                "VARIANT['normal'].fonts.unshift('MathJax_default');" +
                "VARIANT['bold'].fonts.unshift('MathJax_default-bold');" +
                "VARIANT['italic'].fonts.unshift('MathJax_default-italic');" +
                "VARIANT['-tex-mathit'].fonts.unshift('MathJax_default-italic');" +
            "});" +
        "}";

    // Append the configuration element first and the loader second; both go
    // into <body> when it exists, otherwise into the first <head>.
    // NOTE(review): presumably the order ensures the configuration is in the
    // document before MathJax starts up -- confirm against the MathJax docs.
    (document.body || document.getElementsByTagName('head')[0]).appendChild(configscript);
    (document.body || document.getElementsByTagName('head')[0]).appendChild(mathjaxscript);
}
</script>
  </div>
  <div class="tag-cloud">
    <p>
    </p>
  </div>


<!-- Disqus -->
<div id="disqus_thread"></div>
<script type="text/javascript">
    // Disqus forum shortname. Declared with `var` at script top level, outside
    // the loader function below, so it is a global variable.
    var disqus_shortname = 'divenyijanos';
    // Inject the Disqus embed script asynchronously into <head>, falling back
    // to <body> when no <head> element is found.
    (function() {
        var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
        // Explicit https: a protocol-relative URL would resolve to file:// when
        // the page is opened from disk and to plain http on non-TLS hosts.
        dsq.src = 'https://' + disqus_shortname + '.disqus.com/embed.js';
        (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
    })();
</script>
<noscript>
    Please enable JavaScript to view comments.
</noscript>
<!-- End Disqus -->
</article>

    <!-- Site footer: copyright line and generator/theme credits. The credit
         links open in a new tab, so they carry rel="noopener noreferrer". -->
    <footer>
<p>&copy;  </p>
<p>
Built with <a href="https://getpelican.com" target="_blank" rel="noopener noreferrer">Pelican</a> using <a href="https://bit.ly/flex-pelican" target="_blank" rel="noopener noreferrer">Flex</a> theme
</p>    </footer>
  </main>


<script type="application/ld+json">
{
  "@context" : "http://schema.org",
  "@type" : "Blog",
  "name": " Divenyi ",
  "url" : ".",
  "image": "../images/divenyi_circle_small.png",
  "description": ""
}
</script>


</body>
</html>