<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
<head>
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<title>3 Using the Data Agreement Criterion to Rank Experts’ Beliefs | Dissertation_Duco_Veen.utf8.md</title>
<meta name="description" content="" />
<meta name="generator" content="bookdown 0.14 and GitBook 2.6.7" />
<meta property="og:title" content="3 Using the Data Agreement Criterion to Rank Experts’ Beliefs | Dissertation_Duco_Veen.utf8.md" />
<meta property="og:type" content="book" />
<meta property="og:url" content="https://github.com/VeenDuco/Dissertation/" />
<meta name="github-repo" content="VeenDuco/Dissertation" />
<meta name="twitter:card" content="summary" />
<meta name="twitter:title" content="3 Using the Data Agreement Criterion to Rank Experts’ Beliefs | Dissertation_Duco_Veen.utf8.md" />
<meta name="author" content="Duco Veen" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="apple-mobile-web-app-capable" content="yes" />
<meta name="apple-mobile-web-app-status-bar-style" content="black" />
<link rel="shortcut icon" href="favicon.ico" type="image/x-icon" />
<link rel="prev" href="fivestep.html"/>
<link rel="next" href="Hierarchical.html"/>
<script src="libs/jquery-2.2.3/jquery.min.js"></script>
<link href="libs/gitbook-2.6.7/css/style.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-table.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-bookdown.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-highlight.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-search.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-fontsettings.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-clipboard.css" rel="stylesheet" />
<style type="text/css">
a.sourceLine { display: inline-block; line-height: 1.25; }
a.sourceLine { pointer-events: none; color: inherit; text-decoration: inherit; }
a.sourceLine:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode { white-space: pre; position: relative; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
code.sourceCode { white-space: pre-wrap; }
a.sourceLine { text-indent: -1em; padding-left: 1em; }
}
pre.numberSource a.sourceLine
{ position: relative; left: -4em; }
pre.numberSource a.sourceLine::before
{ content: attr(title);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; pointer-events: all; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
a.sourceLine::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
</head>
<body>
<div class="book without-animation with-summary font-size-2 font-family-1" data-basepath=".">
<div class="book-summary">
<nav role="navigation">
<ul class="summary">
<li class="chapter" data-level="1" data-path="index.html"><a href="index.html"><i class="fa fa-check"></i><b>1</b> Introduction</a><ul>
<li class="chapter" data-level="1.1" data-path="index.html"><a href="index.html#bayesian-statistics"><i class="fa fa-check"></i><b>1.1</b> Bayesian Statistics</a></li>
<li class="chapter" data-level="1.2" data-path="index.html"><a href="index.html#prior-information"><i class="fa fa-check"></i><b>1.2</b> Prior Information</a></li>
<li class="chapter" data-level="1.3" data-path="index.html"><a href="index.html#expert-elicitation"><i class="fa fa-check"></i><b>1.3</b> Expert Elicitation</a></li>
<li class="chapter" data-level="1.4" data-path="index.html"><a href="index.html#aims-and-outline"><i class="fa fa-check"></i><b>1.4</b> Aims and Outline</a></li>
</ul></li>
<li class="chapter" data-level="2" data-path="fivestep.html"><a href="fivestep.html"><i class="fa fa-check"></i><b>2</b> Proposal for a Five-Step Method to Elicit Expert Judgment</a><ul>
<li class="chapter" data-level="" data-path="fivestep.html"><a href="fivestep.html#abstract"><i class="fa fa-check"></i>Abstract</a></li>
<li class="chapter" data-level="2.1" data-path="fivestep.html"><a href="fivestep.html#ch02introduction"><i class="fa fa-check"></i><b>2.1</b> Introduction</a></li>
<li class="chapter" data-level="2.2" data-path="fivestep.html"><a href="fivestep.html#five-step-method"><i class="fa fa-check"></i><b>2.2</b> Five-Step Method</a><ul>
<li class="chapter" data-level="2.2.1" data-path="fivestep.html"><a href="fivestep.html#step-1"><i class="fa fa-check"></i><b>2.2.1</b> Step 1</a></li>
<li class="chapter" data-level="2.2.2" data-path="fivestep.html"><a href="fivestep.html#step-2"><i class="fa fa-check"></i><b>2.2.2</b> Step 2</a></li>
<li class="chapter" data-level="2.2.3" data-path="fivestep.html"><a href="fivestep.html#step-3"><i class="fa fa-check"></i><b>2.2.3</b> Step 3</a></li>
<li class="chapter" data-level="2.2.4" data-path="fivestep.html"><a href="fivestep.html#step-4"><i class="fa fa-check"></i><b>2.2.4</b> Step 4</a></li>
<li class="chapter" data-level="2.2.5" data-path="fivestep.html"><a href="fivestep.html#step-5"><i class="fa fa-check"></i><b>2.2.5</b> Step 5</a></li>
</ul></li>
<li class="chapter" data-level="2.3" data-path="fivestep.html"><a href="fivestep.html#elicitation-studies"><i class="fa fa-check"></i><b>2.3</b> Elicitation Studies</a><ul>
<li class="chapter" data-level="2.3.1" data-path="fivestep.html"><a href="fivestep.html#user-feasibility-study"><i class="fa fa-check"></i><b>2.3.1</b> User Feasibility Study</a></li>
<li class="chapter" data-level="2.3.2" data-path="fivestep.html"><a href="fivestep.html#elicitation-staffing-company"><i class="fa fa-check"></i><b>2.3.2</b> Elicitation Staffing Company</a></li>
<li class="chapter" data-level="2.3.3" data-path="fivestep.html"><a href="fivestep.html#elicitation-large-financial-institution"><i class="fa fa-check"></i><b>2.3.3</b> Elicitation Large Financial Institution</a></li>
</ul></li>
<li class="chapter" data-level="2.4" data-path="fivestep.html"><a href="fivestep.html#ch02discussion"><i class="fa fa-check"></i><b>2.4</b> Discussion</a></li>
<li class="chapter" data-level="" data-path="fivestep.html"><a href="fivestep.html#ch02ethics"><i class="fa fa-check"></i>Ethics Statement</a></li>
<li class="chapter" data-level="" data-path="fivestep.html"><a href="fivestep.html#ch02funding"><i class="fa fa-check"></i>Funding</a></li>
<li class="chapter" data-level="" data-path="fivestep.html"><a href="fivestep.html#ch02acknowledgments"><i class="fa fa-check"></i>Acknowledgments</a></li>
<li class="chapter" data-level="" data-path="fivestep.html"><a href="fivestep.html#ch02conflict"><i class="fa fa-check"></i>Conflict of Interest Statement</a></li>
</ul></li>
<li class="chapter" data-level="3" data-path="DAC1.html"><a href="DAC1.html"><i class="fa fa-check"></i><b>3</b> Using the Data Agreement Criterion to Rank Experts’ Beliefs</a><ul>
<li class="chapter" data-level="" data-path="DAC1.html"><a href="DAC1.html#abstract-1"><i class="fa fa-check"></i>Abstract</a></li>
<li class="chapter" data-level="3.1" data-path="DAC1.html"><a href="DAC1.html#ch03introduction"><i class="fa fa-check"></i><b>3.1</b> Introduction</a></li>
<li class="chapter" data-level="3.2" data-path="DAC1.html"><a href="DAC1.html#expert-data-disagreement"><i class="fa fa-check"></i><b>3.2</b> Expert-Data (Dis)Agreement</a><ul>
<li class="chapter" data-level="3.2.1" data-path="DAC1.html"><a href="DAC1.html#data-agreement-criterion"><i class="fa fa-check"></i><b>3.2.1</b> Data Agreement Criterion</a></li>
<li class="chapter" data-level="3.2.2" data-path="DAC1.html"><a href="DAC1.html#DACvsBF"><i class="fa fa-check"></i><b>3.2.2</b> Comparison to Ranking by the Bayes Factor</a></li>
<li class="chapter" data-level="3.2.3" data-path="DAC1.html"><a href="DAC1.html#DACvsBF2"><i class="fa fa-check"></i><b>3.2.3</b> DAC Versus BF</a></li>
</ul></li>
<li class="chapter" data-level="3.3" data-path="DAC1.html"><a href="DAC1.html#empirical-example"><i class="fa fa-check"></i><b>3.3</b> Empirical Example</a><ul>
<li class="chapter" data-level="3.3.1" data-path="DAC1.html"><a href="DAC1.html#elicitation-procedure"><i class="fa fa-check"></i><b>3.3.1</b> Elicitation Procedure</a></li>
<li class="chapter" data-level="3.3.2" data-path="DAC1.html"><a href="DAC1.html#ranking-the-experts"><i class="fa fa-check"></i><b>3.3.2</b> Ranking the Experts</a></li>
</ul></li>
<li class="chapter" data-level="3.4" data-path="DAC1.html"><a href="DAC1.html#ch03discussion"><i class="fa fa-check"></i><b>3.4</b> Discussion</a></li>
<li class="chapter" data-level="" data-path="DAC1.html"><a href="DAC1.html#ch03ethics"><i class="fa fa-check"></i>Ethics Statement</a></li>
<li class="chapter" data-level="" data-path="DAC1.html"><a href="DAC1.html#ch03funding"><i class="fa fa-check"></i>Funding</a></li>
<li class="chapter" data-level="" data-path="DAC1.html"><a href="DAC1.html#ch03acknowledgments"><i class="fa fa-check"></i>Acknowledgments</a></li>
<li class="chapter" data-level="" data-path="DAC1.html"><a href="DAC1.html#ch03conflict"><i class="fa fa-check"></i>Conflicts of Interest Statement</a></li>
</ul></li>
<li class="chapter" data-level="4" data-path="Hierarchical.html"><a href="Hierarchical.html"><i class="fa fa-check"></i><b>4</b> A Step Forward: Bayesian Hierarchical Modelling as a Tool in Assessment of Individual Discrimination Performance</a><ul>
<li class="chapter" data-level="" data-path="Hierarchical.html"><a href="Hierarchical.html#abstract-2"><i class="fa fa-check"></i>Abstract</a></li>
<li class="chapter" data-level="4.1" data-path="Hierarchical.html"><a href="Hierarchical.html#ch04introduction"><i class="fa fa-check"></i><b>4.1</b> Introduction</a></li>
<li class="chapter" data-level="4.2" data-path="Hierarchical.html"><a href="Hierarchical.html#method"><i class="fa fa-check"></i><b>4.2</b> Method</a><ul>
<li class="chapter" data-level="4.2.1" data-path="Hierarchical.html"><a href="Hierarchical.html#participants"><i class="fa fa-check"></i><b>4.2.1</b> Participants</a></li>
<li class="chapter" data-level="4.2.2" data-path="Hierarchical.html"><a href="Hierarchical.html#stimuli"><i class="fa fa-check"></i><b>4.2.2</b> Stimuli</a></li>
<li class="chapter" data-level="4.2.3" data-path="Hierarchical.html"><a href="Hierarchical.html#procedure"><i class="fa fa-check"></i><b>4.2.3</b> Procedure</a></li>
</ul></li>
<li class="chapter" data-level="4.3" data-path="Hierarchical.html"><a href="Hierarchical.html#results-3"><i class="fa fa-check"></i><b>4.3</b> Results</a><ul>
<li class="chapter" data-level="4.3.1" data-path="Hierarchical.html"><a href="Hierarchical.html#summary-of-the-group-data-published-in-de_klerk_lost_2019"><i class="fa fa-check"></i><b>4.3.1</b> Summary of the group data published in <span class="citation">de Klerk et al. (<span>2019</span>)</span></a></li>
<li class="chapter" data-level="4.3.2" data-path="Hierarchical.html"><a href="Hierarchical.html#data-screening"><i class="fa fa-check"></i><b>4.3.2</b> Data Screening</a></li>
<li class="chapter" data-level="4.3.3" data-path="Hierarchical.html"><a href="Hierarchical.html#analysis-1-linear-regression-model-with-autoregressive-ar1-error-structure"><i class="fa fa-check"></i><b>4.3.3</b> Analysis 1: Linear Regression Model with Autoregressive (AR1) Error Structure</a></li>
<li class="chapter" data-level="4.3.4" data-path="Hierarchical.html"><a href="Hierarchical.html#analysis-2-hierarchical-bayesian-analysis"><i class="fa fa-check"></i><b>4.3.4</b> Analysis 2: Hierarchical Bayesian Analysis</a></li>
</ul></li>
<li class="chapter" data-level="4.4" data-path="Hierarchical.html"><a href="Hierarchical.html#discussion"><i class="fa fa-check"></i><b>4.4</b> Discussion</a></li>
<li class="chapter" data-level="" data-path="Hierarchical.html"><a href="Hierarchical.html#ch04ethics"><i class="fa fa-check"></i>Ethics Statement</a></li>
<li class="chapter" data-level="" data-path="Hierarchical.html"><a href="Hierarchical.html#ch04acknowledgments"><i class="fa fa-check"></i>Acknowledgments</a></li>
<li class="chapter" data-level="" data-path="Hierarchical.html"><a href="Hierarchical.html#ch05appendix"><i class="fa fa-check"></i>Appendix A</a></li>
<li class="chapter" data-level="" data-path="Hierarchical.html"><a href="Hierarchical.html#ch05appendixB"><i class="fa fa-check"></i>Appendix B</a><ul>
<li class="chapter" data-level="4.4.1" data-path="Hierarchical.html"><a href="Hierarchical.html#software"><i class="fa fa-check"></i><b>4.4.1</b> Software</a></li>
<li class="chapter" data-level="4.4.2" data-path="Hierarchical.html"><a href="Hierarchical.html#priors"><i class="fa fa-check"></i><b>4.4.2</b> Priors</a></li>
<li class="chapter" data-level="4.4.3" data-path="Hierarchical.html"><a href="Hierarchical.html#estimation-and-convergence"><i class="fa fa-check"></i><b>4.4.3</b> Estimation and Convergence</a></li>
<li class="chapter" data-level="4.4.4" data-path="Hierarchical.html"><a href="Hierarchical.html#posterior-predictive-check"><i class="fa fa-check"></i><b>4.4.4</b> Posterior predictive check</a></li>
<li class="chapter" data-level="4.4.5" data-path="Hierarchical.html"><a href="Hierarchical.html#sensitivity-analysis"><i class="fa fa-check"></i><b>4.4.5</b> Sensitivity Analysis</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="5" data-path="Burns.html"><a href="Burns.html"><i class="fa fa-check"></i><b>5</b> The importance of collaboration in Bayesian analyses with small samples</a><ul>
<li class="chapter" data-level="" data-path="Burns.html"><a href="Burns.html#abstract-3"><i class="fa fa-check"></i>Abstract</a></li>
<li class="chapter" data-level="5.1" data-path="Burns.html"><a href="Burns.html#ch05introduction"><i class="fa fa-check"></i><b>5.1</b> Introduction</a></li>
<li class="chapter" data-level="5.2" data-path="Burns.html"><a href="Burns.html#latent-growth-models-with-small-sample-sizes"><i class="fa fa-check"></i><b>5.2</b> Latent Growth Models with small sample sizes</a></li>
<li class="chapter" data-level="5.3" data-path="Burns.html"><a href="Burns.html#empirical-example-analysis-plan"><i class="fa fa-check"></i><b>5.3</b> Empirical example: Analysis plan</a><ul>
<li class="chapter" data-level="5.3.1" data-path="Burns.html"><a href="Burns.html#research-question-model-specification-and-an-overview-of-data"><i class="fa fa-check"></i><b>5.3.1</b> Research question, model specification and an overview of data</a></li>
<li class="chapter" data-level="5.3.2" data-path="Burns.html"><a href="Burns.html#specifying-and-understanding-priors"><i class="fa fa-check"></i><b>5.3.2</b> Specifying and understanding priors</a></li>
</ul></li>
<li class="chapter" data-level="5.4" data-path="Burns.html"><a href="Burns.html#empirical-example-conducting-the-analysis"><i class="fa fa-check"></i><b>5.4</b> Empirical example: Conducting the analysis</a></li>
<li class="chapter" data-level="5.5" data-path="Burns.html"><a href="Burns.html#debugging"><i class="fa fa-check"></i><b>5.5</b> Debugging</a></li>
<li class="chapter" data-level="5.6" data-path="Burns.html"><a href="Burns.html#moving-forward-alternative-models"><i class="fa fa-check"></i><b>5.6</b> Moving forward: Alternative Models</a></li>
<li class="chapter" data-level="5.7" data-path="Burns.html"><a href="Burns.html#conclusion"><i class="fa fa-check"></i><b>5.7</b> Conclusion</a></li>
<li class="chapter" data-level="5.8" data-path="Burns.html"><a href="Burns.html#acknowledgements"><i class="fa fa-check"></i><b>5.8</b> Acknowledgements</a></li>
</ul></li>
<li class="chapter" data-level="6" data-path="elicitlgm.html"><a href="elicitlgm.html"><i class="fa fa-check"></i><b>6</b> Expert Elicitation in the Social Sciences: The case of Posttraumatic Stress Symptoms Development in Children with Burn Injuries</a><ul>
<li class="chapter" data-level="" data-path="elicitlgm.html"><a href="elicitlgm.html#abstract-4"><i class="fa fa-check"></i>Abstract</a></li>
<li class="chapter" data-level="6.1" data-path="elicitlgm.html"><a href="elicitlgm.html#ch06introduction"><i class="fa fa-check"></i><b>6.1</b> Introduction</a></li>
<li class="chapter" data-level="6.2" data-path="elicitlgm.html"><a href="elicitlgm.html#methods"><i class="fa fa-check"></i><b>6.2</b> Methods</a><ul>
<li class="chapter" data-level="6.2.1" data-path="elicitlgm.html"><a href="elicitlgm.html#motivating-example"><i class="fa fa-check"></i><b>6.2.1</b> Motivating Example</a></li>
<li class="chapter" data-level="6.2.2" data-path="elicitlgm.html"><a href="elicitlgm.html#expert-elicitation-1"><i class="fa fa-check"></i><b>6.2.2</b> Expert Elicitation</a></li>
<li class="chapter" data-level="6.2.3" data-path="elicitlgm.html"><a href="elicitlgm.html#sample-of-experts"><i class="fa fa-check"></i><b>6.2.3</b> Sample of Experts</a></li>
</ul></li>
<li class="chapter" data-level="6.3" data-path="elicitlgm.html"><a href="elicitlgm.html#results-4"><i class="fa fa-check"></i><b>6.3</b> Results</a><ul>
<li class="chapter" data-level="6.3.1" data-path="elicitlgm.html"><a href="elicitlgm.html#individual-and-group-expert-judgements"><i class="fa fa-check"></i><b>6.3.1</b> Individual and Group Expert Judgements</a></li>
<li class="chapter" data-level="6.3.2" data-path="elicitlgm.html"><a href="elicitlgm.html#prior-data-disagreement"><i class="fa fa-check"></i><b>6.3.2</b> Prior-Data (dis)Agreement</a></li>
<li class="chapter" data-level="6.3.3" data-path="elicitlgm.html"><a href="elicitlgm.html#audio-recordings"><i class="fa fa-check"></i><b>6.3.3</b> Audio Recordings</a></li>
</ul></li>
<li class="chapter" data-level="6.4" data-path="elicitlgm.html"><a href="elicitlgm.html#discussion-1"><i class="fa fa-check"></i><b>6.4</b> Discussion</a></li>
<li class="chapter" data-level="" data-path="elicitlgm.html"><a href="elicitlgm.html#conflicts-of-interest"><i class="fa fa-check"></i>Conflicts of Interest</a></li>
<li class="chapter" data-level="" data-path="elicitlgm.html"><a href="elicitlgm.html#ethics-statement"><i class="fa fa-check"></i>Ethics Statement</a></li>
<li class="chapter" data-level="" data-path="elicitlgm.html"><a href="elicitlgm.html#acknowledgements-1"><i class="fa fa-check"></i>Acknowledgements</a></li>
<li class="chapter" data-level="" data-path="elicitlgm.html"><a href="elicitlgm.html#funding"><i class="fa fa-check"></i>Funding</a></li>
</ul></li>
<li class="chapter" data-level="7" data-path="thesisdiscussion.html"><a href="thesisdiscussion.html"><i class="fa fa-check"></i><b>7</b> Discussion</a><ul>
<li class="chapter" data-level="7.1" data-path="thesisdiscussion.html"><a href="thesisdiscussion.html#hidden-assumptions"><i class="fa fa-check"></i><b>7.1</b> Hidden assumptions</a></li>
<li class="chapter" data-level="7.2" data-path="thesisdiscussion.html"><a href="thesisdiscussion.html#expert-knowledge"><i class="fa fa-check"></i><b>7.2</b> Expert Knowledge</a></li>
<li class="chapter" data-level="7.3" data-path="thesisdiscussion.html"><a href="thesisdiscussion.html#taking-a-decision"><i class="fa fa-check"></i><b>7.3</b> Taking a decision</a></li>
</ul></li>
<li class="chapter" data-level="" data-path="nederlandse-samenvatting.html"><a href="nederlandse-samenvatting.html"><i class="fa fa-check"></i>Nederlandse Samenvatting</a></li>
<li class="chapter" data-level="" data-path="dankwoord.html"><a href="dankwoord.html"><i class="fa fa-check"></i>Dankwoord</a></li>
<li class="chapter" data-level="" data-path="curriculum-vitae.html"><a href="curriculum-vitae.html"><i class="fa fa-check"></i>Curriculum Vitae</a><ul>
<li class="chapter" data-level="" data-path="curriculum-vitae.html"><a href="curriculum-vitae.html#academic-publications"><i class="fa fa-check"></i>Academic Publications</a></li>
<li class="chapter" data-level="" data-path="curriculum-vitae.html"><a href="curriculum-vitae.html#book-chapters"><i class="fa fa-check"></i>Book Chapters</a></li>
<li class="chapter" data-level="" data-path="curriculum-vitae.html"><a href="curriculum-vitae.html#technical-reports"><i class="fa fa-check"></i>Technical Reports</a></li>
<li class="chapter" data-level="" data-path="curriculum-vitae.html"><a href="curriculum-vitae.html#manuscripts-under-review"><i class="fa fa-check"></i>Manuscripts under review</a></li>
<li class="chapter" data-level="" data-path="curriculum-vitae.html"><a href="curriculum-vitae.html#grants"><i class="fa fa-check"></i>Grants</a></li>
<li class="chapter" data-level="" data-path="curriculum-vitae.html"><a href="curriculum-vitae.html#awards"><i class="fa fa-check"></i>Awards</a></li>
</ul></li>
<li class="chapter" data-level="" data-path="ref.html"><a href="ref.html"><i class="fa fa-check"></i>References</a></li>
</ul>
</nav>
</div>
<div class="book-body">
<div class="body-inner">
<div class="book-header" role="navigation">
<h1>
<i class="fa fa-circle-o-notch fa-spin"></i><a href="./"></a>
</h1>
</div>
<div class="page-wrapper" tabindex="-1" role="main">
<div class="page-inner">
<section class="normal" id="section-">
<div id="DAC1" class="section level1">
<h1><span class="header-section-number">3</span> Using the Data Agreement Criterion to Rank Experts’ Beliefs</h1>
<div id="abstract-1" class="section level2 unnumbered">
<h2>Abstract</h2>
<p>Experts’ beliefs embody a present state of knowledge. It is desirable to take this knowledge into account when making decisions. However, ranking experts based on the merit of their beliefs is a difficult task. In this paper, we show how experts can be ranked based on their knowledge and their level of (un)certainty. By letting experts specify their knowledge in the form of a probability distribution, we can assess how accurately they can predict new data, and how appropriate their level of (un)certainty is. The expert’s specified probability distribution can be seen as a prior in a Bayesian statistical setting. We evaluate these priors by extending an existing prior-data (dis)agreement measure, the Data Agreement Criterion, and compare this approach to using Bayes factors to assess prior specification. We compare experts with each other and the data to evaluate their appropriateness. Using this method, new research questions can be asked and answered, for instance: Which expert predicts the new data best? Is there agreement between my experts and the data? Which expert’s representation is more valid or useful? Can we reach convergence between expert judgement and data? We provide an empirical example, ranking (regional) directors of a large financial institution based on their predictions of turnover.
<!-- \indent _keywords:_ Bayes, Bayes factor, decision making, expert judgement, Kullback–Leibler divergence, prior-data (dis)agreement, ranking --></p>
</div>
<div id="ch03introduction" class="section level2">
<h2><span class="header-section-number">3.1</span> Introduction</h2>
<p>In the process of scientific inference, the knowledge and beliefs of experts can provide vital information. Experts’ beliefs represent the current state of knowledge. It is desirable to be able to include this information in analyses or decision-making processes. This can be done by using the Bayesian statistical framework. In Bayesian statistics, there are two sources of information: prior knowledge and data <span class="citation">(Gelman et al., <a href="#ref-gelman_bayesian_2013" role="doc-biblioref">2013</a>; Lynch, <a href="#ref-lynch_introduction_2007" role="doc-biblioref">2007</a>; Zyphur, Oswald, & Rupp, <a href="#ref-zyphur_bayesian_2015" role="doc-biblioref">2015</a>)</span>. The prior can be composed of expert knowledge <span class="citation">(Bolsinova, Hoijtink, Vermeulen, & Beguin, <a href="#ref-bolsinova_using_2017" role="doc-biblioref">2017</a>; O’Hagan et al., <a href="#ref-ohagan_uncertain_2006" role="doc-biblioref">2006</a>; Zondervan-Zwijnenburg et al., <a href="#ref-zondervan-zwijnenburg_application_2017" role="doc-biblioref">2017</a><a href="#ref-zondervan-zwijnenburg_application_2017" role="doc-biblioref">b</a>)</span>. However, deciding which expert yields the most appropriate information remains a critical challenge, for which we present a solution in this paper.</p>
<p>To be able to consider expert knowledge in Bayesian statistics, it must be represented in the form of a probability distribution. This can be done via a process called expert elicitation. Elicitation entails extracting expert knowledge and translating it into a probabilistic representation <span class="citation">(O’Hagan et al., <a href="#ref-ohagan_uncertain_2006" role="doc-biblioref">2006</a>)</span>. By using a probabilistic representation, we include both the knowledge and the (un)certainty of the experts. However, experts are forced to use a representation system that belongs to the statistical realm. Therefore, it is essential that the elicitation process is carefully constructed so that we do not introduce unnecessary and unjustified bias.</p>
<p>The expression of expert knowledge in the form of a probability distribution is not merely based on statistical considerations. Forecasting without providing uncertainty estimates does not make sense, for, if we were certain, we would not predict but simply conclude that future events will occur, as they would be inevitable. This would simply be a form of deductive logic and no discussion or disagreement based on the facts would be possible. Here, it is relevant to make the distinction between aleatory and epistemic uncertainty. Aleatory uncertainty is uncertainty due to randomness or chance, e.g., market volatility, whilst epistemic uncertainty is uncertainty due to a lack of knowledge. In practice, there is a blurred line between epistemic and aleatory uncertainty and the two can be seen as the ends of a spectrum, but, for the sake of argument, we shall make a clear distinction between the two here. In any case, if we can agree that, based on all the available information, multiple outcomes are still possible, we are in a situation in which we should make forecasts that include uncertainty estimates, and probability distributions provide an excellent framework for doing so.</p>
<p>By collecting data and modeling the parameter of interest, we are able to gain an indication of the appropriate amount of uncertainty and the expected parameter value based on posterior distributions of interest in the model. In the limit, where we would not have epistemic uncertainty and all of the relevant background characteristics could be controlled for, any remaining residual variance in the model is the appropriate and correct amount of aleatory uncertainty. In practice, however, we do not have the perfect model and not all epistemic uncertainty can be ruled out, that is, we have not yet identified all relevant background characteristics. What we do have in practice are multiple experts with divergent beliefs on the relevant background characteristics. If we can evaluate their forecasts, including uncertainty, we can take more accurate forecasts as an indication of expertise on relevant aspects of the data generating process and we should let these experts guide us in identifying the relevant background characteristics. Moreover, if these knowledgeable experts can be identified and persuaded to share their insights with each other, they can start to learn from each other, the data and the appropriateness of assumptions underlying their forecasts. By expressing expert knowledge and data in the same framework, a learning process can start that has the potential to reduce uncertainty.</p>
<p>Once expert knowledge is elicited and data is collected, it is desirable to find a measure that naturally compares two pieces of information. The measure should assess the extent to which the information from the data and the expert knowledge resemble or conflict with each other. As the expert knowledge can be contained within a prior, it seems logical to assess the discrepancy or similarity of such a prior with respect to the data by means of a prior-data (dis)agreement measure. A desirable property for such a prior-data (dis)agreement measure would be to measure how one probability distribution diverges from a second probability distribution, rather than assessing the distance between two point estimates. The Data Agreement Criterion (DAC) <span class="citation">(Bousquet, <a href="#ref-bousquet_diagnostics_2008" role="doc-biblioref">2008</a>)</span> is based on Kullback–Leibler (KL) divergences <span class="citation">(Kullback & Leibler, <a href="#ref-kullback_information_1951" role="doc-biblioref">1951</a>)</span> and therefore meets this desired property. KL divergence has previously been used in a related context to assess calibration and information scores of experts <span class="citation">(Cooke, <a href="#ref-cooke_experts_1991" role="doc-biblioref">1991</a>; Quigley, Colson, Aspinall, & Cooke, <a href="#ref-quigley_elicitation_2018" role="doc-biblioref">2018</a>)</span>.</p>
<p>Prior-data (dis)agreement measures are currently used to evaluate, for example, the suitability of certain priors in the estimation of models or to uncover potential suitability problems with design, prior or both. Examples can be found in, for instance <span class="citation">(Fu, Celeux, Bousquet, & Couplet, <a href="#ref-fu_bayesian_2015" role="doc-biblioref">2015</a>; Fu, Couplet, & Bousquet, <a href="#ref-fu_adaptive_2017" role="doc-biblioref">2017</a>; Walley, Smith, Gale, & Woodward, <a href="#ref-walley_advantages_2015" role="doc-biblioref">2015</a>)</span>. We found no previous use of prior-data (dis)agreement measures to rank experts. However, when we have two experts, some very interesting questions can already be answered, for instance: Which expert predicts the new data best? Is there agreement between my experts and the data? Which expert’s representation is more valid or useful? Can we reach convergence between expert judgement and data? Therefore, the main contribution of this paper will be to provide an application of prior-data (dis)agreement measures to expert ranking.</p>
<p>Other measures that answer similar questions on a different theoretical basis can be found. For instance, Cohen’s kappa <span class="citation">(Cohen, <a href="#ref-cohen_coefficient_1960" role="doc-biblioref">1960</a>)</span> could be used to assess inter-rater agreement, intraclass correlations <span class="citation">(Koch, <a href="#ref-koch_intraclass_2004" role="doc-biblioref">2004</a>)</span> could be used to assess rater reliability <span class="citation">(Shrout & Fleiss, <a href="#ref-shrout_intraclass_1979" role="doc-biblioref">1979</a>)</span> and Brier scores <span class="citation">(Brier, <a href="#ref-brier_verification_1950" role="doc-biblioref">1950</a>)</span> can be used to assess the discrepancy between experts’ estimated probabilities and actual outcomes <span class="citation">(Barons, Wright, & Smith, <a href="#ref-barons_eliciting_2018" role="doc-biblioref">2018</a>)</span>. These measures, however, do not account for the uncertainty of the experts over their provided estimates.</p>
<p>An alternative approach could be to use Bayes factors (BF) <span class="citation">(Kass & Raftery, <a href="#ref-kass_bayes_1995" role="doc-biblioref">1995</a>)</span> based on marginal likelihoods. One could imagine different experts’ beliefs as competing models. When the differing views are expressed in different prior distributions, we could assess the likelihood of the data averaged across the prior distribution, which is what a marginal likelihood is <span class="citation">(Liu & Aitkin, <a href="#ref-liu_bayes_2008" role="doc-biblioref">2008</a>)</span>. This likelihood depends on the model structure, such as the parametrization, or the set of probability distributions that is used as the model <span class="citation">(Wasserman, <a href="#ref-wasserman_bayesian_2000" role="doc-biblioref">2000</a>)</span>. If we keep this set of probability distributions, i.e., the model, equal across the experts and use the same data, the marginal likelihood provides an indication of which expert’s prior belief gives the most probability to the data, and thus which expert is ranked as most trustworthy. The BF, being a ratio of marginal likelihoods, could then provide us with the odds in favor of one expert’s beliefs over another’s. This approach warrants further comparison, which is given in Section <a href="DAC1.html#DACvsBF">3.2.2</a>.</p>
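<p>To make this alternative concrete, the following sketch (ours, not part of the original study) illustrates how a ranking by marginal likelihood could look under the simplifying assumption of a normal model with known variance, where each expert’s belief about the mean is a normal prior; the data summaries and expert priors below are purely illustrative.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"># Sketch: ranking two hypothetical experts by marginal likelihood in a
# known-variance normal model. All numbers are illustrative assumptions.
from math import sqrt
from scipy.stats import norm

sigma, n, ybar = 10.0, 50, 102.0   # assumed data: n observations with sample mean ybar

def marginal_likelihood(mu0, tau0):
    """m(ybar) when y | theta ~ N(theta, sigma^2) and theta ~ N(mu0, tau0^2):
    the sample mean is marginally N(mu0, sigma^2/n + tau0^2)."""
    return norm.pdf(ybar, loc=mu0, scale=sqrt(sigma**2 / n + tau0**2))

m1 = marginal_likelihood(mu0=100.0, tau0=5.0)   # expert 1: close to the data, fairly certain
m2 = marginal_likelihood(mu0=90.0,  tau0=2.0)   # expert 2: off target, very certain
print("BF in favor of expert 1:", m1 / m2)
</code></pre></div>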
<div style="page-break-after: always;"></div>
<p>In the remainder of this paper, we present the following work. We provide a detailed description of the DAC and explain why this measure is especially suitable to compare expert judgement and data. As the DAC currently determines the degree of prior-data (dis)agreement of one prior, we propose a straightforward adjustment of the statistic to allow the ranking of multiple sources of prior information, i.e., multiple experts’ beliefs. We discuss how Bayes factors could be used to rank experts based on their prior specifications. Finally, we provide an empirical example to show that the adapted DAC can be used to compare and rank several experts based on their beliefs and we compare this to using Bayes factors. In the empirical example, we rank experts from a large financial institution based on their predictions of new data concerning turnover. The empirical study in this article received approval from our internal Ethics Committee of the Faculty of Social and Behavioural Sciences of Utrecht University. The letter of approval can be found in the data archive for this study along with all other code and data, as far as contracts permit us, in order to ensure everything presented in this paper is reproducible. The data archive can be found on the Open Science Framework (OSF) webpage for this project at <a href="https://osf.io/u57qs">https://osf.io/u57qs</a>.</p>
</div>
<div id="expert-data-disagreement" class="section level2">
<h2><span class="header-section-number">3.2</span> Expert-Data (Dis)Agreement</h2>
<p>Within this section, we discuss the DAC and the Bayes factor that are used to evaluate experts’ beliefs.</p>
<div id="data-agreement-criterion" class="section level3">
<h3><span class="header-section-number">3.2.1</span> Data Agreement Criterion</h3>
<p>Within this subsection, we provide a detailed and mathematical description of the DAC before proposing the adaptation that allows the ranking of multiple experts’ beliefs at the same time. The DAC is based on a ratio of KL divergences; therefore, we will first describe KL divergence <span class="citation">(Kullback & Leibler, <a href="#ref-kullback_information_1951" role="doc-biblioref">1951</a>)</span>.</p>
<div id="kullback-leibler-divergence" class="section level4">
<h4><span class="header-section-number">3.2.1.1</span> Kullback-Leibler Divergence</h4>
<p>The KL divergence is a measure of informative regret; in other words, it measures the loss of information that occurs when the reference distribution <span class="math inline">\((\pi_1)\)</span> is approximated by another distribution <span class="math inline">\((\pi_2)\)</span>. This loss of information, or informative regret, is expressed as a numerical value: the higher the value, the greater the loss of information, i.e., the greater the discrepancy between the two distributions. The KL divergence is calculated by</p>
<p><span class="math display" id="eq:ch03eq1">\[\begin{equation}
KL(\pi_1 || \pi_2) = \int_{\Theta} \pi_1(\theta) log \frac{\pi_1(\theta)}{\pi_2(\theta)} d\theta,
\tag{3.1}
\end{equation}\]</span>
where <span class="math inline">\(\Theta\)</span> is the set of all accessible values for the parameter <span class="math inline">\(\theta\)</span>, that is, its parameter space, <span class="math inline">\(\pi_1(\theta)\)</span> denotes the reference distribution and <span class="math inline">\(\pi_2(\theta)\)</span> denotes the distribution that approximates the reference distribution. Figure <a href="DAC1.html#fig:ch03fig1">3.1</a> shows what the KL divergence between two normal distributions looks like. The value of the KL divergence is equal to the integral of the integrand over the parameter space. The greater the discrepancy between the distributions, the larger the value of the integral. This also follows from Equation <a href="DAC1.html#eq:ch03eq1">(3.1)</a> because, if the two distributions are equal, then <span class="math inline">\(\pi_1(\theta)/\pi_2(\theta)\)</span> equals one everywhere. As <span class="math inline">\(log(1) = 0\)</span>, the integral, or loss of information, is equal to zero. To support understanding of the KL divergence, we built a Shiny application that provides an interactive variant of Figure <a href="DAC1.html#fig:ch03fig1">3.1</a>, which can be found via the OSF webpage at <a href="https://osf.io/u57qs">https://osf.io/u57qs</a>.</p>
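<p>As a minimal illustration of Equation <a href="DAC1.html#eq:ch03eq1">(3.1)</a>, the sketch below (ours, not part of the original text) numerically evaluates the KL divergence for the two normal densities of Figure <a href="DAC1.html#fig:ch03fig1">3.1</a> by integrating the integrand over a wide interval; for these two densities the closed-form value is 0.5.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"># Sketch: numerically evaluating Equation (3.1) for the densities in Figure 3.1,
# pi_1 = N(0, 1) and pi_2 = N(1, 1); the closed-form KL divergence here is 0.5.
from math import log
from scipy.stats import norm
from scipy.integrate import quad

pi_1 = norm(loc=0.0, scale=1.0)   # reference distribution
pi_2 = norm(loc=1.0, scale=1.0)   # distribution approximating the reference

def integrand(theta):
    return pi_1.pdf(theta) * log(pi_1.pdf(theta) / pi_2.pdf(theta))

kl, _ = quad(integrand, -10, 10)  # a range wide enough to cover both densities
print(round(kl, 3))               # 0.5
</code></pre></div>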
<p>If we are able to represent both the data and the expert knowledge in a distributional form, a discrepancy between the two can be expressed by the KL divergence between the two. As we might have multiple experts but only one source of data, it seems natural that the data be considered the reference distribution, which is approximated by the experts’ beliefs expressed as probability distributions. We will see in the following, where we elaborate on the details of this prior-data (dis)agreement measure developed by Bousquet <span class="citation">(Bousquet, <a href="#ref-bousquet_diagnostics_2008" role="doc-biblioref">2008</a>)</span>, that this is indeed the case in the DAC.</p>
<div class="figure" style="text-align: center"><span id="fig:ch03fig1"></span>
<img src="figures/chapter_3/Figure1.png" alt="KL divergences between two normal distributions. In this example, $\pi_1$ is a standard normal distribution and $\pi_2$ is a normal distribution with a mean of 1 and a variance of 1. The value of the KL divergence is equal to the integral over the parameter space for the function. The green shaded area above the x-axis adds to the KL divergence and the green shaded area below the x-axis subtracts from the KL divergence." width="90%" />
<p class="caption">
Figure 3.1: KL divergences between two normal distributions. In this example, <span class="math inline">\(\pi_1\)</span> is a standard normal distribution and <span class="math inline">\(\pi_2\)</span> is a normal distribution with a mean of 1 and a variance of 1. The value of the KL divergence is equal to the integral over the parameter space for the function. The green shaded area above the x-axis adds to the KL divergence and the green shaded area below the x-axis subtracts from the KL divergence.
</p>
</div>
</div>
<div id="noninformativepriors" class="section level4">
<h4><span class="header-section-number">3.2.1.2</span> DAC</h4>
<p>The DAC, as mentioned before, is a ratio of two KL divergences. A KL divergence provides an indication of the discrepancy between two distributions. The KL divergence does not, however, have a natural cut-off value or threshold that can help us decide when a certain amount of loss of information would constitute prior-data disagreement. To be able to objectively conclude when prior-data disagreement exists, the DAC compares the loss of information that a certain prior has with respect to the data with the loss of information that a benchmark prior has with respect to the data. The KL divergence between the chosen prior and the data is the numerator in the ratio, whilst the KL divergence between the benchmark prior and the data is the denominator in the ratio. A benchmark prior, denoted by <span class="math inline">\(\pi^J(\theta)\)</span>, should be chosen such that the posterior distribution is completely dominated by the observed data <span class="math inline">\(\textbf{y}\)</span> <span class="citation">(Bernardo, <a href="#ref-bernardo_reference_1979" role="doc-biblioref">1979</a>)</span>. We denote such a posterior distribution by <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> and use this as a representation of the data.</p>
<div style="page-break-after: always;"></div>
<p>It is necessary to expand on the choice of the benchmark prior <span class="math inline">\(\pi^J(\theta)\)</span> and, related to this, the posterior distribution <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span>. Bousquet <span class="citation">(Bousquet, <a href="#ref-bousquet_diagnostics_2008" role="doc-biblioref">2008</a>)</span> follows the reasoning Bernardo provided in discussion with Irony and Singpurwalla <span class="citation">(Irony & Singpurwalla, <a href="#ref-irony_noninformative_1997" role="doc-biblioref">1997</a>)</span> to see <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> as a non-subjective posterior that is representative of the situation in which one’s prior knowledge is dominated by the data. In other words, <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> can be considered as a fictitious expert that is perfectly in agreement with the data, having no prior knowledge and being informed about the observations. <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> can be considered to be a reference posterior conveying the inferential content of the data <span class="citation">(Bernardo, <a href="#ref-bernardo_reference_1979" role="doc-biblioref">1979</a>)</span>.</p>
<p>If <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> is taken to be a reference posterior, this would implicitly support the choice of <span class="math inline">\(\pi^J(\theta)\)</span> such that it is a reference prior as originally developed by <span class="citation">Bernardo (<a href="#ref-bernardo_reference_1979" role="doc-biblioref">1979</a>)</span>, further developed by Berger and Bernardo, e.g., <span class="citation">(Berger & Bernardo, <a href="#ref-berger_estimating_1989" role="doc-biblioref">1989</a>)</span>, described in <span class="citation">Bernardo & Smith (<a href="#ref-bernardo_bayesian_1994" role="doc-biblioref">1994</a>)</span> and more formally worked out in <span class="citation">Berger, Bernardo, & Sun (<a href="#ref-berger_formal_2009" role="doc-biblioref">2009</a>)</span>. Reference priors are not the only possible choice for priors that convey in some sense minimal information or affect the information of the likelihood as weakly as possible <span class="citation">(Gelman, Simpson, & Betancourt, <a href="#ref-gelman_prior_2017" role="doc-biblioref">2017</a>)</span>. An extensive overview can be found in <span class="citation">Kass & Wasserman (<a href="#ref-kass_selection_1996" role="doc-biblioref">1996</a>)</span> and some notable options are
Jeffreys priors <span class="citation">(Jeffreys, <a href="#ref-jeffreys_invariant_1946" role="doc-biblioref">1946</a>, <a href="#ref-jeffreys_theory_1961" role="doc-biblioref">1961</a>)</span> and maximum entropy priors <span class="citation">(Jaynes, <a href="#ref-jaynes_rationale_1982" role="doc-biblioref">1982</a>)</span> to which the reference priors reduce in specific cases <span class="citation">(Bernardo & Smith, <a href="#ref-bernardo_bayesian_1994" role="doc-biblioref">1994</a>)</span>.</p>
<p>One notable problem with using reference priors as a choice for <span class="math inline">\(\pi^J(\theta)\)</span> is that they are often improper priors <span class="citation">(Yang & Berger, <a href="#ref-yang_catalog_1996" role="doc-biblioref">1996</a>)</span>, and KL divergences, and thus the DAC, are not well defined when one of the distributions is improper. An adaptation of the DAC could be used; however, choosing a more convenient prior that is proper and leads to a posterior <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> closely resembling a reference posterior seems reasonable <span class="citation">(Bousquet, <a href="#ref-bousquet_diagnostics_2008" role="doc-biblioref">2008</a>)</span>.</p>
<p>Now taking <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> as the reference posterior, <span class="math inline">\(\pi^J(\theta)\)</span> as the benchmark prior and the data <span class="math inline">\(\textbf{y}\)</span>, the DAC for a chosen (expert) prior, denoted by <span class="math inline">\(\pi(\theta)\)</span>, can be expressed by</p>
<p><span class="math display" id="eq:ch03eq2">\[\begin{equation}
DAC = \frac{KL[\pi^J(.|\textrm{y})||\pi]}{KL[\pi^J(.|\textrm{y})||\pi^J]},
\tag{3.2}
\end{equation}\]</span>
following the notation of Bousquet.</p>
<p>The benchmark, being an uninformative prior, should by definition not conflict with the data and therefore serves as a good reference point to determine whether a certain amount of loss of information can be considered relevant. If a prior conflicts less with the data than the benchmark does, we consider the prior to be in prior-data agreement. If a prior conflicts more with the data than the benchmark prior does, we consider the prior to be in prior-data disagreement. Hence, if the DAC > 1, we conclude prior-data disagreement because the KL divergence of the prior is larger than the KL divergence of the benchmark prior; otherwise, we conclude prior-data agreement.</p>
<p>To illustrate the calculation of the DAC, we provide a numerical example together with a visual representation that can be found in Figure <a href="DAC1.html#fig:ch03fig2">3.2</a>. Consider the case in which <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> is the <span class="math inline">\(N(0,1)\)</span> density, <span class="math inline">\(\pi(\theta)\)</span> is the <span class="math inline">\(N(0.5,1)\)</span> density and <span class="math inline">\(\pi^J(\theta)\)</span> is the <span class="math inline">\(N(0,900)\)</span> density. The DAC is then calculated by taking the ratio of the following two KL divergences, Figure <a href="DAC1.html#fig:ch03fig2">3.2</a>A; <span class="math inline">\(KL[\pi^J(.|\textrm{y})||\pi] = 0.125\)</span> and Figure <a href="DAC1.html#fig:ch03fig2">3.2</a>B; <span class="math inline">\(KL[\pi^J(.|\textrm{y})||\pi^J] = 2.902\)</span>, such that <span class="math inline">\(DAC = 0.125/2.902 = 0.043\)</span>. The DAC < 1, thus we conclude prior-data agreement, and <span class="math inline">\(\pi(\theta)\)</span> is a better approximation of <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> than <span class="math inline">\(\pi^J(\theta)\)</span>.</p>
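<p>For readers who wish to verify these numbers, the sketch below (ours, not part of the original text) reproduces the example using the closed-form KL divergence between two univariate normal densities; note that the standard deviation of the benchmark is <span class="math inline">\(\sqrt{900} = 30\)</span>.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"># Sketch: reproducing the numerical DAC example with the closed-form KL divergence
# between two univariate normal densities N(mu1, sd1^2) and N(mu2, sd2^2).
from math import log

def kl_normal(mu1, sd1, mu2, sd2):
    return log(sd2 / sd1) + (sd1**2 + (mu1 - mu2)**2) / (2 * sd2**2) - 0.5

kl_expert    = kl_normal(0, 1, 0.5, 1)   # KL[pi^J(.|y) || pi]   = 0.125
kl_benchmark = kl_normal(0, 1, 0, 30)    # KL[pi^J(.|y) || pi^J] = 2.902 (variance 900)
dac = kl_expert / kl_benchmark           # 0.125 / 2.902 = 0.043: prior-data agreement
print(round(kl_expert, 3), round(kl_benchmark, 3), round(dac, 3))
</code></pre></div>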
<div class="figure" style="text-align: center"><span id="fig:ch03fig2"></span>
<img src="figures/chapter_3/figure2.png" alt="Calculating the DAC. In this example, $\pi^J(\theta|\textbf{y})$ is a standard normal distribution, $\pi(\theta)$ is a normal distribution with a mean of 0.5 and a variance of 1 and $\pi^J(\theta)$ is a normal distribution with a mean of 0 and a variance of 900. The DAC < 1, thus prior-data agreement is concluded." width="90%" />
<p class="caption">
Figure 3.2: Calculating the DAC. In this example, <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> is a standard normal distribution, <span class="math inline">\(\pi(\theta)\)</span> is a normal distribution with a mean of 0.5 and a variance of 1 and <span class="math inline">\(\pi^J(\theta)\)</span> is a normal distribution with a mean of 0 and a variance of 900. The DAC < 1, thus prior-data agreement is concluded.
</p>
</div>
</div>
<div id="extension-to-multiple-experts" class="section level4">
<h4><span class="header-section-number">3.2.1.3</span> Extension to Multiple Experts</h4>
<p>The DAC, as described in the section above, determines the degree of prior-data (dis)agreement for a single prior that is to be evaluated. However, when we have multiple experts who each hold their own beliefs and we express each of these in the form of a probability distribution, we can ask some interesting questions. In Figure <a href="DAC1.html#fig:ch03fig3">3.3</a>, we see some examples of situations that we could encounter. In Figure <a href="DAC1.html#fig:ch03fig3">3.3</a>A, we see a situation in which experts differ in their predictions and their (un)certainty. The question that arises from the situation in Figure <a href="DAC1.html#fig:ch03fig3">3.3</a>A is which of these predictions best approximates the information that the data provides us with? Figure <a href="DAC1.html#fig:ch03fig3">3.3</a>B shows a scenario in which the experts are predicting similarly to each other but all differ with respect to the data. The question that arises from the situation in Figure <a href="DAC1.html#fig:ch03fig3">3.3</a>B is which of the two is correct: the data or the experts?</p>
<p>To be able to answer these types of questions, we need to extend the DAC to incorporate multiple experts’ priors, which are to be evaluated against the same posterior distribution, reflecting the data, and the same benchmark prior. The DAC thus needs to become a vector of length D resulting in</p>
<p><span class="math display" id="eq:ch03eq3">\[\begin{equation}
DAC_d = \frac{KL[\pi^J(.|\textrm{y})||\pi_d]}{KL[\pi^J(.|\textrm{y})||\pi^J]},
\tag{3.3}
\end{equation}\]</span>
where the subscript <span class="math inline">\(d\)</span> indexes the <span class="math inline">\(D\)</span> experts, so that <span class="math inline">\(DAC_d = DAC_1,...,DAC_D\)</span> and <span class="math inline">\(\pi_d(\theta) = \pi_1(\theta),...,\pi_D(\theta)\)</span>. This extension of the KL divergence, in which not one model but a vector of models is compared with the preferred model, is straightforward and has previously been described in the context of the Akaike Information Criterion (AIC) <span class="citation">(Akaike, <a href="#ref-akaike_information_1973" role="doc-biblioref">1973</a>; Burnham & Anderson, <a href="#ref-burnham_model_2002" role="doc-biblioref">2002</a>)</span>.</p>
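<p>A minimal sketch of this extension is given below (ours, not part of the original text), reusing the normal reference posterior and benchmark of Figure <a href="DAC1.html#fig:ch03fig2">3.2</a> together with a set of purely hypothetical expert priors; the experts are then ranked by their <span class="math inline">\(DAC_d\)</span> values.</p>
<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"># Sketch: computing the DAC_d vector of Equation (3.3) for several hypothetical experts,
# reusing the reference posterior N(0, 1) and benchmark N(0, 900) from Figure 3.2.
from math import log

def kl_normal(mu1, sd1, mu2, sd2):
    return log(sd2 / sd1) + (sd1**2 + (mu1 - mu2)**2) / (2 * sd2**2) - 0.5

post_mu, post_sd = 0.0, 1.0        # pi^J(theta | y), the reference posterior
bench_mu, bench_sd = 0.0, 30.0     # pi^J(theta), the uninformative benchmark prior

experts = {"expert 1": (0.5, 1.0),   # (mu_d, sd_d): illustrative elicited priors
           "expert 2": (2.0, 0.5),
           "expert 3": (0.0, 3.0)}

denominator = kl_normal(post_mu, post_sd, bench_mu, bench_sd)
dac = {name: kl_normal(post_mu, post_sd, mu_d, sd_d) / denominator
       for name, (mu_d, sd_d) in experts.items()}

# Rank experts from best (smallest DAC_d) to worst; values above 1 indicate disagreement.
for name, value in sorted(dac.items(), key=lambda item: item[1]):
    print(name, round(value, 3))
</code></pre></div>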
<div class="figure" style="text-align: center"><span id="fig:ch03fig3"></span>
<img src="figures/chapter_3/Figure3.png" alt="Scenarios in which there are multiple experts and one source of data. (A) shows experts differing in prediction and (un)certainty, all (dis)agreeing to a certain extent with the data; (B) shows a scenario in which all experts disagree with the data, which results in the question of which of the sources of information is correct." width="90%" />
<p class="caption">
Figure 3.3: Scenarios in which there are multiple experts and one source of data. (A) shows experts differing in prediction and (un)certainty, all (dis)agreeing to a certain extent with the data; (B) shows a scenario in which all experts disagree with the data, which results in the question of which of the sources of information is correct.
</p>
</div>
</div>
<div id="influence-of-the-benchmark" class="section level4">
<h4><span class="header-section-number">3.2.1.4</span> Influence of the Benchmark</h4>
<p>The choice for a specific benchmark can influence the results of the <span class="math inline">\(DAC_d\)</span>. Bousquet <span class="citation">(<a href="#ref-bousquet_diagnostics_2008" role="doc-biblioref">2008</a>)</span> suggests that, in applied studies, a convenient or intuitive prior is a reasonable choice for the benchmark. However, it is important to realize that the choice of a benchmark prior does influence the results of the analysis in the sense that the cut-off value for determining prior-data disagreement will shift as the KL divergence between <span class="math inline">\(\pi^J(\theta|\textrm{y})\)</span> and <span class="math inline">\(\pi^J(\theta)\)</span> changes. Nevertheless, as long as the benchmark prior is an uninformative prior in the sense that the posterior distribution is dominated by the data, <span class="math inline">\(\pi^J(\theta|\textrm{y})\)</span> will remain largely unchanged. This ensures that the <span class="math inline">\(DAC_d\)</span> has the desirable property that, when multiple experts are compared, their ranking does not change depending on which uninformative benchmark is chosen. This follows from the stability of <span class="math inline">\(\pi^J(\theta|\textrm{y})\)</span>, which ensures that the KL divergences between <span class="math inline">\(\pi^J(\theta|\textrm{y})\)</span> and the expert priors <span class="math inline">\(\pi_d(\theta)\)</span> are stable. Different choices for <span class="math inline">\(\pi^J(\theta)\)</span> do change the KL divergence in the denominator and therefore shift the prior-data disagreement boundary.</p>
<p>Concerning the benchmark, it is useful to note that it need not be restricted to an uninformative prior, but using an informative prior changes the interpretation and behavior of the DAC. When <span class="math inline">\(\pi^J(\theta)\)</span> is informative, <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> is sensitive to the specification of <span class="math inline">\(\pi^J(\theta)\)</span> and the KL divergence between <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> and <span class="math inline">\(\pi_d(\theta)\)</span> need no longer be stable, potentially influencing the ranking of the experts. To illustrate this behavior visually, we present the results of a simulation study in Figure <a href="DAC1.html#fig:ch03fig4">3.4</a>. We show four different conditions, that is, four different choices of benchmark prior, to illustrate the change in behavior of the <span class="math inline">\(DAC_d\)</span>. In all four situations, we use the same data, <span class="math inline">\(\textbf{y}\)</span>, a sample of 100 from a standard normal distribution with sample mean <span class="math inline">\(\bar{y}\)</span>. <span class="math inline">\(\pi_d(\theta)\)</span> is the <span class="math inline">\(N(\mu_0, \sigma^2_0)\)</span> density and we show the <span class="math inline">\(DAC_d\)</span> values for <span class="math inline">\(\mu_0 = \bar{y}-4,...,\bar{y}+4\)</span> and <span class="math inline">\(\sigma_0 = 0.1,...,3\)</span>. The four panels use different benchmarks: in Figure <a href="DAC1.html#fig:ch03fig4">3.4</a>A, the <span class="math inline">\(N(0, 10,000)\)</span> density; in Figure <a href="DAC1.html#fig:ch03fig4">3.4</a>B, the <span class="math inline">\(N(0, 1)\)</span> density; in Figure <a href="DAC1.html#fig:ch03fig4">3.4</a>C, the <span class="math inline">\(U(-50, 50)\)</span> density; and in Figure <a href="DAC1.html#fig:ch03fig4">3.4</a>D, the <span class="math inline">\(N(5, 0.5)\)</span> density. It can be seen that, for the two uninformative priors in Figure <a href="DAC1.html#fig:ch03fig4">3.4</a>A,C, the behavior of the <span class="math inline">\(DAC_d\)</span> is stable. We would expect to draw the same conclusions and rank experts in the same way regardless of which of these benchmarks is chosen. However, when we specify an informative benchmark, as in Figure <a href="DAC1.html#fig:ch03fig4">3.4</a>B,D, both the behavior of the <span class="math inline">\(DAC_d\)</span> and the determination of prior-data (dis)agreement shift. In Figure <a href="DAC1.html#fig:ch03fig4">3.4</a>B, an informative and accurate benchmark leads almost invariably to concluding prior-data disagreement for <span class="math inline">\(\pi_d(\theta)\)</span>. In Figure <a href="DAC1.html#fig:ch03fig4">3.4</a>D, the informative but inaccurate benchmark leads us to conclude prior-data disagreement only if <span class="math inline">\(\pi_d(\theta)\)</span> is in the wrong location and has a very small variance.</p>
<p>The simulation study presented in Figure <a href="DAC1.html#fig:ch03fig4">3.4</a> shows that the choice of benchmark can influence the results, so, even if a convenient or intuitive prior seems reasonable, it should be chosen carefully. Researchers should be aware that the ranking of experts is stable as long as an uninformative benchmark is chosen, but it might not be if the benchmark prior contains information.</p>
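<p>The role of the benchmark in the denominator can be sketched as follows. For simplicity, the sketch keeps the data-dominated posterior fixed, which is only justified for uninformative benchmarks as discussed above; with an informative benchmark the posterior itself would also shift. The numbers are again illustrative and do not reproduce the simulation in Figure <a href="DAC1.html#fig:ch03fig4">3.4</a>.</p>
<pre class="r"><code>## The numerators KL[pi^J(.|y) || pi_d] stay fixed; only the denominator changes with the benchmark.
kl_normal &lt;- function(m1, s1, m2, s2) log(s2 / s1) + (s1^2 + (m1 - m2)^2) / (2 * s2^2) - 0.5

post_mean &lt;- 0; post_sd &lt;- 0.1                       # assumed stable, data-dominated posterior
experts   &lt;- data.frame(mu0 = c(-1, 0, 0.5), sigma0 = c(0.8, 0.2, 0.1))
kl_d      &lt;- kl_normal(post_mean, post_sd, experts$mu0, experts$sigma0)

benchmarks &lt;- list(vague_1     = c(0, 100),          # N(0, 100^2), uninformative
                   vague_2     = c(0, 1000),         # N(0, 1000^2), uninformative
                   informative = c(5, 0.5))          # N(5, 0.5^2), informative and off-target
for (b in names(benchmarks)) {
  denom &lt;- kl_normal(post_mean, post_sd, benchmarks[[b]][1], benchmarks[[b]][2])
  cat(b, ": DAC_d =", round(kl_d / denom, 2), "\n")
}
## The order of the DAC_d values (the ranking) is the same for every benchmark; only the
## DAC_d = 1 cut-off, and hence the (dis)agreement verdict, moves with the denominator.</code></pre>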
<div class="figure" style="text-align: center"><span id="fig:ch03fig4"></span>
<img src="figures/chapter_3/Figure4.png" alt="The effect on the behavior of the $DAC_d$ for different choices for benchmark priors. All panels use the same data ($N = 100$) from a standard normal distribution and the same variations for $\pi_d(\theta)$ which are the normal distribution for which the parameters for the mean and standard deviation are given on the x-axis and y-axis of the panels. In (A), the benchmark is the $N(0, 10,000)$ density; in (B), the $N(0, 1)$ density; in (C), the $U(-50, 50)$ density and in (D), the $N(5, 0.5)$ density" width="90%" />
<p class="caption">
Figure 3.4: The effect on the behavior of the <span class="math inline">\(DAC_d\)</span> for different choices for benchmark priors. All panels use the same data (<span class="math inline">\(N = 100\)</span>) from a standard normal distribution and the same variations for <span class="math inline">\(\pi_d(\theta)\)</span> which are the normal distribution for which the parameters for the mean and standard deviation are given on the x-axis and y-axis of the panels. In (A), the benchmark is the <span class="math inline">\(N(0, 10,000)\)</span> density; in (B), the <span class="math inline">\(N(0, 1)\)</span> density; in (C), the <span class="math inline">\(U(-50, 50)\)</span> density and in (D), the <span class="math inline">\(N(5, 0.5)\)</span> density
</p>
</div>
</div>
</div>
<div id="DACvsBF" class="section level3">
<h3><span class="header-section-number">3.2.2</span> Comparison to Ranking by the Bayes Factor</h3>
<p>In order to develop a good understanding of the behavior of the DAC for expert ranking, this section provides a comparison to expert ranking using Bayes factors, that is, ranking experts on the marginal likelihood resulting from their prior. First, we provide a mathematical description of the Bayes factor (BF), which is a ratio of marginal likelihoods. Then, the influence of the benchmark prior is discussed, followed by a comparison of expert ranking via Bayes factors to expert ranking through the DAC.</p>
<div id="marginal-likelihood" class="section level4">
<h4><span class="header-section-number">3.2.2.1</span> Marginal Likelihood</h4>
<p>For a model <span class="math inline">\(M\)</span> and observed data <span class="math inline">\(\textbf{y}\)</span>, denote the likelihood <span class="math inline">\(f(\textbf{y}|\theta)\)</span> and prior <span class="math inline">\(\pi(\theta)\)</span> such that the posterior distribution</p>
<p><span class="math display" id="eq:ch03eq4">\[\begin{equation}
\pi(\theta|\textbf{y}) = \frac{f(\textbf{y}|\theta)\pi(\theta)}{\int_{\Theta}f(\textbf{y}|\theta)\pi(\theta)d\theta}.
\tag{3.4}
\end{equation}\]</span></p>
<p>The denominator on the right-hand side of Equation <a href="DAC1.html#eq:ch03eq4">(3.4)</a> is the marginal likelihood <span class="math inline">\(m(\textbf{y})\)</span>, sometimes called the evidence. The marginal likelihood can be thought of as the probability of the data averaged over the prior distribution <span class="citation">(Liu & Aitkin, <a href="#ref-liu_bayes_2008" role="doc-biblioref">2008</a>)</span>. As the probability of the data is dependent on the model, which is the set of probability distributions that is used <span class="citation">(Wasserman, <a href="#ref-wasserman_bayesian_2000" role="doc-biblioref">2000</a>)</span>, the marginal likelihood is influenced by the choice of model <span class="math inline">\(M\)</span>, the data <span class="math inline">\(\textbf{y}\)</span> and the prior <span class="math inline">\(\pi(\theta)\)</span>. If we have <span class="math inline">\(D\)</span> experts and we keep <span class="math inline">\(M\)</span> and <span class="math inline">\(\textbf{y}\)</span> equal across experts, the only difference in <span class="math inline">\(m_d(\textbf{y})\)</span> arises from the different specified priors <span class="math inline">\(\pi_d(\theta)\)</span>. We could thus differentiate between experts by assessing the probability of the data averaged across their specified prior beliefs.</p>
</div>
<div id="bayes-factor" class="section level4">
<h4><span class="header-section-number">3.2.2.2</span> Bayes Factor</h4>
<p>The BF can be used to compare the marginal likelihoods for the different experts, <span class="math inline">\(m_d(\textbf{y})\)</span>, such that, for example,</p>
<p><span class="math display" id="eq:ch03eq5">\[\begin{equation}
BF_{1d} = \frac{m_1(\textbf{y})}{m_d(\textbf{y})}
\tag{3.5}
\end{equation}\]</span>
provides the odds in favor of model <span class="math inline">\(M_1\)</span>, the model with the prior provided by expert 1, versus model <span class="math inline">\(M_d\)</span>, the model with the prior provided by expert <span class="math inline">\(d\)</span>. As the set of probability distributions that is used and the data <span class="math inline">\(\textbf{y}\)</span> are the same for all experts, this essentially provides the odds in favor of the prior <span class="math inline">\(\pi_1(\theta)\)</span> versus the prior <span class="math inline">\(\pi_d(\theta)\)</span>. In the same way, any two experts can be compared directly. It is well known that the BF is sensitive to the specification of different priors via the marginal likelihoods that are used <span class="citation">(Kass & Raftery, <a href="#ref-kass_bayes_1995" role="doc-biblioref">1995</a>; Liu & Aitkin, <a href="#ref-liu_bayes_2008" role="doc-biblioref">2008</a>; Morey, Romeijn, & Rouder, <a href="#ref-morey_philosophy_2016" role="doc-biblioref">2016</a>; Wasserman, <a href="#ref-wasserman_bayesian_2000" role="doc-biblioref">2000</a>)</span>. Liu and Aitkin <span class="citation">(<a href="#ref-liu_bayes_2008" role="doc-biblioref">2008</a>)</span> note that this is not necessarily undesirable. Moreover, in our case, this property is essential in allowing the evaluation of the relative merit of the experts’ beliefs that are specified in the form of prior probability distributions.</p>
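<p>As a sketch of how the marginal likelihood, interpreted as the probability of the data averaged over the prior, and the resulting BF could be computed, the following R code uses simple Monte Carlo averaging over prior draws. A normal likelihood with known standard deviation is assumed purely for illustration, and the data and the two expert priors are hypothetical.</p>
<pre class="r"><code>## Marginal likelihood m_d(y) approximated as the average likelihood over prior draws,
## and the Bayes factor BF_12 between two hypothetical expert priors.
set.seed(1)
y     &lt;- rnorm(100, mean = 0, sd = 1)   # stand-in data
sigma &lt;- 1                              # treated as known here
S     &lt;- 1e5                            # number of prior draws

log_marglik &lt;- function(mu0, sd0) {
  theta  &lt;- rnorm(S, mu0, sd0)          # draws from the expert's prior
  loglik &lt;- sapply(theta, function(t) sum(dnorm(y, t, sigma, log = TRUE)))
  ## log of the average likelihood, computed stably (log-sum-exp)
  max(loglik) + log(mean(exp(loglik - max(loglik))))
}

log_m1 &lt;- log_marglik(mu0 = 0.2, sd0 = 0.5)  # expert 1: roughly right location, less certain
log_m2 &lt;- log_marglik(mu0 = 0.2, sd0 = 0.1)  # expert 2: same location, more certain
BF_12  &lt;- exp(log_m1 - log_m2)               # odds in favor of expert 1 over expert 2
BF_12</code></pre>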
</div>
<div id="benchmark-model" class="section level4">
<h4><span class="header-section-number">3.2.2.3</span> Benchmark Model</h4>
<p>The BF allows us to compare the odds in favor of one expert over another but neither the individual marginal likelihoods based on expert priors nor the ratios provide us with an assessment of the inherent appropriateness of the prior in terms of (dis)agreement between the prior and the data. As with the DAC, we could imagine taking a benchmark prior <span class="math inline">\(\pi^J(\theta)\)</span> that serves as a reference point such that the marginal likelihood is <span class="math inline">\(m^J(\textbf{y})\)</span>. If we take</p>
<p><span class="math display" id="eq:ch03eq6">\[\begin{equation}
BF_{Jd} = \frac{m^J(\textbf{y})}{m_d(\textbf{y})}
\tag{3.6}
\end{equation}\]</span>
then, if <span class="math inline">\(BF_{Jd} < 1\)</span>, we would favor the model using the expert prior and conclude agreement with the data, whereas, if <span class="math inline">\(BF_{Jd} > 1\)</span>, we would favor the model using the benchmark prior and conclude disagreement with the data.</p>
<p>However, we run into the same issue as with the KL divergences because the marginal likelihood is ill-defined if improper priors are used <span class="citation">(Kass & Raftery, <a href="#ref-kass_bayes_1995" role="doc-biblioref">1995</a>; Liu & Aitkin, <a href="#ref-liu_bayes_2008" role="doc-biblioref">2008</a>; Wasserman, <a href="#ref-wasserman_bayesian_2000" role="doc-biblioref">2000</a>)</span>. Thus, again, reference priors <span class="citation">(Bernardo, <a href="#ref-bernardo_reference_1979" role="doc-biblioref">1979</a>)</span> are not suitable for use in this context. Raftery <span class="citation">(<a href="#ref-raftery_approximate_1996" role="doc-biblioref">1996</a>)</span> suggests using a reference set of proper priors and both Kass and Raftery <span class="citation">(<a href="#ref-kass_bayes_1995" role="doc-biblioref">1995</a>)</span> and Liu and Aitkin <span class="citation">(<a href="#ref-liu_bayes_2008" role="doc-biblioref">2008</a>)</span> suggest conducting a sensitivity analysis in any case. To keep the comparison between the <span class="math inline">\(BF_{Jd}\)</span> and the <span class="math inline">\(DAC_d\)</span> straightforward, we will use the same benchmark prior <span class="math inline">\(\pi^J(\theta)\)</span> in both situations. As both <span class="math inline">\(BF_{Jd}\)</span> and <span class="math inline">\(DAC_d\)</span> are sensitive to the choice for <span class="math inline">\(\pi^J(\theta)\)</span>, a sensitivity analysis will be included in the empirical part of this paper. Note that this sensitivity is most evident when using these tools as a prior-data conflict criterion, as the expert rankings will generally remain unchanged for different uninformative benchmark priors.</p>
</div>
</div>
<div id="DACvsBF2" class="section level3">
<h3><span class="header-section-number">3.2.3</span> DAC Versus BF</h3>
<p>Burnham and Anderson <span class="citation">(<a href="#ref-burnham_model_2002" role="doc-biblioref">2002</a>)</span> state that the BF is analogous to an information-theoretic evidence ratio such as the DAC. If we directly compare two experts with a BF, we obtain odds favoring one expert over the other, and if we compare the KL divergences of two experts, we can state that one expert has a certain number of times the loss of information of the other. Despite the analogy, the two measures are inherently different. This is most clearly seen from the alternative form of the DAC given by Bousquet <span class="citation">(<a href="#ref-bousquet_diagnostics_2008" role="doc-biblioref">2008</a>)</span>, which in our case is</p>
<p><span class="math display" id="eq:ch03eq7">\[\begin{equation}
DAC_{2,d}^J = \frac{m^J(\textbf{y})}{m_d(\textbf{y})}\exp\{KL[\pi^J(.|\textbf{y})||\pi_d(.|\textbf{y})]\} = BF_{Jd}\,\exp\{KL[\pi^J(.|\textbf{y})||\pi_d(.|\textbf{y})]\}.
\tag{3.7}
\end{equation}\]</span></p>
<p>Therefore, the difference between the DAC and the BF can clearly be seen to be that the DAC multiplies the BF by the additional factor <span class="math inline">\(\exp\{KL[\pi^J(.|\textbf{y})||\pi_d(.|\textbf{y})]\}\)</span>, the exponentiated KL divergence between the reference posterior and the posterior based on the prior of expert <span class="math inline">\(d\)</span>. This additional factor is desirable, as it penalizes experts who are overly certain more harshly than the BF does.</p>
<p>To illustrate this, consider the following limiting case. Imagine an expert who believes that they are infinitely certain about the future. This expert should then specify their prior in the form of a Dirac delta function <span class="math inline">\(\delta_{\theta_0}(\theta)\)</span>, also called the degenerate distribution on the real line, which has density zero everywhere for <span class="math inline">\(\theta\)</span> except for <span class="math inline">\(\theta_0\)</span> where it has infinite density <span class="citation">(Dirac, <a href="#ref-dirac_principles_1947" role="doc-biblioref">1947</a>)</span>. Moreover, the delta function actually integrates to one and in that sense is a proper prior which can also be viewed as an infinitely narrow Gaussian <span class="math inline">\(\delta(\theta-\theta_0) = \lim_{\sigma\to0}N(\theta|\theta_0, \sigma^2)\)</span> <span class="citation">(Barber, <a href="#ref-barber_bayesian_2012" role="doc-biblioref">2012</a>)</span>. Now, if an expert states their prior belief in the form of a delta function and <span class="math inline">\(\theta_0\)</span> coincides with a region of <span class="math inline">\(\theta\)</span> where the likelihood <span class="math inline">\(f(\textbf{y}|\theta)>0\)</span>, the marginal likelihood reduces to the likelihood value <span class="math inline">\(f(\textbf{y}|\theta_0)\)</span>, which for a well-placed <span class="math inline">\(\theta_0\)</span> is as large as any prior can achieve, whilst <span class="math inline">\(KL[\pi^J(.|\textbf{y})||\delta_{\theta_0}(.)]\)</span> becomes infinite. The implications could, however, not be more different: the marginal likelihood suggests that this expert is the best possible expert, whilst the KL divergence suggests that there is no worse expert. Although this scenario is quite extreme, van de Schoot, Griffioen and Winter <span class="citation">(<a href="#ref-van_de_schoot_dealing_2018" role="doc-biblioref">2018</a>)</span> did encounter such an expert in their elicitation endeavors.</p>
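<p>The contrast in this limiting case can also be sketched numerically. The code below shrinks a hypothetical expert’s normal prior towards a point mass at a value <span class="math inline">\(\theta_0\)</span> that lies inside the support of the likelihood, using the closed-form marginal likelihood that is available for a normal prior combined with a normal likelihood with known standard deviation; all numerical values are illustrative.</p>
<pre class="r"><code>## An increasingly overconfident (near-degenerate) prior: the marginal likelihood keeps
## rising towards the likelihood value at theta0, whilst the KL divergence from the
## reference posterior grows without bound.
set.seed(1)
y &lt;- rnorm(100, 0, 1); sigma &lt;- 1; n &lt;- length(y); ybar &lt;- mean(y)
post_mean &lt;- ybar; post_sd &lt;- sigma / sqrt(n)   # reference posterior under a vague prior (approx.)

kl_normal &lt;- function(m1, s1, m2, s2) log(s2 / s1) + (s1^2 + (m1 - m2)^2) / (2 * s2^2) - 0.5

## Closed-form log m_d(y) for a N(mu0, sd0^2) prior and a normal likelihood with known sigma.
log_marglik &lt;- function(mu0, sd0) {
  sum(dnorm(y, ybar, sigma, log = TRUE)) + 0.5 * log(2 * pi * sigma^2 / n) +
    dnorm(ybar, mu0, sqrt(sigma^2 / n + sd0^2), log = TRUE)
}

theta0 &lt;- ybar + 0.05                    # close to, but not exactly at, the data's location
for (sd0 in c(0.2, 0.05, 0.01, 0.001)) {
  cat(sprintf("sd0 = %6.3f   log m_d(y) = %8.2f   KL[post || prior] = %8.2f\n",
              sd0, log_marglik(theta0, sd0), kl_normal(post_mean, post_sd, theta0, sd0)))
}
## The marginal likelihood declares the near-degenerate prior the best of these,
## whilst the KL divergence declares it by far the worst.</code></pre>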
</div>
</div>
<div id="empirical-example" class="section level2">
<h2><span class="header-section-number">3.3</span> Empirical Example</h2>
<p>To show that the <span class="math inline">\(DAC_d\)</span> can be used to evaluate and rank several experts based on their beliefs, we conducted an empirical study. The team that participated consisted of 11 experts: 10 regional directors and one director. All were eligible to be included in the study. Seven experts were randomly invited to participate in the research; if any of the selected experts did not want to participate, they were recorded as not having been selected for the research. In this way, we avoided the possibility of group pressure to participate. In the end, four out of the seven selected experts participated in an elicitation. The experts (<span class="math inline">\(D = 4\)</span>) provided forecasts concerning average turnover per professional in the first quarter of the year 2016. The (regional) directors are considered experts in knowledge concerning market opportunities, market dynamics and estimating the capabilities of the professionals to seize opportunities. Based on these skills, we expected that they could predict the average turnover per professional in the entire country in the first quarter of 2016. All information related to the empirical study can be found on the OSF webpage for this paper at <a href="https://osf.io/u57qs">https://osf.io/u57qs</a>.</p>
<div id="elicitation-procedure" class="section level3">
<h3><span class="header-section-number">3.3.1</span> Elicitation Procedure</h3>
<p>To get the experts to express their beliefs in the form of a probability distribution, we make use of the Five-Step Method <span class="citation">(Veen, Stoel, Zondervan-Zwijnenburg, & van de Schoot, <a href="#ref-veen_proposal_2017" role="doc-biblioref">2017</a>)</span>. To encapsulate the beliefs of the expert, the Five-Step Method actively separates two elements of the expert’s knowledge: their tacit knowledge and their (un)certainty. In step one, a location parameter is elicited from the expert. This location parameter captures the tacit knowledge of the expert. To verify that the representation of the beliefs is accurate, step two is the incorporation of feedback, implemented through elicitation software. Experts can accept the representation of their beliefs or adjust their input. In step three, the (un)certainty of the experts is obtained and represented in the form of a scale and shape parameter. Step four is to provide feedback using the elicitation software to verify the accurate representation of the expert’s (un)certainty, which they can either accept or adjust until the representation is in accordance with their beliefs. The fifth step is to use the elicited beliefs, in this case to determine each expert’s DAC score.</p>
<p>The experts first performed a practice elicitation for their own sales team before moving on to the whole country. The practice run enabled them to acquaint themselves with the elicitation procedure and software we used. The elicited distributions were restricted to be skewed normal distributions such that <span class="math inline">\(\pi_d(\theta)\)</span> are <span class="math inline">\(SN(\mu_0,\sigma^2_0,\gamma_0)\)</span> densities where subscript <span class="math inline">\(d\)</span> denotes expert <span class="math inline">\(d = 1,...,D\)</span>, <span class="math inline">\(\mu_0\)</span> denotes the prior mean, <span class="math inline">\(\sigma^2_0\)</span> denotes the prior variance and <span class="math inline">\(\gamma_0\)</span> denotes the prior skewness. The shape parameter <span class="math inline">\(\gamma_0\)</span> is based on a general method for the transformation of symmetric distributions into skewed distributions as described by Equation (1) in Fernandez and Steel <span class="citation">(<a href="#ref-fernandez_bayesian_1998" role="doc-biblioref">1998</a>)</span>. Table <a href="DAC1.html#tab:ch03tab1">3.1</a> provides an overview of the elicited distributions for the four experts in this empirical study. The distributions are based upon transformed data to avoid revealing business-sensitive information.</p>
<table style="width:74%;">
<caption><span id="tab:ch03tab1">Table 3.1: </span> The values of the hyperparameters of <span class="math inline">\(\pi_d(\theta)\)</span> for the empirical study.</caption>
<colgroup>
<col width="22%" />
<col width="15%" />
<col width="18%" />
<col width="18%" />
</colgroup>
<thead>
<tr class="header">
<th></th>
<th align="center"><span class="math inline">\(\mu_0\)</span></th>
<th align="center"><span class="math inline">\(\sigma_0\)</span></th>
<th align="center"><span class="math inline">\(\gamma_0\)</span></th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>Expert 1</td>
<td align="center">2.15</td>
<td align="center">0.09</td>
<td align="center">0.78</td>
</tr>
<tr class="even">
<td>Expert 2</td>
<td align="center">2.16</td>
<td align="center">0.07</td>
<td align="center">0.82</td>
</tr>
<tr class="odd">
<td>Expert 3</td>
<td align="center">1.97</td>
<td align="center">0.11</td>
<td align="center">0.82</td>
</tr>
<tr class="even">
<td>Expert 4</td>
<td align="center">2.35</td>
<td align="center">0.11</td>
<td align="center">0.94</td>
</tr>
</tbody>
</table>
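<p>As a sketch of the skewing construction behind Table <a href="DAC1.html#tab:ch03tab1">3.1</a>, the function below applies Equation (1) of Fernández and Steel <span class="citation">(<a href="#ref-fernandez_bayesian_1998" role="doc-biblioref">1998</a>)</span> to a normal kernel with an added location and scale. The exact parameterization used by the elicitation software may differ, so the resulting curve should be read as illustrative only.</p>
<pre class="r"><code>## Skewed normal density via the Fernandez-Steel transformation of a symmetric kernel.
dskewnorm &lt;- function(x, mu = 0, sigma = 1, gamma = 1) {
  z    &lt;- (x - mu) / sigma
  dens &lt;- ifelse(z &gt;= 0, dnorm(z / gamma), dnorm(z * gamma))  # stretch one half, compress the other
  2 / (gamma + 1 / gamma) * dens / sigma                      # renormalize and rescale
}

## Quick checks: the density integrates to (approximately) one over the region with mass,
## and gamma = 1 recovers the symmetric normal density.
integrate(dskewnorm, 1, 3, mu = 2.15, sigma = 0.09, gamma = 0.78)
curve(dskewnorm(x, mu = 2.15, sigma = 0.09, gamma = 0.78), from = 1.8, to = 2.5,
      ylab = "density")   # shape implied by the values elicited for expert 1</code></pre>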
</div>
<div id="ranking-the-experts" class="section level3">
<h3><span class="header-section-number">3.3.2</span> Ranking the Experts</h3>
<p>The predictions of the experts concerned the average turnover per professional (<span class="math inline">\(N = 104\)</span>). The benchmark is the <span class="math inline">\(U(0, 5)\)</span> density. A uniform distribution was chosen in line with the prior used by Bousquet <span class="citation">(<a href="#ref-bousquet_diagnostics_2008" role="doc-biblioref">2008</a>)</span> in his Example 1 concerning a normal model. The lower bound of 0 arises from the natural constraint that negative turnover cannot occur; the upper bound of 5 was considered a value that could not be attained. As this upper bound is to some extent arbitrary, a sensitivity analysis was conducted to investigate the impact of the choice of <span class="math inline">\(\pi^J(\theta)\)</span>. With regard to the desired minimal influence of <span class="math inline">\(\pi^J(\theta)\)</span> on <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span>, in our case the reference posterior can be calculated analytically (see Yang and Berger <span class="citation">(<a href="#ref-yang_catalog_1996" role="doc-biblioref">1996</a>)</span>). The KL divergence for approximating the reference posterior with <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> was 0.00016, which we considered negligible.</p>
<p>We obtained the posterior distribution <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> using the rjags R-package <span class="citation">(Plummer, <a href="#ref-plummer_rjags:_2018" role="doc-biblioref">2018</a>)</span>, such that <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> is the <span class="math inline">\(N(\mu_1,\sigma^2_1)\)</span> density, where <span class="math inline">\(\mu_1\)</span> denotes the posterior mean and <span class="math inline">\(\sigma^2_1\)</span> denotes the posterior variance. We used four chains of 25,000 samples after a burn-in period of 1000 samples per chain. Visual inspection and Gelman–Rubin diagnostics <span class="citation">(Gelman & Rubin, <a href="#ref-gelman_inference_1992" role="doc-biblioref">1992</a>)</span> did not point towards problems with convergence of the chains and inspection of the autocorrelation plots showed no issues concerning autocorrelation. To compute the marginal likelihoods and BF, we used the R-package rstan <span class="citation">(Stan Development Team, <a href="#ref-stan_development_team_rstan:_2018" role="doc-biblioref">2018</a><a href="#ref-stan_development_team_rstan:_2018" role="doc-biblioref">b</a>)</span> with four chains of 1000 samples after burn-in to obtain the posterior distributions, and the bridgesampling R-package <span class="citation">(Gronau & Singmann, <a href="#ref-gronau_bridgesampling:_2017" role="doc-biblioref">2017</a>)</span> to obtain the marginal likelihoods and BF. For more details, see the data archive on the OSF webpage. Table <a href="DAC1.html#tab:ch03tab2">3.2</a> displays KL divergences, <span class="math inline">\(DAC_d\)</span> scores and ranking, marginal likelihoods and <span class="math inline">\(BF_{Jd}\)</span> scores and ranking. Figure <a href="DAC1.html#fig:ch03fig5">3.5</a> visually presents all relevant distributions concerning the empirical study. Figure <a href="DAC1.html#fig:ch03fig6">3.6</a> panels A through E visually present all KL divergences from Table <a href="DAC1.html#tab:ch03tab2">3.2</a>. Table <a href="DAC1.html#tab:ch03tab3">3.3</a> presents the results of the sensitivity analysis for different choices of <span class="math inline">\(\pi^J(\theta)\)</span> and Table <a href="DAC1.html#tab:ch03tab4">3.4</a> allows for a comparison between experts without reference to any benchmark <span class="math inline">\(\pi^J(\theta)\)</span>.</p>
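<p>To indicate how the KL divergences and <span class="math inline">\(DAC_d\)</span> scores in Table <a href="DAC1.html#tab:ch03tab2">3.2</a> can be computed once the reference posterior is available, the sketch below evaluates the required integrals numerically. The posterior mean and standard deviation used here are placeholders rather than the values estimated in the study, so the resulting numbers will not reproduce Table <a href="DAC1.html#tab:ch03tab2">3.2</a>.</p>
<pre class="r"><code>## Numerical KL divergence KL[p || q] = int p(x) log(p(x) / q(x)) dx over a finite range.
kl_div &lt;- function(p, q, lower, upper) {
  integrate(function(x) {
    px &lt;- p(x)
    ifelse(px &gt; 0, px * (log(px) - log(q(x))), 0)
  }, lower, upper)$value
}

post  &lt;- function(x) dnorm(x, mean = 2.1, sd = 0.05)  # placeholder reference posterior pi^J(theta|y)
bench &lt;- function(x) dunif(x, 0, 5)                   # benchmark prior U(0, 5)

## Fernandez-Steel skewed normal density (as sketched in the previous section).
dskewnorm &lt;- function(x, mu, sigma, gamma) {
  z &lt;- (x - mu) / sigma
  2 / (gamma + 1 / gamma) * ifelse(z &gt;= 0, dnorm(z / gamma), dnorm(z * gamma)) / sigma
}
expert1 &lt;- function(x) dskewnorm(x, 2.15, 0.09, 0.78) # elicited values for expert 1 (Table 3.1)

## Integration limits cover the region where the placeholder posterior has essentially all its mass.
denom &lt;- kl_div(post, bench,   lower = 1, upper = 3)  # KL[pi^J(.|y) || pi^J]
numer &lt;- kl_div(post, expert1, lower = 1, upper = 3)  # KL[pi^J(.|y) || pi_1]
DAC_1 &lt;- numer / denom
DAC_1                                                  # a value above 1 would signal prior-data disagreement</code></pre>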
<div class="figure" style="text-align: center"><span id="fig:ch03fig5"></span>
<img src="figures/chapter_3/Figure5.png" alt="Visual presentation of all relevant distributions for the empirical study; $\pi_d(\theta)$, $\pi^J(\theta)$ and $\pi^J(\theta|\textbf{y})$." width="90%" />
<p class="caption">
Figure 3.5: Visual presentation of all relevant distributions for the empirical study; <span class="math inline">\(\pi_d(\theta)\)</span>, <span class="math inline">\(\pi^J(\theta)\)</span> and <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span>.
</p>
</div>
<p>The results of Table <a href="DAC1.html#tab:ch03tab2">3.2</a> show that expert four provided the best prediction of the experts when using both the <span class="math inline">\(DAC_d\)</span> and the <span class="math inline">\(BF_{Jd}\)</span>. Experts one and two provided similar predictions concerning their tacit knowledge; they expected almost the same value for the location parameter; however, expert one was less certain about this prediction (see Table <a href="DAC1.html#tab:ch03tab1">3.1</a>). As the prediction of the location was not entirely correct, the increased uncertainty of expert one means that this expert assigned more plausibility to the regions of the parameter space that were also supported by the data. Here, the difference between the <span class="math inline">\(DAC_d\)</span> and the <span class="math inline">\(BF_{Jd}\)</span> discussed in section <a href="DAC1.html#DACvsBF2">3.2.3</a> arises. Overconfidence is penalized more severely by the <span class="math inline">\(DAC_d\)</span> and, consequently, the conclusion on which of experts one and two would be preferred changes depending on which measure is used. When we look at the <span class="math inline">\(DAC_d\)</span>, in the case when <span class="math inline">\(\pi^J(\theta)\)</span> is the <span class="math inline">\(U(0, 5)\)</span> density, the additional penalization of the overconfidence even leads to a different conclusion for experts one and two, namely, expert one is in prior-data agreement and expert two is in prior-data disagreement. For the <span class="math inline">\(BF_{Jd}\)</span>, both are concluded to be in agreement with the data. Expert three provided a prediction that, to a large extent, did not support the same parameter space as the data. In fact, expert three provided substantial support for regions of the parameter space that the data did not support. The discrepancy between expert three and the data was of such proportions that, besides expert two, we also concluded prior-data disagreement for expert three. If we had no information beforehand, except knowing the region within which the average turnover per professional could fall, we would have lost less information than by considering the predictions of experts two and three. The <span class="math inline">\(BF_{Jd}\)</span> differs from the <span class="math inline">\(DAC_d\)</span> in the sense that, when <span class="math inline">\(\pi^J(\theta)\)</span> is the <span class="math inline">\(U(0, 5)\)</span> density, the benchmark only outperforms expert three.</p>
<div class="figure" style="text-align: center"><span id="fig:ch03fig6"></span>
<img src="figures/chapter_3/Figure6.png" alt="All KL divergences for $\pi_d(\theta)$ (A–D) and $\pi^J(\theta)$ (E) with $\pi^J(\theta|\textbf{y})$ as the distribution that is to be approximated. (A) is for expert one; (B) for expert two; (C) for expert three and (D) for expert four." width="90%" />
<p class="caption">
Figure 3.6: All KL divergences for <span class="math inline">\(\pi_d(\theta)\)</span> (A–D) and <span class="math inline">\(\pi^J(\theta)\)</span> (E) with <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> as the distribution that is to be approximated. (A) is for expert one; (B) for expert two; (C) for expert three and (D) for expert four.
</p>
</div>
<div style="page-break-after: always;"></div>
<table style="width:99%;">
<caption><span id="tab:ch03tab2">Table 3.2: </span> KL divergences, <span class="math inline">\(DAC_d\)</span> scores and ranking, marginal likelihoods and <span class="math inline">\(BF_{Jd}\)</span> scores and ranking, for the experts’ priors and the benchmark prior. Note that marginal likelihoods are reported and not the log marginal likelihoods.</caption>
<colgroup>
<col width="13%" />
<col width="14%" />
<col width="10%" />
<col width="17%" />
<col width="19%" />
<col width="11%" />
<col width="12%" />
</colgroup>
<thead>
<tr class="header">
<th></th>
<th align="center">KL Divergence</th>
<th align="center"><span class="math inline">\(DAC_d\)</span></th>
<th align="center">Ranking <span class="math inline">\(DAC_d\)</span></th>
<th align="center"><span class="math inline">\(m_d(\textbf{y})\)</span> &
<span class="math inline">\(m^J(\textbf{y})\)</span></th>
<th align="center"><span class="math inline">\(BF_{Jd}\)</span></th>
<th align="center">Ranking
<span class="math inline">\(BF_{Jd}\)</span></th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>Expert 1</td>
<td align="center">1.43</td>
<td align="center">0.56</td>
<td align="center">2</td>
<td align="center">5.57 x <span class="math inline">\(10^{-68}\)</span></td>
<td align="center">0.21</td>
<td align="center">3</td>
</tr>
<tr class="even">
<td>Expert 2</td>
<td align="center">2.86</td>
<td align="center">1.12</td>
<td align="center">3</td>
<td align="center">6.82 x <span class="math inline">\(10^{-68}\)</span></td>
<td align="center">0.17</td>
<td align="center">2</td>
</tr>
<tr class="odd">
<td>Expert 3</td>
<td align="center">5.76</td>
<td align="center">2.26</td>
<td align="center">4</td>
<td align="center">2.19 x <span class="math inline">\(10^{-69}\)</span></td>
<td align="center">5.31</td>
<td align="center">4</td>
</tr>
<tr class="even">
<td>Expert 4</td>
<td align="center">0.19</td>
<td align="center">0.07</td>
<td align="center">1</td>
<td align="center">1.72 x <span class="math inline">\(10^{-67}\)</span></td>
<td align="center">0.07</td>
<td align="center">1</td>
</tr>
<tr class="odd">
<td>Benchmark</td>
<td align="center">2.55</td>
<td align="center">–</td>
<td align="center">–</td>
<td align="center">1.16 x <span class="math inline">\(10^{-68}\)</span></td>
<td align="center">–</td>
<td align="center">–</td>
</tr>
</tbody>
</table>
<table style="width:99%;">
<caption><span id="tab:ch03tab3">Table 3.3: </span> Sensitivity analysis for different choices for <span class="math inline">\(\pi^J(\theta)\)</span>. Densities are given in the columns. The KL divergences and marginal likelihood <span class="math inline">\(m^J(\textbf{y})\)</span> are presented in the rows. <span class="math inline">\(m_d(\textbf{y})\)</span> do not change and are not reported.</caption>
<colgroup>
<col width="24%" />
<col width="14%" />
<col width="14%" />
<col width="14%" />
<col width="16%" />
<col width="16%" />
</colgroup>
<thead>
<tr class="header">
<th></th>
<th align="center"><span class="math inline">\(U(0,5)\)</span></th>
<th align="center"><span class="math inline">\(U(-10,10)\)</span></th>
<th align="center"><span class="math inline">\(N(0,10^2)\)</span></th>
<th align="center"><span class="math inline">\(N(0,10^3)\)</span></th>
<th align="right"><span class="math inline">\(N(0,10^4)\)</span></th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td><span class="math inline">\(KL[\pi^J(.|\textbf{y})||\pi_1]\)</span></td>
<td align="center">1.43</td>
<td align="center">1.42</td>
<td align="center">1.37</td>
<td align="center">1.42</td>
<td align="right">1.42</td>
</tr>
<tr class="even">
<td><span class="math inline">\(KL[\pi^J(.|\textbf{y})||\pi_2]\)</span></td>
<td align="center">2.86</td>
<td align="center">2.84</td>
<td align="center">2.75</td>
<td align="center">2.85</td>
<td align="right">2.85</td>
</tr>
<tr class="odd">
<td><span class="math inline">\(KL[\pi^J(.|\textbf{y})||\pi_3]\)</span></td>
<td align="center">5.76</td>
<td align="center">5.75</td>
<td align="center">5.67</td>
<td align="center">5.76</td>
<td align="right">5.77</td>
</tr>
<tr class="even">
<td><span class="math inline">\(KL[\pi^J(.|\textbf{y})||\pi_4]\)</span></td>
<td align="center">0.19</td>
<td align="center">0.19</td>
<td align="center">0.20</td>
<td align="center">0.19</td>
<td align="right">0.19</td>
</tr>
<tr class="odd">
<td><span class="math inline">\(KL[\pi^J(.|\textbf{y})||\pi^J]\)</span></td>
<td align="center">2.55</td>
<td align="center">3.93</td>
<td align="center">4.18</td>
<td align="center">6.46</td>
<td align="right">8.76</td>
</tr>
<tr class="even">
<td><span class="math inline">\(m^J(\textbf{y})\)</span></td>
<td align="center">1.16 x <span class="math inline">\(10^{-68}\)</span></td>
<td align="center">2.91 x <span class="math inline">\(10^{-69}\)</span></td>
<td align="center">5.65 x <span class="math inline">\(10^{-69}\)</span></td>
<td align="center">2.26 x <span class="math inline">\(10^{-69}\)</span></td>
<td align="right">7.33 x <span class="math inline">\(10^{-70}\)</span></td>
</tr>
</tbody>
</table>
<table style="width:99%;">
<caption><span id="tab:ch03tab4">Table 3.4: </span> Comparison between experts based on KL divergences and marginal likelihoods. We report BF in favor of the row over the column and KL ratios for loss of information of the row over loss of information of the column.</caption>
<colgroup>
<col width="13%" />
<col width="12%" />
<col width="7%" />
<col width="13%" />
<col width="7%" />
<col width="13%" />
<col width="8%" />
<col width="12%" />
<col width="7%" />
</colgroup>
<thead>
<tr class="header">
<th></th>
<th align="center">Expert 1</th>
<th></th>
<th align="center">Expert 2</th>
<th></th>
<th align="center">Expert 3</th>
<th></th>
<th align="center">Expert 4</th>
<th></th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td></td>
<td align="center">KL Ratio</td>
<td>BF</td>
<td align="center">KL Ratio</td>
<td>BF</td>
<td align="center">KL Ratio</td>
<td>BF</td>
<td align="center">KL Ratio</td>
<td>BF</td>
</tr>
<tr class="even">
<td>Expert 1</td>
<td align="center">1</td>
<td>1</td>
<td align="center">0.50</td>
<td>0.82</td>
<td align="center">0.25</td>
<td>25.42</td>
<td align="center">7.63</td>
<td>0.32</td>
</tr>
<tr class="odd">
<td>Expert 2</td>
<td align="center">2.00</td>
<td>1.22</td>
<td align="center">1</td>
<td>1</td>
<td align="center">0.50</td>
<td>31.13</td>
<td align="center">15.23</td>
<td>0.40</td>
</tr>
<tr class="even">
<td>Expert 3</td>
<td align="center">4.03</td>
<td>0.04</td>
<td align="center">2.02</td>
<td>0.03</td>
<td align="center">1</td>
<td>1</td>
<td align="center">30.75</td>
<td>0.01</td>
</tr>
<tr class="odd">
<td>Expert 4</td>
<td align="center">0.13</td>
<td>3.09</td>
<td align="center">0.07</td>
<td>2.52</td>
<td align="center">0.03</td>
<td>78.54</td>
<td align="center">1</td>
<td>1</td>
</tr>
</tbody>
</table>
<div style="page-break-after: always;"></div>
<p>From the sensitivity analyses in Table <a href="DAC1.html#tab:ch03tab3">3.3</a> we find that the reference posterior remains quite stable and therefore the KL divergences for the experts do not change substantially; however, the changing KL divergence for the benchmark shifts the prior-data disagreement boundary. When <span class="math inline">\(\pi^J(\theta)\)</span> was the <span class="math inline">\(N(0,10^3)\)</span> or <span class="math inline">\(N(0,10^4)\)</span> density, expert three would no longer be in prior-data conflict, whilst prior-data disagreement for expert two was only concluded if <span class="math inline">\(\pi^J(\theta)\)</span> was the <span class="math inline">\(U(0, 5)\)</span> density. For the BF, changing the benchmark also shifts the prior-data (dis)agreement boundary arbitrarily. In this case, our decisions on prior-data (dis)agreement would only change for the <span class="math inline">\(N(0, 10^4)\)</span> prior, where expert three would no longer be in prior-data disagreement. The sensitivity analysis showed that decisions on prior-data (dis)agreement might not be entirely reliable, whilst the ranking of experts remained stable.</p>
<p>Table <a href="DAC1.html#tab:ch03tab4">3.4</a> shows the results when we compare experts only on their KL divergences and their marginal likelihoods and omit the benchmarks. We see the difference between the BF and the KL divergence ratios when we compare experts one and two. The differences arise from the more severe penalization of overconfidence by KL divergences compared to the BF, as discussed in section <a href="DAC1.html#DACvsBF2">3.2.3</a>. Using KL divergence ratios, we concluded that expert two had twice the loss of information of expert one, whilst the BF even favors expert two over expert one with odds of 1.22.</p>
<p>The results of the empirical study show a slight difference in the conclusions with regard to the ranking of the experts depending on which measure we used, <span class="math inline">\(DAC_d\)</span> or <span class="math inline">\(BF_{Jd}\)</span>. Both measures select the same expert as being the best. If decisions should be made concerning average turnover per professional, decision makers would be wise to consult expert four, as this expert seemed to have the best knowledge of the underlying factors driving these results.</p>
</div>
</div>
<div id="ch03discussion" class="section level2">
<h2><span class="header-section-number">3.4</span> Discussion</h2>
<p>In this paper, we use both the BF and the DAC to rank experts’ beliefs when they are specified in the probabilistic form of prior distributions. When comparing the BF and the DAC, the limiting case example of Section <a href="DAC1.html#DACvsBF2">3.2.3</a> springs to mind. In the introduction, we stated that forecasting without specifying uncertainty would not make sense to us and, in that light, we would prefer to use a measure that classifies doing so as undesirable behavior and punishes this extreme case. An example of this behavior can be seen in the empirical example: using the BF, we would favor expert two over expert one, whereas using KL divergences, we would favor expert one over expert two.</p>
<p>The sensitivity analysis in the empirical example, however, also highlighted some undesirable characteristics of the DAC for our context, namely the sensitivity to different choices for <span class="math inline">\(\pi^J(\theta)\)</span>. In the context of ranking experts, it can make sense to drop the association between <span class="math inline">\(\pi^J(\theta)\)</span> and <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span>. <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> can remain a reference posterior and as such represent the characteristics of <span class="math inline">\(\textbf{y}\)</span>. <span class="math inline">\(\pi^J(\theta)\)</span> can either be omitted or be specified such that it is meaningful. If <span class="math inline">\(\pi^J(\theta)\)</span> is omitted, we do not have a reference point for (dis)agreement; however, if arbitrarily chosen benchmarks shift this reference point, it hardly has any meaning. Without a benchmark, experts can still be compared with each other in terms of ratios of loss of information, as presented in Table <a href="DAC1.html#tab:ch03tab4">3.4</a>. If, however, a meaningful <span class="math inline">\(\pi^J(\theta)\)</span> is available, for instance a gold standard that is used in a forecasting situation, we can assess experts’ beliefs in relation to this benchmark and see whether they outperform it. If the association between <span class="math inline">\(\pi^J(\theta)\)</span> and <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> is dropped, we can specify informative benchmarks without the adverse effect of changing <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> and thereby the divergences between <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> and <span class="math inline">\(\pi_d(\theta)\)</span>. Moreover, specifying informative benchmarks requires elaboration of the rationale behind the choice, which enhances trust in the conclusions if a sensitivity analysis shows that different priors representing similar information lead to the same conclusions.</p>
<p>One of the reasons for the sensitivity of the DAC to different choices for <span class="math inline">\(\pi^J(\theta)\)</span> can be seen by comparing the KL divergences of experts one and two in the empirical example. As a referee pointed out to us, KL divergences are tail sensitive, and this can be seen in this comparison. Expert one is a little more uncertain and as such the tails of <span class="math inline">\(\pi_1(\theta)\)</span> overlap somewhat more with <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> than those of <span class="math inline">\(\pi_2(\theta)\)</span>. This leads to half the loss of information. One could deem this tail sensitivity undesirable and, with differently shaped prior distributions, this problem might become more pronounced. If it is deemed undesirable, one could favor using the BF, which actually favors expert two over expert one with odds of 1.22. Alternatively, an interesting area for future research could be to investigate the use of alternative divergence measures. A good starting point for finding alternative measures can be found in the Encyclopedia of Distances by Deza and Deza <span class="citation">(<a href="#ref-deza_encyclopedia_2009" role="doc-biblioref">2009</a>)</span>.</p>
<p>In the current paper, we followed Bousquet <span class="citation">(<a href="#ref-bousquet_diagnostics_2008" role="doc-biblioref">2008</a>)</span> in using KL divergences, and this raises two important methodological issues; see Burnham and Anderson <span class="citation">(<a href="#ref-burnham_model_2002" role="doc-biblioref">2002</a>)</span> for an elaborate discussion. First, the reference model should be known. Second, the parameters should be known for the model that is evaluated, i.e., the formalized expert prior. These issues make the KL divergence a measure that, according to some, for instance Burnham and Anderson <span class="citation">(<a href="#ref-burnham_model_2002" role="doc-biblioref">2002</a>)</span>, cannot be used for real-world problems, and they previously led to the development of the AIC <span class="citation">(Akaike, <a href="#ref-akaike_information_1973" role="doc-biblioref">1973</a>)</span>, which uses the relative expected KL divergence. The AIC deals with the two issues by taking the reference model as a constant in comparing multiple models and using the maximum likelihood estimates for the parameters of the models to be evaluated, introducing a penalty term for the bias that this induces.</p>
<p>We conclude that we can use the KL divergence in the context of the <span class="math inline">\(DAC_d\)</span>, with the following reasoning. We define <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> to be the reference distribution, as it reflects a fictional expert that is completely informed by the data, and it is thus known. In the case of the empirical example, the data even represent the true state of affairs, i.e., the actual realizations of the turnover for each professional. Concerning the parameters of the models to be evaluated, <span class="math inline">\(\pi_d(\theta)\)</span> should reflect the exact beliefs of the experts. We use the Five-Step Method <span class="citation">(Veen et al., <a href="#ref-veen_proposal_2017" role="doc-biblioref">2017</a>)</span>, which incorporates feedback at each stage of the elicitation, ensuring that experts confirm that their beliefs are accurately represented by the location, shape and scale parameters. We acknowledge that whether the parameters represent an expert’s beliefs exactly cannot be known, but we feel confident that the procedure we use at least aims to obtain very accurate representations. As experts can continue to adapt their input until they are satisfied with the representation of their beliefs, this should overcome problems with the second issue.</p>
<p>While we use <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span>, and thus know the reference distribution, and we firmly believe that we properly represent the experts’ beliefs, it seems highly implausible that a DAC score of 0 can be attained. It is unlikely that, in predicting future events, one estimates precisely the optimal location and exactly the optimal amount of uncertainty.</p>
<p>Although a priori specification of the optimal amount of uncertainty is unlikely, we are able to gain an indication of the appropriate amount of uncertainty a posteriori. <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> provides an excellent indication of this uncertainty. Given that one had no knowledge beforehand and is rationally guided by the data, following probabilistic reasoning, one arrives at the posterior belief represented by <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> <span class="citation">(Bousquet, <a href="#ref-bousquet_diagnostics_2008" role="doc-biblioref">2008</a>; Irony & Singpurwalla, <a href="#ref-irony_noninformative_1997" role="doc-biblioref">1997</a>)</span>. The posterior describes the range of values that would have been plausible given this information. This indication is, however, conditional on the data providing an accurate representation of the state of affairs.</p>
<p>Given that we can obtain information on the expected value for the parameter of interest, the appropriate amount of uncertainty and the quality of the approximation by each expert, we can start a learning process. By sharing the reasons behind the choices they made, experts can learn from one another as the evidence shows which reasoning leads to the most accurate predictions. The data can inform the experts so that they can adjust their estimates and uncertainty. Through this evaluation, expertise can increase and, in the long run, convergence should be reached both between the predictions of different experts and between the experts and the data. Such convergence indicates that at least part of the epistemic uncertainty is eliminated, that we have a better understanding of the data generating processes and that we are better able to make an informed decision. Note that, if we wish to incorporate the relevant factors that are identified by the experts, these should be included in the model so that part of the posterior uncertainty about our parameter can be explained. The explained variance can be seen as a reduction of the epistemic uncertainty or a learning effect.</p>
<p>In the empirical example, we can already see some opportunities for learning. For example, expert three misestimated the location of the parameter, which indicates, at least to some extent, faulty or missing tacit knowledge. By starting a dialogue with the other experts, he or she could learn why they all estimated the average turnover per professional to be higher. Experts one and two had almost identical predictions concerning the location, but expert one expressed more uncertainty. Perhaps this indicated a greater acknowledgement of epistemic uncertainty; a dialogue could shed more light on the differences in the choices of experts one and two. Our empirical example contains just four experts, but the methods used are easily scalable to include more experts, with only additional elicitation effort required. Including more experts can result in more opportunities for learning.</p>
<p>Concerning the appropriateness of the ranking that is obtained using the <span class="math inline">\(DAC_d\)</span>, we have the following to add. One could argue that perhaps the sample entails extreme data. However, even if this is true, the experts should have considered these data to be plausible, for they did occur. Thus, if an expert exhibits a large KL divergence with <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span>, this expert simply did not expect these data to be likely or plausible. By incorporating (un)certainty in the evaluation, the <span class="math inline">\(DAC_d\)</span>, or the KL divergences if a benchmark is omitted, produces the behavior required to compare experts’ beliefs fairly. Given that it is appropriate to take uncertainty into account, a prior can be over-specific, such that it does not adhere to the principles underlying the data generating mechanism. KL divergences reward the specification of an appropriate amount of uncertainty and penalize overconfidence.</p>
<div style="page-break-after: always;"></div>
<p>To conclude this discussion, we state recommendations for researchers facing similar problems:</p>
<ul>
<li>Use <span class="math inline">\(DAC_d\)</span> instead of BF.</li>
<li>Specify <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> such that it serves as a reference posterior and drop the association between <span class="math inline">\(\pi^J(\theta|\textbf{y})\)</span> and <span class="math inline">\(\pi^J(\theta)\)</span>.</li>
<li>Consider whether a meaningful benchmark can be determined. If not, only use <span class="math inline">\(KL[\pi^J(.|\textbf{y})||\pi_d]\)</span> and compare experts with each other and not with a benchmark.</li>
<li>Carrying out a sensitivity analysis is always advisable, and even more so if benchmarks are used.</li>
</ul>
</div>
<div id="ch03ethics" class="section level2 unnumbered">
<h2>Ethics Statement</h2>
<p>This study was carried out in accordance with the recommendations of the internal Ethics Committee of the Faculty of Social and Behavioural Sciences of Utrecht University, with written informed consent from all subjects. All subjects gave written informed consent in accordance with the Declaration of Helsinki. The protocol was approved by the internal Ethics Committee of the Faculty of Social and Behavioural Sciences of Utrecht University.</p>
</div>
<div id="ch03funding" class="section level2 unnumbered">
<h2>Funding</h2>
<p>The project was supported by the Netherlands Organization for Scientific Research grant number NWO-VIDI-452-14-006. K.M. was supported by the Netherlands Organization for Scientific Research grant number NWO-452-12-010.</p>
</div>
<div id="ch03acknowledgments" class="section level2 unnumbered">
<h2>Acknowledgments</h2>
<p>We are grateful to all participants of the empirical study for their time, energy and predictions. In addition, we would like to thank the company for allowing us access to their resources and information, thereby enabling us to provide empirical support for the theoretical work. We would also like to thank the anonymous reviewers whose comments and suggestions greatly improved the manuscript.</p>
</div>
<div id="ch03conflict" class="section level2 unnumbered">
<h2>Conflicts of Interest Statement</h2>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</div>
</div>
<h3>References</h3>
<div id="refs" class="references">
<div id="ref-akaike_information_1973">
<p>Akaike, H. (1973). Information theory as an extension of the maximum likelihood principle. In <em>Second international symposium on information theory</em> (pp. 267–281). Budapest, Hungary: Akademiai Kaido.</p>
</div>
<div id="ref-barber_bayesian_2012">
<p>Barber, D. (2012). <em>Bayesian reasoning and machine learning</em>. Cambridge University Press.</p>
</div>
<div id="ref-barons_eliciting_2018">
<p>Barons, M. J., Wright, S. K., & Smith, J. Q. (2018). Eliciting probabilistic judgements for integrating decision support systems. In L. C. Dias, A. Morton, & J. Quigley (Eds.), <em>Elicitation</em> (pp. 445–478). Springer.</p>
</div>
<div id="ref-berger_estimating_1989">
<p>Berger, J. O., & Bernardo, J. M. (1989). Estimating a product of means: Bayesian analysis with reference priors. <em>Journal of the American Statistical Association</em>, <em>84</em>(405), 200–207.</p>
</div>
<div id="ref-berger_formal_2009">
<p>Berger, J. O., Bernardo, J. M., & Sun, D. (2009). The formal definition of reference priors. <em>The Annals of Statistics</em>, <em>37</em>(2), 905–938.</p>
</div>
<div id="ref-bernardo_reference_1979">
<p>Bernardo, J. M. (1979). Reference posterior distributions for Bayesian inference. <em>Journal of the Royal Statistical Society. Series B (Methodological)</em>, 113–147.</p>
</div>
<div id="ref-bernardo_bayesian_1994">
<p>Bernardo, J. M., & Smith, A. F. (1994). <em>Bayesian theory</em>. New York, NY: John Wiley & Sons, LTD.</p>
</div>
<div id="ref-bolsinova_using_2017">
<p>Bolsinova, M., Hoijtink, H., Vermeulen, J. A., & Beguin, A. (2017). Using expert knowledge for test linking. <em>Psychological Methods</em>, <em>22</em>(4), 705.</p>
</div>
<div id="ref-bousquet_diagnostics_2008">
<p>Bousquet, N. (2008). Diagnostics of prior-data agreement in applied Bayesian analysis. <em>Journal of Applied Statistics</em>, <em>35</em>(9), 1011–1029.</p>
</div>
<div id="ref-brier_verification_1950">
<p>Brier, G. W. (1950). Verification of forecasts expressed in terms of probability. <em>Monthly Weather Review</em>, <em>78</em>(1), 1–3.</p>
</div>
<div id="ref-burnham_model_2002">
<p>Burnham, K. P., & Anderson, D. R. (2002). <em>Model selection and multimodel inference: A practical information-theoretic approach</em>. Springer Science & Business Media.</p>
</div>
<div id="ref-cohen_coefficient_1960">
<p>Cohen, J. (1960). A coefficient of agreement for nominal scales. <em>Educational and Psychological Measurement</em>, <em>20</em>(1), 37–46.</p>
</div>
<div id="ref-cooke_experts_1991">
<p>Cooke, R. M. (1991). <em>Experts in uncertainty: Opinion and subjective probability in science</em>. Oxford University Press on Demand.</p>
</div>
<div id="ref-deza_encyclopedia_2009">
<p>Deza, M. M., & Deza, E. (2009). Encyclopedia of distances. In <em>Encyclopedia of Distances</em> (pp. 1–583). Springer.</p>
</div>
<div id="ref-dirac_principles_1947">
<p>Dirac, P. A. M. (1947). <em>The principles of quantum mechanics</em>. Oxford: Clarendon Press.</p>
</div>
<div id="ref-fernandez_bayesian_1998">
<p>Fernández, C., & Steel, M. F. J. (1998). On Bayesian modeling of fat tails and skewness. <em>Journal of the American Statistical Association</em>, <em>93</em>(441), 359–371.</p>
</div>
<div id="ref-fu_bayesian_2015">
<p>Fu, S., Celeux, G., Bousquet, N., & Couplet, M. (2015). Bayesian inference for inverse problems occurring in uncertainty analysis. <em>International Journal for Uncertainty Quantification</em>, <em>5</em>(1).</p>
</div>
<div id="ref-fu_adaptive_2017">
<p>Fu, S., Couplet, M., & Bousquet, N. (2017). An adaptive kriging method for solving nonlinear inverse statistical problems. <em>Environmetrics</em>, <em>28</em>(4).</p>
</div>
<div id="ref-gelman_bayesian_2013">
<p>Gelman, A., Carlin, J. B., Stern, H. S., Dunson, D. B., Vehtari, A., & Rubin, D. B. (2013). <em>Bayesian data analysis</em>. CRC press.</p>
</div>
<div id="ref-gelman_inference_1992">
<p>Gelman, A., & Rubin, D. B. (1992). Inference from iterative simulation using multiple sequences. <em>Statistical Science</em>, 457–472.</p>
</div>
<div id="ref-gelman_prior_2017">
<p>Gelman, A., Simpson, D., & Betancourt, M. (2017). The prior can often only be understood in the context of the likelihood. <em>Entropy</em>, <em>19</em>(10), 555.</p>
</div>
<div id="ref-gronau_bridgesampling:_2017">
<p>Gronau, Q. F., & Singmann, H. (2017). <em>Bridgesampling: Bridge Sampling for Marginal Likelihoods and Bayes Factors</em>. Retrieved from <a href="https://CRAN.R-project.org/package=bridgesampling">https://CRAN.R-project.org/package=bridgesampling</a></p>
</div>
<div id="ref-irony_noninformative_1997">
<p>Irony, T., & Singpurwalla, N. (1997). Noninformative priors do not exist: A discussion with Jose M. Bernardo. <em>Journal of Statistical Planning and Inference</em>, <em>65</em>(1), 159–189.</p>
</div>
<div id="ref-jaynes_rationale_1982">
<p>Jaynes, E. T. (1982). On the rationale of maximum-entropy methods. <em>Proceedings of the IEEE</em>, <em>70</em>(9), 939–952.</p>
</div>
<div id="ref-jeffreys_invariant_1946">
<p>Jeffreys, H. (1946). An invariant form for the prior probability in estimation problems. <em>Proceedings of the Royal Society of London. Series A, Mathematical and Physical Sciences</em>, 453–461.</p>
</div>
<div id="ref-jeffreys_theory_1961">
<p>Jeffreys, H. (1961). <em>Theory of probability</em>. London, UK: Oxford University Press.</p>
</div>
<div id="ref-kass_bayes_1995">
<p>Kass, R. E., & Raftery, A. E. (1995). Bayes factors. <em>Journal of the American Statistical Association</em>, <em>90</em>(430), 773–795.</p>
</div>
<div id="ref-kass_selection_1996">
<p>Kass, R. E., & Wasserman, L. (1996). The selection of prior distributions by formal rules. <em>Journal of the American Statistical Association</em>, <em>91</em>(435), 1343–1370.</p>
</div>
<div id="ref-koch_intraclass_2004">
<p>Koch, G. G. (2004). Intraclass correlation coefficient. <em>Encyclopedia of Statistical Sciences</em>, <em>6</em>.</p>
</div>
<div id="ref-kullback_information_1951">
<p>Kullback, S., & Leibler, R. A. (1951). On information and sufficiency. <em>The Annals of Mathematical Statistics</em>, <em>22</em>(1), 79–86.</p>
</div>
<div id="ref-liu_bayes_2008">
<p>Liu, C. C., & Aitkin, M. (2008). Bayes factors: Prior sensitivity and model generalizability. <em>Journal of Mathematical Psychology</em>, <em>52</em>(6), 362–375.</p>
</div>
<div id="ref-lynch_introduction_2007">
<p>Lynch, S. M. (2007). <em>Introduction to applied Bayesian statistics and estimation for social scientists</em>. Springer Science & Business Media.</p>
</div>
<div id="ref-morey_philosophy_2016">
<p>Morey, R. D., Romeijn, J.-W., & Rouder, J. N. (2016). The philosophy of Bayes factors and the quantification of statistical evidence. <em>Journal of Mathematical Psychology</em>, <em>72</em>, 6–18.</p>
</div>
<div id="ref-ohagan_uncertain_2006">
<p>O’Hagan, A., Buck, C. E., Daneshkhah, A., Eiser, J. R., Garthwaite, P. H., Jenkinson, D. J., … Rakow, T. (2006). <em>Uncertain judgements: Eliciting experts’ probabilities</em>. John Wiley & Sons.</p>
</div>
<div id="ref-plummer_rjags:_2018">
<p>Plummer, M. (2018). <em>Rjags: Bayesian Graphical Models using MCMC</em>. Retrieved from <a href="https://CRAN.R-project.org/package=rjags">https://CRAN.R-project.org/package=rjags</a></p>
</div>
<div id="ref-quigley_elicitation_2018">
<p>Quigley, J., Colson, A., Aspinall, W., & Cooke, R. M. (2018). Elicitation in the classical model. In L. C. Dias, A. Morton, & J. Quigley (Eds.), <em>Elicitation</em> (pp. 15–36). Springer.</p>
</div>
<div id="ref-raftery_approximate_1996">
<p>Raftery, A. E. (1996). Approximate Bayes factors and accounting for model uncertainty in generalised linear models. <em>Biometrika</em>, <em>83</em>(2), 251–266.</p>
</div>
<div id="ref-shrout_intraclass_1979">
<p>Shrout, P. E., & Fleiss, J. L. (1979). Intraclass correlations: Uses in assessing rater reliability. <em>Psychological Bulletin</em>, <em>86</em>(2), 420.</p>
</div>
<div id="ref-stan_development_team_rstan:_2018">
<p>Stan Development Team. (2018b). <em>RStan: The R interface to Stan</em>. Retrieved from <a href="http://mc-stan.org/">http://mc-stan.org/</a></p>
</div>
<div id="ref-van_de_schoot_dealing_2018">
<p>van de Schoot, R., Griffioen, E., & Winter, S. (2018). Dealing with imperfect elicitation results. In T. Bedford, S. French, A. M. Hanea, & G. F. Nane (Eds.), <em>Expert judgement in risk and decision analysis</em>.</p>
</div>
<div id="ref-veen_proposal_2017">
<p>Veen, D., Stoel, D., Zondervan-Zwijnenburg, M., & van de Schoot, R. (2017). Proposal for a Five-Step Method to Elicit Expert Judgement. <em>Frontiers in Psychology</em>, <em>8</em>, 2110.</p>
</div>
<div id="ref-walley_advantages_2015">
<p>Walley, R. J., Smith, C. L., Gale, J. D., & Woodward, P. (2015). Advantages of a wholly Bayesian approach to assessing efficacy in early drug development: A case study. <em>Pharmaceutical Statistics</em>, <em>14</em>(3), 205–215.</p>
</div>
<div id="ref-wasserman_bayesian_2000">
<p>Wasserman, L. (2000). Bayesian model selection and model averaging. <em>Journal of Mathematical Psychology</em>, <em>44</em>(1), 92–107.</p>
</div>
<div id="ref-yang_catalog_1996">
<p>Yang, R., & Berger, J. O. (1996). <em>A catalog of noninformative priors</em>. Institute of Statistics; Decision Sciences, Duke University.</p>
</div>
<div id="ref-zondervan-zwijnenburg_application_2017">
<p>Zondervan-Zwijnenburg, M., van de Schoot-Hubeek, W., Lek, K., Hoijtink, H., & van de Schoot, R. (2017b). Application and evaluation of an expert judgment elicitation procedure for correlations. <em>Frontiers in Psychology</em>, <em>8</em>, 90.</p>
</div>
<div id="ref-zyphur_bayesian_2015">
<p>Zyphur, M. J., Oswald, F. L., & Rupp, D. E. (2015). Bayesian probability and statistics in management research [special issue]. <em>Journal of Management</em>, <em>41</em>(2).</p>
</div>
</div>
</section>
</div>
</div>
</div>
<a href="fivestep.html" class="navigation navigation-prev " aria-label="Previous page"><i class="fa fa-angle-left"></i></a>
<a href="Hierarchical.html" class="navigation navigation-next " aria-label="Next page"><i class="fa fa-angle-right"></i></a>
</div>
</div>
<script src="libs/gitbook-2.6.7/js/app.min.js"></script>
<script src="libs/gitbook-2.6.7/js/lunr.js"></script>
<script src="libs/gitbook-2.6.7/js/clipboard.min.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-search.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-sharing.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-fontsettings.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-bookdown.js"></script>
<script src="libs/gitbook-2.6.7/js/jquery.highlight.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-clipboard.js"></script>
<script>
gitbook.require(["gitbook"], function(gitbook) {
gitbook.start({
"sharing": {
"github": false,
"facebook": true,
"twitter": true,
"google": false,
"linkedin": true,
"weibo": false,
"instapaper": false,
"vk": false,
"all": ["facebook", "google", "twitter", "linkedin", "weibo", "instapaper"]
},
"fontsettings": {
"theme": "white",
"family": "sans",
"size": 2
},
"edit": {
"link": null,
"text": null
},
"history": {
"link": null,
"text": null
},
"download": ["Dissertation_Duco_Veen.pdf"],
"toc": {
"collapse": "section"
},
"search": true
});
});
</script>
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
(function () {
var script = document.createElement("script");
script.type = "text/javascript";
var src = "true";
if (src === "" || src === "true") src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-MML-AM_CHTML";
if (location.protocol !== "file:")
if (/^https?:/.test(src))
src = src.replace(/^https?:/, '');
script.src = src;
document.getElementsByTagName("head")[0].appendChild(script);
})();
</script>
</body>
</html>