;+ ; NAME: ; grubbs ; ; ; PURPOSE: ; use the Grubbs' test for outlers on an input data set, see http://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm ; ; ; CATEGORY: ; STATS ; ; ; INPUTS: ; dat: the data to test for outliers ; ; ; KEYWORD PARAMETERS: ; CONF: the confidence level to run the test at ; PLOT: plot the diagnostic plot ; WORDS: display the answer in words ; CRIT: Output the critical cut off value ; _EXTRA: Extra keywords to plot ; ; ; OUTPUTS: ; 1: There are outliers in the data ; 0: There are no outliers in the data ; ; ; EXAMPLE: ; IDL> print, grubbs(randomn(123, 1000), conf=0.95, /plot) ; 1 ; IDL> print, grubbs(randomn(1027897133, 1000), conf=0.95, /plot) ; 0 ; ; ; ; Restrictions: ; Requires oplot_horiz.pro ; Requires SSW tree ; MODIFICATION HISTORY: ; ; Mon Apr 7 20:50:21 2008, Brian Larsen ; written and tested ; 2008-SEPT-22, Henry (Trae) Winter ; Added CRIT keyword. ; ; ; ;- FUNCTION grubbs, dat, $ CONF = conf, $ PLOT = plot, $ WORDS = words, $ CRIT = CRIT, $ _EXTRA = _extra IF n_elements(dat) LT 3 THEN $ message, /ioerror, 'Not enough points input' IF n_elements(conf) EQ 0 THEN $ conf = 0.95 g_stat = max(dat-mean(dat, /nan))/stddev(dat, /nan) n_dat = n_elements(dat) crit = ((n_dat-1)/sqrt(n_dat)) * $ sqrt(t_cvf(conf/n_dat, n_dat-1)^2 / $ (n_dat-2+t_cvf(conf/n_dat, n_dat-2)^2)) IF keyword_set(words) THEN BEGIN IF g_stat GT crit THEN $ print, 'There is at least outlier in the data set at ' + $ trim(conf) + '%' $ ELSE $ print, 'There are no ouliers in the data set at ' + trim(conf) + '%' ENDIF IF keyword_set(plot) THEN BEGIN plot, dat, psym = 1, $ title = trim(conf)+'%', _strict_extra = _extra oplot_horiz, crit, _extra = _extra oplot_horiz, -crit, _extra = _extra ENDIF IF g_stat GT crit THEN return, 1 ELSE return, 0 END