;+
; NAME:
; grubbs
;
;
; PURPOSE:
; use the Grubbs' test for outlers on an input data set, see http://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
;
;
; CATEGORY:
; STATS
;
;
; INPUTS:
; dat: the data to test for outliers
;
;
; KEYWORD PARAMETERS:
; CONF: the confidence level to run the test at
; PLOT: plot the diagnostic plot
; WORDS: display the answer in words
; CRIT: Output the critical cut off value
; _EXTRA: Extra keywords to plot
;
;
; OUTPUTS:
; 1: There are outliers in the data
; 0: There are no outliers in the data
;
;
; EXAMPLE:
; IDL> print, grubbs(randomn(123, 1000), conf=0.95, /plot)
; 1
; IDL> print, grubbs(randomn(1027897133, 1000), conf=0.95, /plot)
; 0
;
;
;
; Restrictions:
; Requires oplot_horiz.pro
; Requires SSW tree
; MODIFICATION HISTORY:
;
; Mon Apr 7 20:50:21 2008, Brian Larsen
; written and tested
; 2008-SEPT-22, Henry (Trae) Winter
; Added CRIT keyword.
;
;
;
;-
FUNCTION grubbs, dat, $
CONF = conf, $
PLOT = plot, $
WORDS = words, $
CRIT = CRIT, $
_EXTRA = _extra
IF n_elements(dat) LT 3 THEN $
message, /ioerror, 'Not enough points input'
IF n_elements(conf) EQ 0 THEN $
conf = 0.95
g_stat = max(dat-mean(dat, /nan))/stddev(dat, /nan)
n_dat = n_elements(dat)
crit = ((n_dat-1)/sqrt(n_dat)) * $
sqrt(t_cvf(conf/n_dat, n_dat-1)^2 / $
(n_dat-2+t_cvf(conf/n_dat, n_dat-2)^2))
IF keyword_set(words) THEN BEGIN
IF g_stat GT crit THEN $
print, 'There is at least outlier in the data set at ' + $
trim(conf) + '%' $
ELSE $
print, 'There are no ouliers in the data set at ' + trim(conf) + '%'
ENDIF
IF keyword_set(plot) THEN BEGIN
plot, dat, psym = 1, $
title = trim(conf)+'%', _strict_extra = _extra
oplot_horiz, crit, _extra = _extra
oplot_horiz, -crit, _extra = _extra
ENDIF
IF g_stat GT crit THEN return, 1 ELSE return, 0
END