FUNCTION post_prob_optbins, M
compile_opt idl2
COMMON FUNC_optbins, data2, N, minM, hist2
IF M LT 5 THEN m = 50
hist2 = histogram(data2, nbins = m)
part1 = N*alog(m) + lngamma(m/2.) - lngamma(N+M/2.)
part2 = -m*lngamma(1/2.) + total(lngamma(hist2+0.5))
RETURN, -(part1 + part2)
END
;+
; NAME:
; optbins
;
;
; PURPOSE:
; calculate the optimum number of histogram bins by balancing the
; likelihood function and the prior probability of the model. Method
; is from Optimal Data-Based Binning for Histograms Kevin H. Knuth
; arXiv:physics/0605197v1 [physics.data-an] 23 May 2006
;
;
; CATEGORY:
; stats
;
;
; INPUTS:
; data: the array to find the optimal number of histogram bins for
;
;
; OPTIONAL INPUTS:
; minM_in: minimum number of bins to consider (default 5)
; maxM: maximum number of bins to consider (default 100)
;
;
; KEYWORD PARAMETERS:
; PLOT: Use David Fannings Histoplot to plot the histogram
; _EXTRA: _strict_extra keywords to histoplot
; HIST: the histogram of the data
;
;
; OUTPUTS:
; optimal number of histogram bins
;
;
; COMMON BLOCKS:
; FUNC_optbins, data2, N, minM, hist2
;
;
; RESTRICTIONS:
; This is not generalized to 2-d histograms yet
;
;
; EXAMPLE:
; IDL> print, optbins(randomn(seed, 1000), /plot)
; 9
;
;
;
; MODIFICATION HISTORY:
;
; Sun Jul 27 15:46:01 2008, Brian Larsen
; written and tested
;
;-
FUNCTION optbins, data, $
minM_in, $
maxM, $
HIST = HIST, $
PLOT = PLOT, $
_EXTRA = _EXTRA
compile_opt idl2
COMMON FUNC_optbins, data2, N, minM, hist2
IF n_params() LT 1 OR n_elements(data) EQ 0 THEN $
message, 'Must input data'
IF n_params() GT 1 AND n_params() NE 3 THEN $
message, 'Usage: ans=optbins(data, [minWidth], [maxWidth])'
IF n_elements(minM_in) EQ 0 THEN $
minM_in = 5
IF n_elements(maxM) EQ 0 THEN $
maxM = 100
minM = minM_in
data2 = data
n = n_elements(data)
ans = AMOEBA( $
scale = 5., $
P0 = [50.], $
FUNCTION_VALUE = fval, $
FUNCTION_NAME = 'post_prob_optbins', $
1e-5)
IF keyword_set(plot) THEN BEGIN
histoplot, data, nbins = (ceil(ans))[0], _STRICT_EXTRA = _EXTRA
ENDIF
RETURN, ceil(ans)
END