clear all; % --- The general parameters --- % Jn = 4; % Number of columns n = 5000; % Number of protein pairs Th = (1*n)/4; % Set the first 1/4 protein pairs as interacting. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Generate simulated data: an example data as in paper % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% mu1 = [5 7 10; 4, 6, 7; 60, 65, 70; 10, 15, 20;15 17 20; 14, 16, 17; 70, 75, 80; 20, 25, 30;25 27 30; 24, 26, 27; 80, 85, 90; 30, 35, 40;35 37 40; 34, 36, 37; 90, 95, 100; 40, 45, 50]'; % configurations for "interact" for 4 columns. sigma1 = [1 1.5 2; 0.5 1 1.5; 1 3 5; 1 2 3;1 1.5 2; 0.5 1 1.5; 1 3 5; 1 2 3;1 1.5 2; 0.5 1 1.5; 1 3 5; 1 2 3;1 1.5 2; 0.5 1 1.5; 1 3 5; 1 2 3]'; mu0 = [2 4 4; 3 4 2; 55 50 58; 8 5 7;12 14 14; 13 14 12; 65 60 68; 18 15 17;22 24 24; 23 24 22; 75 70 78; 28 25 27;32 34 34; 33 34 32; 85 80 88; 38 35 37]'; % mu0 = [2 4 5; 3 4 4; 55 61 60; 8 12 10]'; sigma0 = [4 3 2; 3 1.5 1; 10 5 3; 7 4 2;4 3 2; 3 1.5 1; 10 5 3; 7 4 2;4 3 2; 3 1.5 1; 10 5 3; 7 4 2;4 3 2; 3 1.5 1; 10 5 3; 7 4 2]'; % Wp = [0.2, 0.3, 0.5; 0.1, 0.3, 0.6; 0.1, 0.2, 0.7; 0.2, 0.6, 0.2;0.2, 0.3, 0.5; 0.1, 0.3, 0.6; 0.1, 0.2, 0.7; 0.2, 0.6, 0.2;0.2, 0.3, 0.5; 0.1, 0.3, 0.6; 0.1, 0.2, 0.7; 0.2, 0.6, 0.2;0.2, 0.3, 0.5; 0.1, 0.3, 0.6; 0.1, 0.2, 0.7; 0.2, 0.6, 0.2]'; zid = ones(n,1); zid(Th+1:end) = 0; Num = Th; y1 = []; for j = 1:Jn u = unifrnd(0,1,n,1); index = ones(n,1); index(u>Wp(1,j) & u<=Wp(1,j)+Wp(2,j))=2; index(u>Wp(1,j)+Wp(2,j))=3; % zi is the index for mixing which normal distr. mu = zeros(n,1); sigma = zeros(n,1); mu(index==1) = mu1(1,j) + (mu0(1,j)-mu1(1,j))*(zid(index==1)==0); mu(index==2) = mu1(2,j) + (mu0(2,j)-mu1(2,j))*(zid(index==2)==0); mu(index==3) = mu1(3,j) + (mu0(3,j)-mu1(3,j))*(zid(index==3)==0); sigma(index==1) = sigma1(1,j) + (sigma0(1,j)-sigma1(1,j))*(zid(index==1)==0); sigma(index==2) = sigma1(2,j) + (sigma0(2,j)-sigma1(2,j))*(zid(index==2)==0); sigma(index==3) = sigma1(3,j) + (sigma0(3,j)-sigma1(3,j))*(zid(index==3)==0); Tempy = normrnd(mu,sigma); y1 = [y1 Tempy]; end y2 = y1; % induce errors for input data: randomly revserve pxER interacting protein pairs as non-interacting, and pxERx3 non-interacting as interacting % miss4 = []; miss04 = [];ER = 25;p=1; for j = 1:Jn miss = unidrnd(Num, [1,ER*p]); miss4 = [miss4 miss];% 25 error for 1250 interacting. y1(miss,j) = min(mu0(:,j))-2*min(sigma0(:,j))+unidrnd(min(sigma0(:,j)),[1,ER*p]); miss0 = Num+unidrnd(n-Num,[1,ER*p*(n-Num)/Num]); miss04 = [miss04 miss0]; % 50 error for 3750 non-interacting y1(miss0,j) = max(mu1(:,j))+2*max(sigma1(:,j))-unidrnd(fix(max(sigma1(:,j))),[1,ER*p*(n-Num)/Num]); end miss1Rate = length(unique(miss4))/Num; miss0Rate = length(unique(miss04))/(n-Num); % Let yn = y2 if testing on error free data, or let yn=y1 if testing on error-induced data --- % yn = y1; y = (yn-repmat(mean(yn),[n,1]))./repmat(std(yn),[n,1]); % normalization %%%%%%%%%%%%%%%%%% % NBTL algorithm % %%%%%%%%%%%%%%%%%% % --- define hyperparameters --- % aPsi = 1; bPsi = 1; PsiV = []; Psi = betarnd(aPsi,bPsi); % If a hyperprior for Psi is set %Psi = normrnd(0.25,0.025); %If a prior known Psi is set Mu = 0; Ga = 1; at = 1; bt = 1; alpha = 1; T = 20; cid = (1:T)'; % --- define initial parameter values --- % tau = gamrnd(at,1/bt,T,Jn); Vh = betarnd(1,alpha,T-1,1); Vh(T) = 1; Kap = 1; Theta0 = normrnd(Mu, 1/sqrt(Ga), T, Jn); P = unifrnd(normcdf(0, 0, repmat(1/sqrt(Kap),T,Jn)),1); P(P>0.999) = 0.999; Delta = norminv(P, 0, repmat(1/sqrt(Kap),T,Jn)); Theta1 = Delta+Theta0; S = round(unifrnd(1,T,n,Jn)); nn = 0; ct = 0; thin = 10; z_old = zeros(n,1); Conv = []; z_Iter = [];z2_Iter = [];V_Iter = []; pz_Iter = zeros(1,length(y1)); while nn<4000 % --- Update zi --- % Tau_Theta1 = []; Tau_Theta0 = []; for j = 1:Jn Tau_Theta1 = [Tau_Theta1 tau(S(:,j),j).*((y(:,j)-Theta1(S(:,j),j)).^2)/2]; % dim: nx1 Tau_Theta0 = [Tau_Theta0 tau(S(:,j),j).*((y(:,j)-Theta0(S(:,j),j)).^2)/2]; end A = log(Psi)-sum(Tau_Theta1,2); B = log(1-Psi)-sum(Tau_Theta0,2); maxV=max([A;B]); A1 = exp(A-maxV); B1 = exp(B-maxV); pz = A1./(A1+B1); % pz = 1./(B1./A1+1); z = zeros(n,1); %z2 = ones(n,1); u = unifrnd(0,1,n,1); % uniform random variable z(uh)); end Vh(1:T-1) = betarnd(1 + aV, alpha + bV); Vh(cid0.999)=0.999; % --- update Theta_h0j --- % for h = 1:T zD = z*Delta(h,:); V0 = 1./(Ga+tau(h,:).*sum(S==h,1)); % 1xJ dim E0 = V0.*(Mu*Ga+tau(h,:).*sum((S==h).*(y-zD),1)); Theta0(h,:) = normrnd(E0, sqrt(V0)); end % --- update Delta_hj = Theta_h1j-Theta_h0j --- % for h = 1:T V1 = 1./(Kap+tau(h,:).*(sum((S==h).*repmat(z,1,Jn),1))); % 1xJ dim E1 = V1.*(tau(h,:).*sum((S==h).*repmat(z,[1,Jn]).*(y-repmat(Theta0(h,:),[n,1])),1)); % 1xJ dim P = unifrnd(normcdf(0,E1,sqrt(V1)),1); P(P>0.999) = 0.999; Delta(h,:) = norminv(P,E1,sqrt(V1)); end Theta1 = Delta+Theta0; % --- update tau_h --- % for h = 1:T at1 = at + sum(S==h,1)/2; bt1 = bt + sum(((S==h).*(y-repmat(Theta0(h,:),[n,1])-z*Delta(h,:))).^2,1)/2; tau(h,:) = gamrnd(at1,1./bt1); end % --- Results collection --- % % collect simulated distributions if nn >=1000 ct = ct+1; pz_Iter = pz_Iter+pz'; % for large data set. end nn = nn+1; if mod(nn,1000) == 0 nn/1000 end Conv = [Conv sum(z-z_old)]; z_old = z; end Post_z = pz_Iter/ct; % The posterior probability for z=1 from our NBTL. %%%%%%%%%%%%%%%%%%% % Results summary % %%%%%%%%%%%%%%%%%%% % Calculate the FN and FP rates for our NBTL using the alternative threshold that maximally separates % two parts of bimodal distribution. n = length(Post_z);Jn=4; [f,x]=ksdensity(Post_z,'npoints',50); f1=f(x>0&x<0.95); Th1 = x(f==min(f1)); len = length(Th1);Th1 = Th1(fix((len-1)/2)+1); pos1 = find(Post_z>=Th1); pos0 = find(Post_z=Num+1))/(n-Num);FP1 = 1-TN; % Calculate the FN and FP rates for NBTL using 0.5 threshold. Th2 = 0.5; pos1 = find(Post_z>=Th2); pos0 = find(Post_z=Num+1))/(n-Num);FP2 = 1-TN; % Calculate the FN and FP rates for naive using the alternative threshold. y_naive = ones(n,1); for j = 1:Jn y_naive = y_naive.*yn(:,j); end [f0,x0]=ksdensity(y_naive,'npoints',50); Pmax = find(f0==max(f0)); Diff_f0=diff(f0); Pcom=find(Diff_f0>0); Th0=x0(Pcom(find(Pcom-Pmax>0,1))); pos1 = find(y_naive>=Th0); pos0 = find(y_naive=Num+1))/(n-Num);FP0 = 1-TN; % FNFPs are the summary of the FN and FP rates. The first two % elements are the FN and FP rates for NBTL using 0.5 threshold, the next % two are the FN and FP rates for NBTL using the alternative threshold, and % the last two are the FN and FP rates for naive using the alternative % threshold. FNFPs = [FN2 FP2 FN1 FP1 FN0 FP0];