R | Roulette
概要
質量関数(mass function)について説明する。
mass function
Bernoulli
1が出る確率がp=0.1,0が出る確率が1-p=0.9となるベルヌーイ試行を考える。#3 Horse Kick data # need colorspace and vcd packages loaded (Maybe just make data available) ################################### #data("HorseKicks") #make this data available # famous Bortkiewicz data HorseKicks <- array(c(109,65,22,3,1),dim = 5,dimnames = list(c("0","1","2","3","4"))) names(dimnames(HorseKicks))<-"nDeaths" HorseKicks plot(0:4,HorseKicks,xlab="No. of fatalities",ylab="No. of corps-years", main="Frequencies") #Histogram of years with given no. of deaths sum(HorseKicks) observed.proportions <- HorseKicks/sum(HorseKicks) observed.proportions plot(0:4,observed.proportions,xlab="No. of fatalities",ylab="% of corps-years",main="Relative Frequencies",ylim=c(0,1.1)) ################# # let's find a particular Poisson probability, using the formula (0.8^2)*exp(-0.8)/factorial(2) #= P(X=2) when lambda = 0.8 dpois(2,0.8) # same # Now let's get the five probabilities P(X=0),.... P(X=4) # Here are three ways to do this: # 1. use a for-loop (slow, brute-force) # 2. use vectors (faster, elegant) # 3. use the built-in function - nice if there is one, but if not you # have to roll your own using 1. or 2. # 1. for-loop expected.proportions <- rep(0,5) # initialize a vector of length 5 containing zeros for(w in 0:4){ expected.proportions[w+1] <- (0.8^w)*exp(-0.8)/factorial(w)} #= P(X=w) when lambda = 0.8 # use [w+1] because vector entries are indexed from v[1] to v[ length(v) ] ( NOT from v[0] ) expected.proportions # 2. using vectors # now let's get them without a for-loop (much faster) v <- 0:4 expected.proportions <- (0.8^v)*exp(-0.8)/factorial(v) #= P(X=v) for v=0,1,2,3,4 lambda = 0.8 expected.proportions # 3. using the built-in function #now lets get them just using the built in function expected.proportions <- dpois(0:4, 0.8) expected.proportions ##################################################### # All of that was with lambda = 0.8, but that was arbitrary. # Now let's think about the right value of lambda for the observed data. # let's use a for-loop to find P(X=0),..P(X=4) for lambda = 0.1, 0.2,....1.5 for(i in 1:15){ points(0:4,dpois(0:4,i/10),type="l") } expected.proportions <- dpois(0:4,0.1) #0.1 is an arbitrary value sum(expected.proportions) plot(0:4,observed.proportions,xlab="No. of fatalities",ylab="% of corps-years",main="Relative Frequencies",ylim=c(0,1.1)) points(0:4,expected.proportions,col="red",pch=3) #put all on one plot plot(0:4,observed.proportions,xlab="No. of fatalities",ylab="% of corps-years",main="Relative Frequencies",ylim=c(0,1.1)) for(i in 1:15){ points(0:4,dpois(0:4,i/10),type="l") } plot(0:4,observed.proportions,xlab="No. of fatalities",ylab="% of corps-years",main="Relative Frequencies",ylim=c(0,1.1)) points(0:4,dpois(0:4,0.6),type="l") 109*0 + 65*1 + 22*2 + 3*3 + 1*4 # total no. of horsekicks = 122 sum(HorseKicks*seq(0,4)) # easier way to same thing expected.proportions <- dpois(0:4,122/200) points(0:4,expected.proportions,col="green",pch=3) ############ # War data from Larsen and Marx p.200 Wars <- array(c(223,142,48,15,4),dim = 5,dimnames = list(c("0","1","2","3","4+"))) names(dimnames(Wars))<-"nStarting" Wars plot(0:4,Wars,xlab="No. of wars starting",ylab="No. of years", main="Frequencies") #Histogram of years with given no. of deaths sum(Wars) observed.proportions <- Wars/sum(Wars) observed.proportions plot(0:4,observed.proportions,xlab="No. of wars starting",ylab="% of years",main="Relative Frequencies",ylim=c(0,1.1)) # Try out different values expected.proportions <- dpois(0:4,0.1) #0.1 is an arbitrary value sum(expected.proportions) plot(0:4,observed.proportions,xlab="No. of wars starting",ylab="% of years",main="Relative Frequencies",ylim=c(0,1.1)) points(0:4,expected.proportions,col="red",pch=3) #put all on one plot plot(0:4,observed.proportions,xlab="No. of wars starting",ylab="% of years",main="Relative Frequencies",ylim=c(0,1.1)) for(i in 1:15){ points(0:4,dpois(0:4,i/10),type="l") } plot(0:4,observed.proportions,xlab="No. of wars starting",ylab="% of years",main="Relative Frequencies",ylim=c(0,1.1)) points(0:4,dpois(0:4,0.6),type="l") 223*0 + 142*1 + 48*2 + 15*3 + 4*4 # total no. of wars sum(Wars*seq(0,4)) # easier way to same thing plot(0:4,observed.proportions,xlab="No. of wars starting",ylab="% of years",main="Relative Frequencies",ylim=c(0,1.1)) expected.proportions <- dpois(0:4,299/432) points(0:4,expected.proportions,col="green",pch=3)