MATSUURA SOICHI, Ph.D.

College of Business Administration, Ritsumeikan University

R | Roulette

概要

質量関数(mass function)について説明する。

mass function

Bernoulli

1が出る確率がp=0.1,0が出る確率が1-p=0.9となるベルヌーイ試行を考える。
#3 Horse Kick data
# need colorspace and vcd packages loaded  (Maybe just make data available)
###################################

#data("HorseKicks")  #make this data available
# famous Bortkiewicz data
HorseKicks <- array(c(109,65,22,3,1),dim = 5,dimnames = list(c("0","1","2","3","4")))
names(dimnames(HorseKicks))<-"nDeaths"

HorseKicks
plot(0:4,HorseKicks,xlab="No. of fatalities",ylab="No. of corps-years", main="Frequencies")  #Histogram of years with given no. of deaths
sum(HorseKicks)

observed.proportions <- HorseKicks/sum(HorseKicks)
observed.proportions

plot(0:4,observed.proportions,xlab="No. of fatalities",ylab="% of corps-years",main="Relative Frequencies",ylim=c(0,1.1))
     
#################
# let's find a particular Poisson probability, using the formula

(0.8^2)*exp(-0.8)/factorial(2)   #= P(X=2) when lambda = 0.8
dpois(2,0.8)  # same


# Now let's get the five probabilities P(X=0),.... P(X=4)
# Here are three ways to do this:
# 1. use a for-loop (slow, brute-force)
# 2. use vectors (faster, elegant)
# 3. use the built-in function - nice if there is one, but if not you
#    have to roll your own using 1. or 2.

# 1. for-loop
expected.proportions <- rep(0,5)  # initialize a vector of length 5 containing zeros
for(w in 0:4){
expected.proportions[w+1] <- (0.8^w)*exp(-0.8)/factorial(w)}   #= P(X=w) when lambda = 0.8
# use [w+1] because vector entries are indexed from v[1] to v[ length(v) ]  ( NOT from v[0] )
expected.proportions

# 2. using vectors
# now let's get them without a for-loop (much faster)
v <- 0:4
expected.proportions <- (0.8^v)*exp(-0.8)/factorial(v)   #= P(X=v) for v=0,1,2,3,4  lambda = 0.8
expected.proportions

# 3. using the built-in function
#now lets get them just using the built in function

expected.proportions <- dpois(0:4, 0.8)
expected.proportions
#####################################################

# All of that was with lambda = 0.8, but that was arbitrary.
# Now let's think about the right value of lambda for the observed data.

# let's use a for-loop to find P(X=0),..P(X=4) for lambda = 0.1, 0.2,....1.5
for(i in 1:15){
points(0:4,dpois(0:4,i/10),type="l")
     }

expected.proportions <- dpois(0:4,0.1)  #0.1 is an arbitrary value
sum(expected.proportions)
plot(0:4,observed.proportions,xlab="No. of fatalities",ylab="% of corps-years",main="Relative Frequencies",ylim=c(0,1.1))
points(0:4,expected.proportions,col="red",pch=3)

#put all on one plot

plot(0:4,observed.proportions,xlab="No. of fatalities",ylab="% of corps-years",main="Relative Frequencies",ylim=c(0,1.1))
for(i in 1:15){
points(0:4,dpois(0:4,i/10),type="l")
     }

plot(0:4,observed.proportions,xlab="No. of fatalities",ylab="% of corps-years",main="Relative Frequencies",ylim=c(0,1.1))
points(0:4,dpois(0:4,0.6),type="l")

109*0 + 65*1 + 22*2 + 3*3 + 1*4  # total no. of horsekicks = 122

sum(HorseKicks*seq(0,4))  # easier way to same thing

expected.proportions <- dpois(0:4,122/200)
points(0:4,expected.proportions,col="green",pch=3)

############

# War data from Larsen and Marx p.200
Wars <- array(c(223,142,48,15,4),dim = 5,dimnames = list(c("0","1","2","3","4+")))
names(dimnames(Wars))<-"nStarting"

Wars
plot(0:4,Wars,xlab="No. of wars starting",ylab="No. of years", main="Frequencies")  #Histogram of years with given no. of deaths
sum(Wars)

observed.proportions <- Wars/sum(Wars)
observed.proportions

plot(0:4,observed.proportions,xlab="No. of wars starting",ylab="% of years",main="Relative Frequencies",ylim=c(0,1.1))
     

# Try out different values
expected.proportions <- dpois(0:4,0.1)  #0.1 is an arbitrary value
sum(expected.proportions)
plot(0:4,observed.proportions,xlab="No. of wars starting",ylab="% of years",main="Relative Frequencies",ylim=c(0,1.1))
points(0:4,expected.proportions,col="red",pch=3)

#put all on one plot

plot(0:4,observed.proportions,xlab="No. of wars starting",ylab="% of years",main="Relative Frequencies",ylim=c(0,1.1))
for(i in 1:15){
points(0:4,dpois(0:4,i/10),type="l")
     }

plot(0:4,observed.proportions,xlab="No. of wars starting",ylab="% of years",main="Relative Frequencies",ylim=c(0,1.1))
points(0:4,dpois(0:4,0.6),type="l")

223*0 + 142*1 + 48*2 + 15*3 + 4*4  # total no. of wars

sum(Wars*seq(0,4))  # easier way to same thing

plot(0:4,observed.proportions,xlab="No. of wars starting",ylab="% of years",main="Relative Frequencies",ylim=c(0,1.1))
expected.proportions <- dpois(0:4,299/432)
points(0:4,expected.proportions,col="green",pch=3)

PAGE TOP