松浦総一研究室

R | mass function

質量関数（mass function）について説明する。

二項分布 / Binomial Distribution

$f(k)= \left ( \begin{array}{c} n \\ k \end{array} \right )p^k q^{n-k}, \ \ \ p+q=1$

p <- 0.4
n <- 8
dbinom(8,n,p) # P(X=8) = p^n
## 確認してみよう
p^n
 
dbinom(0,n,p) # P(X=0) = (1-p)^n
# こちらも確認
(1-p)^n

xseq <- 0:n

yseq <- dbinom(xseq,n,p) 
sum(yseq) # チェック

#密度関数をプロット
plot(xseq,yseq,xlab="Values of x",ylim=c(0,1), ylab="P(X=x)",
	main=paste("pmf for Binomial distribution with n=",n," and p=",p))

ベルヌーイ分布 / Bernoulli

1が出る確率がp=0.1，0が出る確率が1-p=0.9となるベルヌーイ試行を考える。このような試行が作り出す離散分布をベルヌーイ分布はといい，確率pで1を，確率q=1-pで0を作り出す離散確率分布である。ベルヌーイ分布の確率分布関数は，以下の式で表される。
$f(k;p)=p^k + (1-p)^{1-k }\ \ \ \mathbf{for}\ \ \ k \in \{ 0,1\}$
ここで，dbinomは二項分布を計算する関数であり，dbinom(1,サイズ,1がでる確率)とすることで，つまり，

p <- 0.1
dbinom(1,1,p) # P(X=1) = p 
dbinom(0,1,p) # P(X=0) = 1-p

Bernoulli(p)は，Binomial(n=1, p)と同じ意味である。変なあたいを入れて，結果を確認する。

dbinom(2,1,p) # P(X=2) = 0 
dbinom(-1,1,p) # P(X=-1) = 0
dbinom(0.5,1,p) # P(X=0.5) = 0

xseqという変数に-2から4の整数を入れる。 yseqという変数に1回の試行回数でxsecがpの確率で生じる密度関数を入れる。この変数をプロットする。

plot(x軸, y軸, xlab="x軸のタイトル", ylim=c(y軸の下限,上限), ylab="P(X=x)", main=paste("表のタイトル"))

となっている。

xseq <- -2:4  # including values other than 0 and 1 
                   #  is unnecessary
yseq <- dbinom(xseq,1,p)
plot(xseq,yseq,xlab="Values of x",ylim=c(0,1), ylab="P(X=x)",
	main=paste("pmf for Bernoulli distribution with p=",p))

幾何分布 / Geometric Distribution

Rは幾何分布にGoldberger (1991) "A Course in Econometrics"とはわずかに異なる定義を与えている。 Goldbergerの定義では，最初のヘッドを含めたトスの回数として定義しているが，Rでは最初のヘッド前のトス数としている。
$F(x)=\begin{cases} 0 & \ \ \mathrm{if}\ \ x <1 \\ 1-(1-p)^{[x]} & \ \ \mathrm{if}\ \ x \geq 1 \end{cases}$
ここで，[x]はx以下の最大の整数を表している。上記の分布関数にしたがって，確認してみよう。

p <- 0.4

dmygeom <- function(x,p){dgeom(x-1,p)}
dmygeom(1,p) # P(X=1) = p
p #check:

dmygeom(2,p) # P(X=1) = p(1-p)
p*(1-p) # check:

xseq <- 0:10  # 10 is arbitrary here
yseq <- dmygeom(xseq,p) 
sum(yseq) # Why is this < 1?

#Plot of the mass function
plot(xseq,yseq,xlab="Values of x",ylim=c(0,1), ylab="P(X=x)",
	main=paste("pmf for Geometric distribution with  p=",p))

Discrete Uniform

## R doesn't have a function for this. ## Let's build our own:

# First attempt:
ddiscunif <- function(x,n){1/n}
#
ddiscunif(1,5)
ddiscunif(2,5)
# looks ok so far... but
ddiscunif(6,5) # not so good : should be 0
ddiscunif(-1,5) # not so good : should be 0

# second attempt
ddiscunif <- function(x,n){
	sum(rep(x,n)==(1:n))/n
	}
# How does this work?

#
ddiscunif(1,5)
ddiscunif(2,5)
# looks ok so far
ddiscunif(6,5) # better
ddiscunif(-1,5) # better

# but... what about if we feed in a vector:
ddiscunif(c(0,1,2),5)
# not so good. 
# we want 0, 0.2,0.2

# third attempt:
# safe for all inputs and can take vectors

ddiscunif <- function(x,n){
	out <- rep(NA,length(x))
for(i in 1:length(x)){
	out[i] <- sum(rep(x[i],n)==(1:n))/n
    	}
	return(out)
	}
## Can you see how this is working?

ddiscunif(1,5)
ddiscunif(2,5)
# looks ok so far
ddiscunif(6,5) # better
ddiscunif(-1,5) # better

# but... what about if we feed in a vector:
ddiscunif(c(0,1,2),5)

### Best approach using
### built "%in%" function (due to Danping)

ddiscunif <- function(x,n){
    index <- x%in%(1:n)
    # match each element of x in the set {1,2,...,n}
    # return logical values, i.e. TRUE or FALSE.
      out <- rep(0, length(x))
      out[index] <- 1/n
    # "legitimate" points of x receive the point mass 1, others get 0.
      return(out)
  }


n <- 36
xseq <- -1:38
yseq <- ddiscunif(xseq,n)

plot(xseq,yseq,xlab="Values of x",ylim=c(0,1), ylab="P(X=x)",
	main=paste("pmf for Discrete Uniform distribution with  n=",n))

Poisson Distribution

lambda <- 1

dpois(0,lambda)
# check:
lambda^0*exp(-lambda)/factorial(0)

dpois(1,lambda)
# check:
lambda^1*exp(-lambda)/factorial(1)

## Why is this the same? (Hint - try a different lambda)

dpois(2,lambda)
# check:
lambda^2*exp(-lambda)/factorial(2)

## Some silly values:
dpois(-1,lambda)
dpois(1.5,lambda) # gives a warning



xseq <- 0:10
yseq <- dpois(xseq,lambda)
sum(yseq)
## Is it really 1?
sum(yseq)-1  # How do we explain this? Rounding error

options(digits=15) #This increases the number of digits printed
sum(yseq)

plot(xseq,yseq,xlab="Values of x",ylim=c(0,1), ylab="P(X=x)",
	main=paste("pmf for Poisson distribution with  lambda=",lambda))

# Try some other lambdas:
lambda <- 1

xseq <- 0:10
yseq <- dpois(xseq,lambda)
plot(xseq,yseq,xlab="Values of x",ylim=c(0,1), ylab="P(X=x)",
	main=paste("pmf for Poisson distribution with  lambda=",lambda))


lambda <- 2

xseq <- 0:10
yseq <- dpois(xseq,lambda)
plot(xseq,yseq,xlab="Values of x",ylim=c(0,1), ylab="P(X=x)",
	main=paste("pmf for Poisson distribution with  lambda=",lambda))