Difference between revisions of "R tutorial"

input prompt

>

> help(rnorm)
>
> ?rnorm
>
> ?binom     
No documentation for 'binom' in specified packages and libraries:
you could try '??binom'
> ??binom
>
> ?Binomial
>
> ?"+"
> ?"~"
> ?"["
> ?"%in%"
>
> ?var
> getwd()
[1] "/Users/steipe/R"
> setwd("~") 
> getwd()
[1] "/Users/steipe"
> setwd("~/../chen")  
> getwd()
[1] "/Users/chen"
> setwd("/Users/steipe/abc/R_samples")  
> getwd()
[1] "/Users/steipe/abc/R_samples"
> ?Startup
> ls() 
character(0)
>
> a <- 1; b <-2; eps <- 0.0001
> ls() 
[1] "a"   "b"   "eps"
>
> rm(a) 
> ls()
[1] "b"   "eps"
>
> rm(list= ls())
> ls() 
character(0)
>
> library()
> search()
 [1] ".GlobalEnv"        "tools:RGUI"        "package:stats"     "package:graphics" 
 [5] "package:grDevices" "package:utils"     "package:datasets"  "package:methods"  
 [9] "Autoloads"         "package:base"
> ?vignette
>
# Browse the data sets of the seqinr package and load one of them (aaindex).
?data
data(package="seqinr")   # list the available data
data(aaindex)            # load aaindex
?aaindex                 # what is this?
aaindex$FASG890101       # two of the indices ...
aaindex$PONJ960101

# plot amino acid codes by hydrophobicity and volume
plot(aaindex$FASG890101$I, aaindex$PONJ960101$I, xlab="hydrophobicity", ylab="volume", type="n")
text(aaindex$FASG890101$I, aaindex$PONJ960101$I, labels=a(names(aaindex$FASG890101$I)))
choosebank("swissprot")
query("mySeq", "N=MBP1_YEAST")
mbp1 <- getSequence(mySeq)
closebank()
x <- AAstat(mbp1[[1]])
barplot(sort(x$Compo))
# Make sure the add-on packages are installed, then attach them.
# requireNamespace() only tests whether a package is available (it returns
# FALSE instead of failing), so it is used for the check. library() is used
# for attaching, because it stops with an error if the package is still
# missing after the installation attempt.
if (!requireNamespace("seqinr", quietly = TRUE)) {
    install.packages("seqinr")
}
library(seqinr)

if (!requireNamespace("sos", quietly = TRUE)) {
    install.packages("sos")
}
library(sos)

findFn("moving average")
# sample script:
# define a vector
a <- c(1, 1, 2, 3, 5, 8, 13)
# list its contents
a
# calculate the mean of its values
mean(a)
source("sample.R")
# sample script:
# define a vector
a <- c(1, 1, 2, 3, 5, 8, 13)
# list its contents
print(a)
# calculate the mean of its values
print(mean(a))
?sink
5
5 + 3
5 + 1 / 2
3 * 2 + 1
3 * (2 + 1)
2^3 # Exponentiation
8 ^ (1/3) # Third root via exponentiation
7 %% 2  # Modulo operation (remainder of integer division)
7 %/% 2 # Integer division
cos(pi) #"pi" is a predefined constant.
sin(pi) # Note the rounding error. This number is not really different from zero.
sin(30 * pi/180) # Trigonometric functions use radians as their argument - this conversion calculates sin(30 degrees)
exp(1) # "e" is not predefined, but easy to calculate.
log(exp(1)) # functions can be arguments to functions - they are evaluated from the inside out.
log(10000) / log(10) # log() calculates natural logarithms; convert to any base by dividing by the log of the base. Here: log to base 10.
exp(complex(r=0, i=pi)) #Euler's identity
complex(1)
complex(4)
complex(1, 2) # imaginary part missing - defaults to zero
complex(1, 2, 3) # one complex number
complex(4, 2, 3) # four complex numbers
complex(real = 0, imaginary = pi) # defining via named parameters
complex(imaginary = pi, real = 0) # same thing - if names are used, order is not important
complex(re = 0, im = pi) # names can be abbreviated ...
complex(r = 0, i = pi) # ... to the shortest string that is unique among the named parameters. Use this with discretion to keep your code readable.
complex(i = pi, 1, 0) # Think: what have I done here? Why does this work?
exp(complex(i = pi, 1, 0)) # (The complex number above is the same one as in Euler's identity.)
?make.names
?reserved
a <- 5
a
a + 3
b <- 8
b
a + b
a == b # not assignment: equality test
a != b # not equal
a < b  # less than
# I don't like...
col <- c("red", "grey")
hist(rnorm(200), col=col)

# I prefer...
rgStripes <- c("red", "grey")
barplot(1:10, col=rgStripes)
# info(): print an object followed by a short report on how R stores it.
#   x: any R object.
# Prints the object itself, its str() summary, its mode, type and class,
# and - if the object has any - its attributes.
info <- function(x) {
    print(x)
    cat("str:    ")
    str(x)

    # one line per property: label, value(s), newline
    labels <- c("mode:   ", "typeof: ", "class:  ")
    probes <- list(mode, typeof, class)
    for (k in seq_along(labels)) {
        cat(labels[k], probes[[k]](x), "\n")
    }

    # attributes are only reported when there are some
    extras <- attributes(x)
    if (!is.null(extras)) {
        cat("attributes:\n")
        print(extras)
    }
}
info( 3 > 5 ) # Note: 3 > 5 is a logical expression, its value is FALSE.
info( 3 < 5 ) 

info( 3.0 )  # Double precision floating point number
info( 3.0e0 )  # Same value, exponential notation

info( 3 )  # Note: numbers are double precision floats by default.
info( as.integer(3) )  # If we really want an integer, we must coerce to type integer.

info( as.character(3) )  # Forcing the number to be interpreted as a character.

# More coercions. For each of these, first think what result you would expect:
info( as.numeric("3") )   # character as numeric
info( as.numeric("3.141592653") )   # string as numeric
info( as.numeric(pi) )   # not a string, but a predefined constant
info( as.numeric("pi") )   # another string as numeric. Ooops - what went wrong?

info( as.complex(1) )  
info( as.logical(0) )  
info( as.logical(1) )  
info( as.logical(-1) )  
info( as.logical(pi) )   # any non-zero number is TRUE ...
info( as.logical("pi") )   # ... but not non-numeric types. NA is "Not Available".

info( as.character(pi) )

info( Inf )
info( NaN )
info( NA )
info( NULL )
info( as.factor("M") )     # factor
info( Sys.time() )         # time
info( letters )            # inbuilt
info( 1:4 )                # numeric vector
info( matrix(1:4, nrow=2)) # numeric matrix
info( list(arabic = 1:3, roman = c("I", "II", "III")))
info( data.frame(arabic = 1:3, roman = c("I", "II", "III"), stringsAsFactors=FALSE))
info( a ~ b )              # a formula
info( info )               # the function itself
# Create a vector and list its contents and length:
f <- c(1, 1, 2, 3, 5, 8, 13, 21)  # the first eight Fibonacci numbers
f
length(f)

# Various ways to retrieve values from the vector.
f[1] # By index: "1" is first element. 
f[length(f)] # length() is the index of the last element.
1:4 # This is the range operator
f[1:4] # using the range operator (it generates a sequence and returns it in a vector)
f[4:1] # same thing, backwards
seq(from=2, to=6, by=2) # The seq() function is a flexible, generic way to generate sequences
seq(2, 6, 2) # Same thing: arguments in default order
f[seq(2, 6, 2)]

# since a scalar is a vector of length 1, does this work?
5[1]

# ...using an index vector with positive indices
a <- c(1, 3, 4, 1) # the elements of index vectors must be valid indices of the target vector. The index vector can be of any length.
f[a] # Here, four elements are retrieved from f[]

# ...using an index vector with negative indices
a <- -(1:4) # If elements of index vectors are negative integers, the corresponding elements are excluded.
f[a] # Here, the first four elements are omitted from f[]
f[-((length(f)-3):length(f))] # Here, the last four elements are omitted from f[]

# ...using a logical vector
f>4 # A logical expression operating on the target vector returns a vector of logical elements. It has the same length as the target vector.
f[f>4]; # We can use this logical vector to extract only elements for which the logical expression evaluates as TRUE

# Example: extending the Fibonacci series for three steps. 
# Think: How does this work? What numbers are we adding here and why does the result end up in the vector?
f <- c(f, f[length(f)-1] + f[length(f)]); f 
f <- c(f, f[length(f)-1] + f[length(f)]); f 
f <- c(f, f[length(f)-1] + f[length(f)]); f 

# coercion
c(1, 2.0, "3", TRUE)
[1] "1"    "2"    "3"    "TRUE"
f
f+1
f*2

# computing with two vectors of same length
a <- f[-1]; a # like f[], but omitting the first element
b <- f[1:(length(f)-1)]; b # like f[], but shortened by the last element
c <- a / b # the "golden ratio", phi (~1.61803 or (1+sqrt(5))/2 ), an irrational number, is approximated by the ratio of two consecutive Fibonacci numbers.
c
abs(c - ((1+sqrt(5))/2)) # Calculating the error of the approximation, element by element
a <- 1:12; a
dim(a) <- c(2,6); a
dim(a) <- c(2,2,3); a
dim(a)    # returns a vector
dim(a)[3]  # only the third value of the vector
a <- 1:4
b <- 5:8
c <- rbind(a, b); c
d <- cbind(a, b); d
e <- cbind(d, 9:12); e
e[1,] # first row
e[,2] # second column
e[3,2] # element at index row=3, column = 2
e[3:4, 1:2] # submatrix
pUC19 <- list(size=2686, marker="ampicillin", ori="ColE1", accession="L01397", BanI=c(235, 408, 550, 1647) )
pUC19[[1]]
pUC19[[2]]
pUC19$ori
pUC19$BanI[2]
Name	Size	Marker	Ori	Sites
pUC19	2686	Amp	ColE1	EcoRI, SacI, SmaI, BamHI, XbaI, PstI, HindIII
pBR322	4361	Amp, Tet	ColE1	EcoRI, ClaI, HindIII
pACYC184	4245	Tet, Cam	p15A	ClaI, HindIII

Vectors <- read.table("vectors.tsv", sep="\t", header=TRUE, stringsAsFactors = FALSE)
Vectors
V2 <- edit(Vectors)
Vectors[1, ]
Vectors[2, ]
Vectors[ ,2 ]

Vectors$Name

Vectors$Size > 3000
Vectors$Name[Vectors$Size > 3000]
Vectors$Name[Vectors$Ori != "ColE1"]

Vectors[order(Vectors$Size), ]

grep("Tet", Vectors$Marker)
Vectors[grep("Tet", Vectors$Marker), ]
Vectors[grep("Tet", Vectors$Marker), "Ori"]
as.vector(Vectors[grep("Tet", Vectors$Marker), "Ori"])
# biCode(): build a five-letter code from a binomial species name: the first
# three letters of the genus followed by the first two letters of the
# species, in upper case (e.g. "Homo sapiens" -> "HOMSA").
#   s: character vector of names; words are separated by whitespace, and
#      leading or trailing whitespace is ignored.
# Returns a character vector of the same length as s. Elements that do not
# contain at least two words yield NA. A genus or species shorter than the
# requested number of letters contributes only the letters it has.
biCode <- function(s) {
    words <- strsplit(trimws(s), "\\s+")
    vapply(words, function(w) {
        if (length(w) < 2) {
            return(NA_character_)
        }
        # take the pieces from the two words directly, rather than
        # overwriting character positions inside the original string
        toupper(paste0(substr(w[1], 1, 3), substr(w[2], 1, 2)))
    }, character(1), USE.NAMES = FALSE)
}

biCode("Homo sapiens")              # HOMSA
biCode("saccharomyces cerevisiae")  # SACCE
# fibSeq(): return the first n numbers of the Fibonacci series.
#   n: how many elements to return; a single, non-missing number.
#      Fractional values are truncated to a whole number.
# Returns c(0) if n < 1, otherwise a numeric vector of length n that
# starts 1, 1, 2, 3, 5, ...
fibSeq <- function(n) {
   stopifnot(is.numeric(n), length(n) == 1, !is.na(n))
   # 3:n counts *down* when n < 3, so work with a whole number and guard
   # the loop explicitly
   n <- floor(n)
   if (n < 1) { return( c(0) ) }

   # allocate the result once instead of growing it in the loop
   v <- numeric(n)
   v[seq_len(min(n, 2))] <- 1
   if (n >= 3) {
      for ( i in 3:n ) {
         v[i] <- v[i - 2] + v[i - 1]
      }
   }
   v
}
# rollDice(): simulate rolling a die len times (first attempt).
#   len: number of rolls (default 1)
#   MIN, MAX: passed to runif() as its min and max (defaults 1 and 6)
# Each roll draws one uniform random number and converts it with
# as.integer(); the results are collected in v, which is returned.
# NOTE(review): as.integer() drops the fractional part, so with max=MAX the
# value MAX is presumably never returned - check with table(rollDice(1000)).
rollDice <- function(len=1, MIN=1, MAX=6) {
	v <- rep(0, len)
    for (i in 1:len) {
    	x <- runif(1, min=MIN, max=MAX)
    	x <- as.integer(x)
    	v[i] <- x
    }
	return(v)
}
rollDice()
table(rollDice(1000))
debug(rollDice)
rollDice(10)
debugging in: rollDice(10)
debug at #1: {
    v <- rep(0, len)
    for (i in 1:len) {
        x <- runif(1, min = MIN, max = MAX)
        x <- as.integer(x)
    	v[i] <- x
    }
    return(v)
}
Browse[2]> 
debug at #2: v <- rep(0, len)
Browse[2]> 
debug at #3: for (i in 1:len) {
    x <- runif(1, min = MIN, max = MAX)
    x <- as.integer(x)
    v[i] <- x
}
Browse[2]> 
debug at #4: x <- runif(1, min = MIN, max = MAX)
Browse[2]> 
debug at #5: x <- as.integer(x)
Browse[2]> x   # Here we examine the current value of x
[1] 4.506351
Browse[2]> 
debug at #6: v[i] <- x
Browse[2]> 
debug at #4: x <- runif(1, min = MIN, max = MAX)
Browse[2]> v
[1] 4      # Aha: as.integer() truncates, but doesn't round!
Browse[2]> Q
# rollDice(): simulate rolling a die len times, one draw per loop iteration.
#   len: number of rolls (default 1)
#   MIN, MAX: smallest and largest face value (defaults 1 and 6)
# The upper bound handed to runif() is MAX+1, so that truncating the draw
# with as.integer() can also produce MAX.
rollDice <- function(len=1, MIN=1, MAX=6) {
    rolls <- rep(0, len)
    for (k in 1:len) {
        rolls[k] <- as.integer(runif(1, min=MIN, max=MAX+1))
    }
    rolls
}
table(rollDice(1000))
# Disclaimer: this function would be better
# written as ...

# rollDice(): simulate rolling a die len times in one vectorised call.
#   len: number of rolls (default 1)
#   MIN, MAX: smallest and largest face value (defaults 1 and 6)
# Returns an integer vector of length len.
rollDice <- function(len=1, MIN=1, MAX=6) {
    draws <- runif(len, min=MIN, max=MAX+1)
    as.integer(draws)
}

# Check:
table(rollDice(1000))
# ... since runif() can return a vector of deviates,
# but we would not be able to check the value of
# individual trials.

# Disclaimer 2: the function relies on the way as.integer() converts a number,
# which is to drop the digits after the decimal point. More explicit and
# therefore clearer would be to use the function floor() instead. Here, the
# truncation is not a side effect, but the desired behaviour. This is
# actually important: a reader cannot tell from the call alone how as.integer()
# constructs an integer from a float (R documents that it truncates towards
# zero), and might assume that it rounds instead. But rounding
# would give a wrong distribution! An error that may be hard to spot. (You
# can easily try using the round() function and think about how the result is wrong.)

# A better alternative is thus to write:

# rollDice(): simulate rolling a die len times in one vectorised call.
#   len: number of rolls (default 1)
#   MIN, MAX: smallest and largest face value (defaults 1 and 6)
# Draws from the interval (MIN, MAX+1) and rounds down explicitly with
# floor(); the result is a numeric vector of length len.
rollDice <- function(len=1, MIN=1, MAX=6) {
    upper <- MAX + 1
    floor(runif(n = len, min = MIN, max = upper))
}

# Disclaimer 3
# A base R function exists that already rolls dice in the required way: sample()

table(sample(1:6, 1000, replace=TRUE))

@@ Line 127: / Line 127: @@
-That's all fine, but you will soon notice that '''R''''s help documentation is not all that helpful for newcomers (who need the most help). Here's what you might look for.
+That's all fine, but you will soon notice that '''R''''s help documentation is not all that helpful for newcomers (who need the most help). To illustrate, open the help window for the function {{c|var}}.
+<source lang="rsplus">
+> ?var
+</source>
+Here's what you might look for.
 * The '''Description''' section describes the function in general technical terms.
 * The '''Usage''' section tells you what arguments are required (these don't have defaults), what arguments have defaults, and what the defaults are, and whether additional arguments ("...") are allowed. Often a function comes in several variants, you will find them here.
@@ Line 139: / Line 144: @@
 * Clearly commented examples that relate to the most frequent use cases.
-* Explanations '''why''' a particular function is done in a particular way.
+* Explanations '''why''' a particular function is done in a particular way (e.g. why the denominator is ''n-1'' for {{c|sd()}} and {{c|var()}}).
 * Notes on common errors.
 * An exhaustive list of alternatives.

Difference between revisions of "R tutorial"

Revision as of 22:19, 20 April 2015

Contents

The environment

Installation

User interface

Install R Studio

The Help system

Working directory

.Rprofile - startup commands

... unix systems

... Mac OS X systems

...Windows systems

The "Workspace"

Packages

Scripts

Simple commands

Operators

Functions

Variables

Scalar data

Vectors

Matrices

Lists

Data frames

Writing your own functions

Coding style

Debugging

Finishing

Notes

Further reading and resources

Navigation menu

Personal tools

Namespaces

Variants

Views

More

Search

Sections

Tools