R tutorial

input prompt

>

> help(rnorm)
>
> ?rnorm
>
> ?binom     
No documentation for 'binom' in specified packages and libraries:
you could try '??binom'
> ??binom
>
> ?Binomial
>
> ?"+"
> ?"~"
> ?"["
> ?"%in%"
>
> getwd()
[1] "/Users/steipe/R"
> setwd("~") 
> getwd()
[1] "/Users/steipe"
> setwd("~/../chen")  
> getwd()
[1] "/Users/chen"
> setwd("/Users/steipe/abc/R_samples")  
> getwd()
[1] "/Users/steipe/abc/R_samples"
> ls() 
character(0)
>
> a <- 1; b <-2; eps <- 0.0001
> ls() 
[1] "a"   "b"   "eps"
>
> rm(a) 
> ls()
[1] "b"   "eps"
>
> rm(list= ls())
> ls() 
character(0)
>
> library()
> search()
 [1] ".GlobalEnv"        "tools:RGUI"        "package:stats"     "package:graphics" 
 [5] "package:grDevices" "package:utils"     "package:datasets"  "package:methods"  
 [9] "Autoloads"         "package:base"
> ?vignette
>
> ??install
> ?install.packages
> install.packages("seqinr")
--- Please select a CRAN mirror for use in this session ---
trying URL 'http://probability.ca/cran/bin/macosx/contrib/2.13/seqinr_3.0-5.tgz'
Content type 'application/x-gzip' length 4528528 bytes (4.3 Mb)
opened URL
==================================================
downloaded 4.3 Mb

The downloaded packages are in
	/var/folders/dq/dqPEEPbF0ApRU/-Tmp-//RtmpBlw/downloaded_packages
> 
> library(seqinr)
> ls("package:seqinr")
  [1] "a"                       "aaa"                     "AAstat"                 
  [4] "acnucclose"              "acnucopen"               "al2bp"                  
     [...]
[205] "where.is.this.acc"       "words"                   "words.pos"              
[208] "write.fasta"             "zscore"                 
> ?a
> a("Tyr")
[1] "Y"
> choosebank()
 [1] "genbank"       "embl"          "emblwgs"       "swissprot"     "ensembl"      
    [...]
 [31] "refseqViruses"
# Select the "swissprot" bank - one of the names listed by choosebank() above.
choosebank("swissprot")
# Query for the entry named MBP1_YEAST. The next statement reads a variable
# called `seq`, so query() is presumably expected to create it under the name
# given as its first argument.
# NOTE(review): confirm this against the installed seqinr version.
query("seq", "N=MBP1_YEAST")
mbp1 <- getSequence(seq)
closebank()
# Compute statistics for the first retrieved sequence and plot the sorted
# values of the $Compo component of the result.
x <- AAstat(mbp1[[1]])
barplot(sort(x$Compo))
# Make sure seqinr is installed, then attach it.
# requireNamespace() only tests whether the package can be loaded; the
# unconditional library() call afterwards stops with an error if the package
# is still unavailable (e.g. the installation failed), whereas require() would
# merely return FALSE.
if (!requireNamespace("seqinr", quietly = TRUE)) {
  install.packages("seqinr")
}
library(seqinr)

# Same pattern for sos.
if (!requireNamespace("sos", quietly = TRUE)) {
  install.packages("sos")
}
library(sos)

# Search for functions related to a phrase; findFn() is the reason sos is
# loaded above.
findFn("moving average")
# sample script:
# define a vector
a <- c(1, 1, 2, 3, 5, 8, 13)
# list its contents
a
# calculate the mean of its values
mean(a)
source("sample.R")
# sample script:
# define a vector
a <- c(1, 1, 2, 3, 5, 8, 13)
# list its contents
print(a)
# calculate the mean of its values
print(mean(a))
?sink
5
5 + 3
5 + 1 / 2
3 * 2 + 1
3 * (2 + 1)
2^3 # Exponentiation
8 ^ (1/3) # Third root via exponentiation
7 %% 2  # Modulo operation (remainder of integer division)
7 %/% 2 # Integer division
cos(pi) #"pi" is a predefined constant.
sin(pi) # Note the rounding error. This number is not really different from zero.
sin(30 * pi/180) # Trigonometric functions use radians as their argument - this conversion calculates sin(30 degrees)
exp(1) # "e" is not predefined, but easy to calculate.
log(exp(1)) # functions can be arguments to functions - they are evaluated from the inside out.
log(10000) / log(10) # log() calculates natural logarithms; convert to any base by dividing by the log of the base. Here: log to base 10.
exp(complex(r=0, i=pi)) #Euler's identity
complex(1)
complex(4)
complex(1, 2) # imaginary part missing - defaults to zero
complex(1, 2, 3) # one complex number
complex(4, 2, 3) # four complex numbers
complex(real = 0, imaginary = pi) # defining via named parameters
complex(imaginary = pi, real = 0) # same thing - if names are used, order is not important
complex(re = 0, im = pi) # names can be abbreviated ...
complex(r = 0, i = pi) # ... to the shortest string that is unique among the named parameters. Use this with discretion to keep your code readable.
complex(i = pi, 1, 0) # Think: what have I done here? Why does this work?
exp(complex(i = pi, 1, 0)) # (The complex number above is the same one as in Euler's identity.)
?make.names
?reserved
a <- 5
a
a + 3
b <- 8
b
a + b
a == b # not assignment: equality test
a != b # not equal
a < b  # less than
# I don't like...
col <- c("red", "grey")
hist(rnorm(200), col=col)

# I prefer...
rgStripes <- c("red", "grey")
barplot(1:10, col=rgStripes)
# Each line assigns a value to a, prints it, and then reports its mode() and
# its typeof().
a <- 3 > 5; a; mode(a); typeof(a) # Note: 3 > 5 is a logical expression, its value is FALSE.
a <- 3 < 5; a; mode(a); typeof(a) # Also a logical expression; its value is TRUE.

a <- 3.0;   a;  mode(a); typeof(a) # Double precision floating point number
a <- 3.0e0; a;  mode(a); typeof(a) # Same value, exponential notation

a <- 3;     a;  mode(a); typeof(a) # Note: numbers are double precision floats by default.
a <- as.integer(3);  a;  mode(a); typeof(a) # If we really want an integer, we must coerce to type integer.

a <- "3"; a;  mode(a); typeof(a) # The quotes make this a character string, not a number.

# More coercions. For each of these, first think what result you would expect:
as.numeric("3") # character as numeric
as.numeric("3.141592653") # string as numeric
as.numeric("pi") # another string as numeric
as.numeric(pi) # not a string, but a predefined constant

as.logical(0)
as.logical(1)
as.logical(-1)
as.logical(pi) # any non-zero number is TRUE ...
as.logical("pi") # ... but not non-numeric types. NA is "Not Available".
# Create a vector holding the first eight Fibonacci numbers (from the third
# element on, each one is the sum of the two before it), then list its
# contents and its length.
f <- c(1, 1, 2, 3, 5, 8, 13, 21)
f
length(f)

# Various ways to retrieve values from the vector.
f[1] # By index: "1" is first element. 
f[length(f)] # length() is the index of the last element.
1:4 # This is the range operator
f[1:4] # using the range operator (it generates a sequence and returns it in a vector)
f[4:1] # same thing, backwards
seq(from=2, to=6, by=2) # The seq() function is a flexible, generic way to generate sequences
seq(2, 6, 2) # Same thing: arguments in default order
f[seq(2, 6, 2)]

# ...using an index vector with positive indices
a <- c(1, 3, 4, 1) # the elements of index vectors must be valid indices of the target vector. The index vector can be of any length.
f[a] # Here, four elements are retrieved from f[]

# ...using an index vector with negative indices
a <- -(1:4) # If elements of index vectors are negative integers, the corresponding elements are excluded.
f[a] # Here, the first four elements are omitted from f[]
f[-((length(f)-3):length(f))] # Here, the last four elements are omitted from f[]

# ...using a logical vector
f>4 # A logical expression operating on the target vector returns a vector of logical elements. It has the same length as the target vector.
f[f>4]; # We can use this logical vector to extract only elements for which the logical expression evaluates as TRUE

# Example: extending the Fibonacci series for three steps. 
# Think: How does this work? What numbers are we adding here and why does the result end up in the vector?
f <- c(f, f[length(f)-1] + f[length(f)]); f 
f <- c(f, f[length(f)-1] + f[length(f)]); f 
f <- c(f, f[length(f)-1] + f[length(f)]); f 

# coercion
c(1, 2.0, "3", TRUE)
# [1] "1"    "2"    "3"    "TRUE"
f
f+1
f*2

# Computing with two vectors of the same length: the operation is applied
# element by element.
a <- f[-1]; a # like f[], but omitting the first element
b <- f[1:(length(f)-1)]; b # like f[], but shortened by the last element
# The "golden ratio", phi (~1.61803 or (1+sqrt(5))/2 ), an irrational number,
# is approximated by the ratio of two consecutive Fibonacci numbers.
# Note that the variable name c is also the name of the base function c().
c <- a / b
c
abs(c - ((1+sqrt(5))/2)) # Calculating the error of the approximation, element by element
a <- 1:12; a
dim(a) <- c(2,6); a
dim(a) <- c(2,2,3); a
dim(a)    # returns a vector
dim(a)[3]  # only the third value of the vector
a <- 1:4
b <- 5:8
c <- rbind(a, b); c
d <- cbind(a, b); d
e <- cbind(d, 9:12); e
e[1,] # first row
e[,2] # second column
e[3,2] # element at index row=3, column = 2
e[3:4, 1:2] # submatrix
pUC19 <- list(size=2686, marker="ampicillin", ori="ColE1", accession="L01397", BanI=c(235, 408, 550, 1647) )
pUC19[[1]]
pUC19[[2]]
pUC19$ori
pUC19$BanI[2]
Name	Size	Marker	Ori	Sites
pUC19	2686	Amp	ColE1	EcoRI, SacI, SmaI, BamHI, XbaI, PstI, HindIII
pBR322	4361	Amp, Tet	ColE1	EcoRI, ClaI, HindIII
pACYC184	4245	Tet, Cam	p15A	ClaI, HindIII

Vectors <- read.table("vectors.tsv", sep="\t", header=TRUE)
Vectors
V2 <- edit(Vectors)
Vectors[1, ]
Vectors[2, ]
Vectors[ ,2 ]

Vectors$Name

Vectors$Size > 3000
Vectors$Name[Vectors$Size > 3000]
Vectors$Name[Vectors$Ori != "ColE1"]

Vectors[order(Vectors$Size), ]

grep("Tet", Vectors$Marker)
Vectors[grep("Tet", Vectors$Marker), ]
Vectors[grep("Tet", Vectors$Marker), "Ori"]
as.vector(Vectors[grep("Tet", Vectors$Marker), "Ori"])
# biCode: build the upper-case code made from the first three letters of the
# genus and the first two letters of the species of a binomial name.
#
# s: character vector of binomial names ("Genus species"); words may be
#    separated by any amount of whitespace, and leading/trailing whitespace
#    is ignored.
# Returns a character vector of the same length as s. Elements that do not
# contain at least two words (or are NA) yield NA.
biCode <- function(s) {
  # Split every name into its words; trimming first keeps leading blanks
  # from producing an empty first "word".
  words <- strsplit(trimws(as.character(s)), "\\s+")
  vapply(words, function(w) {
    if (length(w) < 2L || anyNA(w)) {
      return(NA_character_)
    }
    # Build the code from the two words themselves rather than from fixed
    # character positions in the whole string, so short genus names work.
    toupper(paste0(substr(w[1], 1, 3), substr(w[2], 1, 2)))
  }, character(1), USE.NAMES = FALSE)
}

biCode("Homo sapiens")              # HOMSA
biCode("saccharomyces cerevisiae")  # SACCE
# fib: return the first n Fibonacci numbers (1, 1, 2, 3, 5, ...) as a numeric
# vector.
#
# n: a single number; a fractional n is rounded down.
# Returns a vector of length n. For n < 1 the single value 0 is returned.
# Stops with an error if n is not a single non-missing number.
fib <- function(n) {
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n))
  if (n < 1) {
    return(0)
  }
  # Rounding down keeps a fractional n from producing a descending 3:n
  # sequence and therefore extra elements.
  n <- floor(n)
  # Allocate the result once instead of growing it with c() in the loop.
  v <- numeric(n)
  v[seq_len(min(n, 2))] <- 1
  # seq_len(n)[-(1:2)] is empty for n <= 2, so the loop is skipped then.
  for (i in seq_len(n)[-(1:2)]) {
    v[i] <- v[i - 2] + v[i - 1]
  }
  v
}
# rollDice: simulate `len` rolls of a die with faces MIN..MAX and return the
# results as a vector, one element per roll.
# NOTE: this first version is the subject of the debugging session that
# follows, so it is kept exactly as it is. as.integer() truncates the uniform
# deviate drawn between MIN and MAX, so the value MAX practically never occurs.
rollDice <- function(len=1, MIN=1, MAX=6) {
	v <- c()
    for (i in 1:len) {
    	x <- runif(1, min=MIN, max=MAX)
    	x <- as.integer(x)
    	v <- c(v, x)
    }
	return(v)
}
rollDice()
table(rollDice(1000))
debug(rollDice)
rollDice(10)
debugging in: rollDice(10)
debug at #1: {
    v <- c()
    for (i in 1:len) {
        x <- runif(1, min = MIN, max = MAX)
        x <- as.integer(x)
        v <- c(v, x)
    }
    return(v)
}
Browse[2]> 
debug at #2: v <- c()
Browse[2]> 
debug at #3: for (i in 1:len) {
    x <- runif(1, min = MIN, max = MAX)
    x <- as.integer(x)
    v <- c(v, x)
}
Browse[2]> 
debug at #4: x <- runif(1, min = MIN, max = MAX)
Browse[2]> 
debug at #5: x <- as.integer(x)
Browse[2]> x   # Here we examine the current value of x
[1] 4.506351
Browse[2]> 
debug at #6: v <- c(v, x)
Browse[2]> 
debug at #4: x <- runif(1, min = MIN, max = MAX)
Browse[2]> v
[1] 4      # Aha: as.integer() truncates, but doesn't round!
Browse[2]> Q
# rollDice: simulate `len` rolls of a die with faces MIN..MAX.
#
# len: number of rolls (0 gives an empty result).
# MIN, MAX: smallest and largest face value.
# Returns an integer vector of length len.
rollDice <- function(len=1, MIN=1, MAX=6) {
  # Allocate the result once; seq_len() gives no iterations for len = 0,
  # whereas 1:len would run twice (1, 0).
  v <- integer(len)
  for (i in seq_len(len)) {
    # Draw from MIN..MAX+1 because as.integer() truncates: this way every
    # face, including MAX, gets an interval of width 1.
    x <- runif(1, min = MIN, max = MAX + 1)
    x <- as.integer(x)
    v[i] <- x
  }
  v
}
table(rollDice(1000))
# Disclaimer: this function would actually be better
# written as ...

# One-call version: draw all `len` uniform deviates between MIN and MAX + 1
# at once, then truncate them to integers.
rollDice <- function(len=1, MIN=1, MAX=6) {
  deviates <- runif(len, min = MIN, max = MAX + 1)
  rolls <- as.integer(deviates)
  return(rolls)
}

# Check:
table(rollDice(1000))
# ... since runif() can return a vector of deviates,
# but we would not be able to check the value of
# individual trials.

# Disclaimer 2
# A base R function exists that we can use for this
# purpose: sample()

table(sample(1:6, 1000, replace=TRUE))

R tutorial

Contents

The environment

Installation

User interface

A note on R Studio

The Help system

Working directory

.Rprofile - startup commands

... unix systems

... Mac OS X systems

... Windows systems

Workspace

Packages

Scripts

Simple commands

Operators

Functions

Variables

Scalar data

Vectors

Matrices

Lists

Data frames

Writing your own functions

Debugging

Notes

Further reading and resources

Navigation menu

Personal tools

Namespaces

Variants

Views

More

Search

Sections

Tools