Difference between revisions of "R tutorial"

input prompt

>

> help(rnorm)
>
> ?rnorm
>
> ?binom     
No documentation for 'binom' in specified packages and libraries:
you could try '??binom'
> ??binom
>
> ?Binomial
>
> getwd()
[1] "/Users/steipe/R"
> setwd("~") 
> getwd()
[1] "/Users/steipe"
> setwd("~/../chen")  
> getwd()
[1] "/Users/chen"
> setwd("/Users/steipe/abc/R_samples")  
> getwd()
[1] "/Users/steipe/abc/R_samples"
> ls() 
character(0)
>
> a <- 1; b <-2; eps <- 0.0001
> ls() 
[1] "a"   "b"   "eps"
>
> rm(a) 
> ls()
[1] "b"   "eps"
>
> rm(list= ls()) 
> ls() 
character(0)
>
> library()
> search()
 [1] ".GlobalEnv"        "tools:RGUI"        "package:stats"     "package:graphics" 
 [5] "package:grDevices" "package:utils"     "package:datasets"  "package:methods"  
 [9] "Autoloads"         "package:base"
> ?vignette
>
> ??install
> ?install.packages
> install.packages("seqinr")
--- Please select a CRAN mirror for use in this session ---
trying URL 'http://probability.ca/cran/bin/macosx/contrib/2.13/seqinr_3.0-5.tgz'
Content type 'application/x-gzip' length 4528528 bytes (4.3 Mb)
opened URL
==================================================
downloaded 4.3 Mb

The downloaded packages are in
	/var/folders/dq/dqPEEPbF0ApRU/-Tmp-//RtmpBlw/downloaded_packages
> 
> library("seqinr")
> ls("package:seqinr")
  [1] "a"                       "aaa"                     "AAstat"                 
  [4] "acnucclose"              "acnucopen"               "al2bp"                  
     [...]
[205] "where.is.this.acc"       "words"                   "words.pos"              
[208] "write.fasta"             "zscore"                 
> ?a
> a("Tyr")
[1] "Y"
> choosebank()
 [1] "genbank"       "embl"          "emblwgs"       "swissprot"     "ensembl"      
    [...]
 [31] "refseqViruses"
choosebank("swissprot")
query("seq", "N=MBP1_YEAST")
mbp1 <- getSequence(seq)
closebank()
x <- AAstat(mbp1[[1]])
barplot(sort(x$Compo))
# sample script:
# define a vector
a <- c(1, 1, 2, 3, 5, 8, 13)
# list its contents
a
# calculate the mean of its values
mean(a)
source("sample.R")
# sample script:
# define a vector
a <- c(1, 1, 2, 3, 5, 8, 13)
# list its contents
print(a)
# calculate the mean of its values
print(mean(a))
?sink
5
5 + 3
5 + 1 / 2
3 * 2 + 1
3 * (2 + 1)
2^3 # Exponentiation
8 ^ (1/3) # Third root via exponentiation
7 %% 2  # Modulo operation (remainder of integer division)
7 %/% 2 # Integer division
cos(pi) #"pi" is a predefined constant.
sin(pi) # Note the rounding error. This number is not really different from  zero.
sin(30 * pi/180) # Trigonometric functions use radians as their argument - this conversion calculates sin(30 degrees)
exp(1) # "e" is not predefined, but easy to calculate.
log(exp(1)) # functions can be arguments to functions - they are evaluated from the inside out.
log(10000) / log(10) # log() calculates natural logarithms; convert to any base by dividing by the log of the base. Here: log to base 10.
exp(complex(r=0, i=pi)) #Euler's identity
complex(1)
complex(4)
complex(1, 2) # imaginary part missing - defaults to zero
complex(1, 2, 3) # one complex number
complex(4, 2, 3) # four complex numbers
complex(real = 0, imaginary = pi) # defining via named parameters
complex(imaginary = pi, real = 0) # same thing - if names are used, order is not important
complex(re = 0, im = pi) # names can be abbreviated ...
complex(r = 0, i = pi) # ... to the shortest string that is unique among the named parameters. Use this with discretion to keep your code readable.
complex(i = pi, 1, 0) # Think: what have I done here? Why does this work?
?make.names
?reserved
a <- 5
a
a + 3
b <- 8
b
a + b
a == b # equality test
a != b # not equal
a < b  # less than
# Logical values: the result of a comparison is assigned to a; then its value, mode and type are shown.
a <- 3 > 5; a; mode(a); typeof(a) # Note: 3 > 5 is a logical expression, its value is FALSE.
a <- 3 < 5; a; mode(a); typeof(a) # 3 < 5 is also a logical expression, its value is TRUE.

a <- 3.0;   a;  mode(a); typeof(a) # Double precision floating point number
a <- 3.0e0; a;  mode(a); typeof(a) # Same value, exponential notation

a <- 3;     a;  mode(a); typeof(a) # Note: numbers are double precision floats by default.
a <- as.integer(3);  a;  mode(a); typeof(a) # If we really want an integer, we must coerce to type integer.

a <- "3"; a;  mode(a); typeof(a) # Forcing the number to be interpreted as a character.

# More coercions. For each of these, first think what result you would expect:
as.numeric("3") # character as numeric
as.numeric("3.141592653") # string as numeric
as.numeric("pi") # another string as numeric
as.numeric(pi) # not a string, but a predefined constant

as.logical(0)
as.logical(1)
as.logical(-1)
as.logical(pi) # any non-zero number is TRUE ...
as.logical("pi") # ... but not non-numeric types. NA is "Not Available".
# Create a vector holding the first eight numbers of the Fibonacci series,
# then list its contents and length:
f <- c(1, 1, 2, 3, 5, 8, 13, 21)
f
length(f)

# Various ways to retrieve values from the vector.
f[1] # By index: "1" is first element. 
f[length(f)] # length() is the index of the last element.
1:4 # This is the range operator
f[1:4] # using the range operator (it generates a sequence and returns it in a vector)
f[4:1] # same thing, backwards
seq(from=2, to=6, by=2) # The seq() function is a flexible, generic way to generate sequences
seq(2, 6, 2) # Same thing: arguments in default order
f[seq(2, 6, 2)]

# ...using an index vector with positive indices
a <- c(1, 3, 4, 1) # the elements of index vectors must be valid indices of the target vector. The index vector can be of any length.
f[a] # Here, four elements are retrieved from f[]

# ...using an index vector with negative indices
a <- -(1:4) # If elements of index vectors are negative integers, the corresponding elements are excluded.
f[a] # Here, the first four elements are omitted from f[]
f[-((length(f)-3):length(f))] # Here, the last four elements are omitted from f[]

# ...using a logical vector
f>4 # A logical expression operating on the target vector returns a vector of logical elements. It has the same length as the target vector.
f[f>4]; # We can use this logical vector to extract only elements for which the logical expression evaluates as TRUE

# Example: extending the Fibonacci series for three steps. 
# Think: How does this work? What numbers are we adding here and why does the result end up in the vector?
f <- c(f, f[length(f)-1] + f[length(f)]); f 
f <- c(f, f[length(f)-1] + f[length(f)]); f 
f <- c(f, f[length(f)-1] + f[length(f)]); f
f
f+1
f*2

# computing with two vectors of same length:
# a and b are copies of f[] offset by one position, so a[i] / b[i] is the ratio of element i+1 of f[] to element i.
a <- f[-1]; a # like f[], but omitting the first element
b <- f[1:(length(f)-1)]; b # like f[], but shortened by the last element
c <- a / b # the "golden ratio", phi (~1.61803 or (1+sqrt(5))/2 ), an irrational number, is approximated by the ratio of two consecutive Fibonacci numbers.
c
abs(c - ((1+sqrt(5))/2)) # Calculating the error of the approximation, element by element
a <- 1:12; a
dim(a) <- c(2,6); a
dim(a) <- c(2,2,3); a
a <- 1:4
b <- 5:8
c <- rbind(a, b); c
d <- cbind(a, b); d
e <- cbind(d, 9:12); e
e[1,] # first row
e[,2] # second column
e[3,2] # element at index row=3, column = 2
e[3:4, 1:2] # submatrix
pUC19 <- list(size=2686, marker="ampicillin", ori="ColE1", accession="L01397", BanI=c(235, 408, 550, 1647) )
pUC19[[1]]
pUC19[[2]]
pUC19$ori
pUC19$BanI[2]
Name	Size	Marker	Ori	Sites
pUC19	2686	Amp	ColE1	EcoRI, SacI, SmaI, BamHI, XbaI, PstI, HindIII
pBR322	4361	Amp, Tet	ColE1	EcoRI, ClaI, HindIII
pACYC184	4245	Tet, Cam	p15A	ClaI, HindIII

Vectors <- read.table("vectors.tsv", sep="\t", header=TRUE)
Vectors
V2 <- edit(Vectors)
Vectors[1, ]
Vectors[2, ]
Vectors[ ,2 ]

Vectors$Name

Vectors$Size > 3000
Vectors$Name[Vectors$Size > 3000]
Vectors$Name[Vectors$Ori != "ColE1"]

Vectors[order(Vectors$Size), ]

grep("Tet", Vectors$Marker)
Vectors[grep("Tet", Vectors$Marker), ]
Vectors[grep("Tet", Vectors$Marker), "Ori"]
as.vector(Vectors[grep("Tet", Vectors$Marker), "Ori"])
# lg(): logarithm to base 10 of x (works element by element on vectors).
# Uses log10() rather than log(x) / log(10): the quotient of two rounded
# natural logarithms can miss exact powers of ten (e.g. for x = 1000).
lg <- function(x) {
   log10(x)
}
lg(10000) # should be 4
# fib(): return the first n numbers of the Fibonacci series as a numeric vector.
#   n: a single finite number; a non-integer n is rounded down.
#   Returns c(0) for n < 1, otherwise a vector of length n starting 1, 1, 2, ...
fib <- function(n) {
   stopifnot(is.numeric(n), length(n) == 1, is.finite(n))
   if (n < 1) {
      return(0)
   }
   n <- floor(n)
   # Preallocate the result instead of growing it with c() on every iteration.
   v <- numeric(n)
   # The first (up to) two elements seed the series.
   v[seq_len(min(n, 2))] <- 1
   # seq_len(n)[-(1:2)] is empty for n <= 2, so the loop is skipped safely
   # (unlike 3:n, which would count downwards).
   for (i in seq_len(n)[-(1:2)]) {
      v[i] <- v[i - 2] + v[i - 1]
   }
   v
}

@@ Line 611: / Line 611: @@
 &nbsp;
 [[Category:Applied_Bioinformatics]]
+[[Category:R]]
 </div>

Difference between revisions of "R tutorial"

Revision as of 21:10, 12 October 2012

Contents

The environment

Installation

User interface

Packages

Scripts

Simple commands

Operators

Functions

Variables

Scalar data

Vectors

Matrices

Lists

Data frames

Writing your own functions

Notes

Further reading and resources

Navigation menu

Personal tools

Namespaces

Variants

Views

More

Search

Sections

Tools