Difference between revisions of "R tutorial"

input prompt

>

> help(rnorm)
>
> ?rnorm
>
> ?binom     
No documentation for 'binom' in specified packages and libraries:
you could try '??binom'
> ??binom
>
> ?Binomial
>
> getwd()
[1] "/Users/steipe/R"
> setwd("~") 
> getwd()
[1] "/Users/steipe"
> setwd("~/../chen")  
> getwd()
[1] "/Users/chen"
> setwd("/Users/steipe/abc/R_samples")  
> getwd()
[1] "/Users/steipe/abc/R_samples"
> ls() 
character(0)
>
> a <- 1; b <-2; eps <- 0.0001
> ls() 
[1] "a"   "b"   "eps"
>
> rm(a) 
> ls()
[1] "b"   "eps"
>
> rm(list= ls()) 
> ls() 
character(0)
>
> library()
> search()
 [1] ".GlobalEnv"        "tools:RGUI"        "package:stats"     "package:graphics" 
 [5] "package:grDevices" "package:utils"     "package:datasets"  "package:methods"  
 [9] "Autoloads"         "package:base"
> ?vignette
>
> ??install
> ?install.packages
> install.packages("seqinr")
--- Please select a CRAN mirror for use in this session ---
trying URL 'http://probability.ca/cran/bin/macosx/contrib/2.13/seqinr_3.0-5.tgz'
Content type 'application/x-gzip' length 4528528 bytes (4.3 Mb)
opened URL
==================================================
downloaded 4.3 Mb

The downloaded packages are in
	/var/folders/dq/dqPEEPbF0ApRU/-Tmp-//RtmpBlw/downloaded_packages
> 
> library("seqinr")
> ls("package:seqinr")
  [1] "a"                       "aaa"                     "AAstat"                 
  [4] "acnucclose"              "acnucopen"               "al2bp"                  
     [...]
[205] "where.is.this.acc"       "words"                   "words.pos"              
[208] "write.fasta"             "zscore"                 
> ?a
> a("Tyr")
[1] "Y"
> choosebank()
 [1] "genbank"       "embl"          "emblwgs"       "swissprot"     "ensembl"      
    [...]
 [31] "refseqViruses"
# Example session: plot the amino acid composition of one sequence bank entry.
# Uses functions from the seqinr package, which must be loaded first.
# NOTE(review): presumably needs network access to the bank server - confirm.
# Open one of the banks listed by choosebank().
choosebank("swissprot")
# NOTE(review): the next line reads an object named "seq" that this snippet
# never assigns, so query() presumably creates it as a side effect - confirm
# against the installed seqinr version.
query("seq", "N=MBP1_YEAST")
mbp1 <- getSequence(seq)
closebank()
# Compute statistics for the first retrieved sequence ...
x <- AAstat(mbp1[[1]])
# ... and draw its "Compo" component as a bar chart, sorted by value.
barplot(sort(x$Compo))
# sample script:
# define a vector
a <- c(1, 1, 2, 3, 5, 8, 13)
# list its contents
a
# calculate the mean of its values
mean(a)
# Run a script from a file in the working directory.
# NOTE(review): presumably sample.R holds the script shown here - confirm.
# With source() defaults (echo = FALSE), values of top-level expressions are
# not printed automatically, so the bare "a" and "mean(a)" produce no output.
source("sample.R")
# sample script, revised so that it produces output when run via source():
# define a vector
a <- c(1, 1, 2, 3, 5, 8, 13)
# list its contents - explicit print() is required inside a sourced script
print(a)
# calculate the mean of its values
print(mean(a))
# sink() diverts R output to a file or connection; see its help page.
?sink
# Simple arithmetic expressions: R evaluates them and prints the result.
5
5 + 3
5 + 1 / 2 # Division is evaluated before addition.
3 * 2 + 1
3 * (2 + 1) # Parentheses override the default order of evaluation.
2^3 # Exponentiation
8 ^ (1/3) # Cube root via exponentiation
7 %% 2  # Modulo operation (remainder of integer division)
7 %/% 2 # Integer division
# Functions
cos(pi) # "pi" is a predefined constant.
sin(pi) # Note the rounding error. This number is not really different from zero.
sin(30 * pi/180) # Trigonometric functions use radians as their argument - this conversion calculates sin(30 degrees)
exp(1) # "e" is not predefined, but easy to calculate.
log(exp(1)) # Function calls can be arguments to functions - they are evaluated from the inside out.
log(10000) / log(10) # log() calculates natural logarithms; convert to any base by dividing by the log of the base. Here: log to base 10.
exp(complex(r=0, i=pi)) # Euler's identity: e^(i * pi) = -1, up to rounding error
# Argument matching, demonstrated with complex().
# Its first three parameters are, in order: length.out, real, imaginary.
complex(1) # one complex number; real and imaginary part default to zero
complex(4) # four complex numbers
complex(1, 2) # imaginary part missing - defaults to zero
complex(1, 2, 3) # one complex number
complex(4, 2, 3) # four complex numbers
complex(real = 0, imaginary = pi) # defining via named parameters
complex(imaginary = pi, real = 0) # same thing - if names are used, order is not important
complex(re = 0, im = pi) # names can be abbreviated ...
complex(r = 0, i = pi) # ... to the shortest string that is unique among the named parameters. Use this with discretion to keep your code readable.
complex(i = pi, 1, 0) # Think: what have I done here? Why does this work?
# Help pages on valid variable names and on reserved words.
?make.names
?reserved
# Assigning values to variables and using them in expressions.
a <- 5
a
a + 3
b <- 8
b
a + b
a == b # equality test
a != b # not equal
a < b  # less than
# Scalar data types: each line assigns a value, prints it, then shows its mode and storage type.
a <- 3 > 5; a; mode(a); typeof(a) # Note: 3 > 5 is a logical expression, its value is FALSE.
a <- 3 < 5; a; mode(a); typeof(a) # This logical expression evaluates to TRUE.

a <- 3.0;   a;  mode(a); typeof(a) # Double precision floating point number
a <- 3.0e0; a;  mode(a); typeof(a) # Same value, exponential notation

a <- 3;     a;  mode(a); typeof(a) # Note: numbers are double precision floats by default.
a <- as.integer(3);  a;  mode(a); typeof(a) # If we really want an integer, we must coerce to type integer.

a <- "3"; a;  mode(a); typeof(a) # The quotation marks make this a character string, not a number.

# More coercions. For each of these, first think what result you would expect:
as.numeric("3") # character as numeric
as.numeric("3.141592653") # string as numeric
as.numeric("pi") # another string as numeric
as.numeric(pi) # not a string, but a predefined constant

as.logical(0)
as.logical(1)
as.logical(-1)
as.logical(pi) # any non-zero number is TRUE ...
as.logical("pi") # ... but not non-numeric types. NA is "Not Available".
# Create a vector and list its contents and length:
f <- c(1, 1, 2, 3, 5, 8, 13, 21) # the first eight Fibonacci numbers
f
length(f)

# Various ways to retrieve values from the vector.
f[1] # By index: "1" is the first element.
f[length(f)] # length() is the index of the last element.
1:4 # This is the range operator
f[1:4] # using the range operator (it generates a sequence and returns it in a vector)
f[4:1] # same thing, backwards
seq(from=2, to=6, by=2) # The seq() function is a flexible, generic way to generate sequences
seq(2, 6, 2) # Same thing: arguments in default order
f[seq(2, 6, 2)]

# ...using an index vector with positive indices
a <- c(1, 3, 4, 1) # the elements of index vectors must be valid indices of the target vector. The index vector can be of any length.
f[a] # Here, four elements are retrieved from f[]

# ...using an index vector with negative indices
a <- -(1:4) # If elements of index vectors are negative integers, the corresponding elements are excluded.
f[a] # Here, the first four elements are omitted from f[]
# Mind the parentheses: ":" binds more tightly than "-", so the unparenthesized
# length(f)-4:length(f) would be evaluated as length(f) - (4:length(f)).
f[-((length(f)-3):length(f))] # Here, the last four elements are omitted from f[]

# ...using a logical vector
f>4 # A logical expression operating on the target vector returns a vector of logical elements. It has the same length as the target vector.
f[f>4] # We can use this logical vector to extract only elements for which the logical expression evaluates as TRUE

# Example: extending the Fibonacci series for three steps.
# Think: How does this work? What numbers are we adding here and why does the result end up in the vector?
f <- c(f, f[length(f)-1] + f[length(f)]); f
f <- c(f, f[length(f)-1] + f[length(f)]); f
f <- c(f, f[length(f)-1] + f[length(f)]); f

# computing with a vector and a scalar: the operation is applied to every element
f
f+1
f*2

# computing with two vectors of same length
a <- f[-1]; a # like f[], but omitting the first element
b <- f[1:(length(f)-1)]; b # like f[], but shortened by the last element
c <- a / b # the "golden ratio", phi (~1.61803 or (1+sqrt(5))/2 ), an irrational number, is approximated by the ratio of two consecutive Fibonacci numbers.
c
abs(c - ((1+sqrt(5))/2)) # Calculating the error of the approximation, element by element
# A vector becomes a matrix (or a higher-dimensional array) as soon as it is
# given dimensions.
a <- 1:12
a
dim(a) <- c(2,6)   # 2 rows, 6 columns
a
dim(a) <- c(2,2,3) # three-dimensional array: 2 x 2 x 3
a

# Matrices can also be glued together from vectors.
a <- 1:4
b <- 5:8
c <- rbind(a, b)   # a and b become the rows
c
d <- cbind(a, b)   # a and b become the columns
d
e <- cbind(d, 9:12) # append a third column to d
e

# Retrieving elements and slices: a missing index selects the entire row or
# column.
e[1,]       # first row
e[,2]       # second column
e[3,2]      # element at index row=3, column = 2
e[3:4, 1:2] # submatrix

@@ Line 458: / Line 458: @@
 &nbsp;
-==Matrices, tables, frames==
+==Matrices==
-Subsetting,mselecting and filtering
+If we need to operate with several vectors, or multi-dimensional data, we make use of ''matrices'' or more generally ''k''-dimensional ''arrays'' in '''R'''. Matrix operations are very similar to vector operations; in fact, a matrix actually is a vector for which the numbers of rows and columns have been defined.
+The most basic form of such definition is the <code>dim()</code> function. Consider:
+<source lang="rsplus">
+a <- 1:12; a
+dim(a) <- c(2,6); a
+dim(a) <- c(2,2,3); a
+</source>
+Alternatively, matrices can be defined using the <code>matrix()</code> or <code>array()</code> functions (see there), or "glued" together from vectors by rows or columns, using the  <code>rbind()</code> or  <code>cbind()</code> functions respectively:
+<source lang="rsplus">
+a <- 1:4
+b <- 5:8
+c <- rbind(a, b); c
+d <- cbind(a, b); d
+e <- cbind(d, 9:12); e
+</source>
+Addressing (retrieving) individual elements or slices from matrices is simply done by specifying the appropriate indices, where a missing index indicates that the entire row or column is to be retrieved:
+<source lang="rsplus">
+e[1,] # first row
+e[,2] # second column
+e[3,2] # element at index row=3, column = 2
+e[3:4, 1:2] # submatrix
+</source>
+&nbsp;
+==Lists==
+...
+&nbsp;
+==Data frames==
+...
 &nbsp;
 ==Data manipulations==
 * Transformation

Difference between revisions of "R tutorial"

Revision as of 18:09, 22 August 2012

Contents

The environment

Installation

User interface

Packages

Scripts

Simple commands

Operators

Functions

Variables

Scalar data

Vectors

Matrices

Lists

Data frames

Data manipulations

Writing your own functions

Notes

Further reading and resources

Navigation menu

Personal tools

Namespaces

Variants

Views

More

Search

Sections

Tools