# R basics walkthrough: scalar arithmetic, strings, vectors, sequences,
# missing values, sorting, negative indexing, arrays and matrices.
# Bare expressions (e.g. `Q`) are left as-is so the values are auto-printed
# when the script is run at top level.

# Scalar arithmetic ----
A <- 120.5
m <- 2.0
n <- 3.2
Q <- A * log(m^2 + n^2)
Q

a <- 33.5
b <- 50.0
x <- 4.1
l <- 2.3
M <- (a / (a + b)) * sin(x / l) * cos(x / l)
M

z <- 22.9
r <- 6.7
y <- sqrt(1.0 + (z / r)^0.38)
y

# Strings ----
astr <- "ATGCGCTAGACAG"
slen <- nchar(astr)
slen

str1 <- "ATGCTGAG"
str2 <- "XXXXX"
ps <- paste(str1, str2) # default separator is a single space
ps
scat <- paste(str1, str2, sep = "---")
scat
scat <- paste0(str1, str2) # concatenation without a separator
scat

str <- "Mitochondria and Golgi bodies"
su <- substr(str, 4, 8)
su
scat
substr(scat, 4, 8) <- "UUUUU" # replace characters 4..8 in place
scat

str4 <- "AECH9939-ALM"
strunk <- strtrim(str4, 4)
strunk

st <- "filename_doc"
st <- strsplit(st, "_") # strsplit() returns a list
st
aa <- unlist(strsplit("fname.doc", "\\."))
aa[1]
aa[2]

str <- "THIS IS a sentance"
toupper(str)
tolower(str)

# Vectors ----
avec <- c(10.2, 5.5, 6.9, 7.2, 8.1)
avec
avec1 <- c("AEC", "AED", "AAB", "AFC")
# One character element coerces the whole vector to character.
avec2 <- c(10.2, 5.5, "6.9", 7.2, 8.1)
avec2

x <- c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130)
x[3]
z <- x[c(1, 3, 6)]
z
z <- x[c(4:9)]
z

vstr <- c(1, 2, 3, 4, 5, 6, 7, 8, 9)
vstr + 100

# Lengths differ (6 vs 5): R recycles vec2 and emits a warning.
vec1 <- c(1.5, 2.5, 3.5, 4.5, 5.5, 6.5)
vec2 <- c(10, 20, 30, 40, 50)
vec1 + vec2

# Growing a vector by appending
avec <- c()
avec <- c(avec, "ATG", "TTG")
avec
avec <- c(avec, "TATATA", "TTTTTAA")
avec

# Vectorised polynomial evaluation
x <- c(1, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0)
y <- (3 * x^3) - (4 * x^2) + (5 * x) - 6
y
3 * x^3

# Sequences ----
sq <- seq(1, 50)
sq
sq <- seq(1, 50, 5)
sq
sq <- seq(50, 1, -5)
sq

# Missing values ----
x <- c(17, 18, 17, 19, NA, 17, 20, NA, 16, 22)
x
x * 10 # NA propagates through arithmetic
is.na(x)
x[is.na(x)]
x[!is.na(x)]
x[is.na(x)] <- 0
x

# Sorting ----
x <- c(12, 2, 34, 67, 22, 55, 123)
sor <- sort(x)
sor
ys <- sort(x, decreasing = TRUE)
ys

y <- c(12, 2, 34, NA, 67, 29, NA, NA, 45, 99)
max(y) # NA, because y contains NA and na.rm is not set
min(y, na.rm = TRUE)

# Generating labels ----
labs <- paste0(c("X"), 1:20)
labs
labs <- paste0(c("X", "Y"), 1:20) # the prefixes are recycled: X1, Y2, X3, ...
labs

# Negative indexing drops elements ----
x <- c(5, 10, 15, 20, 25, 30, 35, 40)
yr <- x[-2]
yr
x <- c(5, 10, 15, 20, 25, 30, 35, 40)
ya <- x[-2:-5]
ya
y <- x[c(-2, -4, -7)]

# Arrays and matrices ----
x <- c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120)
arr <- array(x, dim = c(4, 3))
arr

x <- c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120)
brr <- array(x, dim = c(2, 2, 3))
brr

x <- c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120)
amat <- matrix(x, nrow = 3, ncol = 4) # filled column by column
amat
amat <- matrix(x, nrow = 3, ncol = 4, byrow = TRUE) # filled row by row
amat
rownames(amat) <- c("r1", "r2", "r3")
# Matrix algebra and data-frame basics.
# Continues from the matrix `amat` created earlier in the script.
colnames(amat) <- c("c1", "c2", "c3", "c4")
amat
dim(amat)

# Matrix products ----
ax <- c(2, 5, 3, 7, 1, 8, 9, 10, 1, 12, 5, 10, 4, 17, 15, 11)
A <- matrix(ax, nrow = 4, ncol = 4)
A
bx <- c(12, 5, 3, 17, 1, 18, 9, 10, 1, 12, 5, 10, 4, 15, 15, 14)
B <- matrix(bx, nrow = 4, ncol = 4)
B
cx <- c(13, 6, 10, 8, 4, 7, 31, 8)
C <- matrix(cx, nrow = 4, ncol = 2)
C

R <- A * B # element-wise product
R
M <- A %*% B # matrix product
M

X <- c(5, 6, 8, 9)
X
Y <- c(8, 10, 12, 5)
P <- X %o% Y # outer product
P

# Transpose. The result is stored as `At` rather than `T`, because assigning
# to `T` would overwrite the built-in shorthand for TRUE.
A
At <- t(A)
At

# Diagonal matrices ----
X <- c(10, 20, 30, 40, 50)
D <- diag(X) # vector argument: diagonal matrix with X on the diagonal
D
A
d <- diag(A) # matrix argument: extracts the diagonal
d
k <- 5
ds <- diag(k) # scalar argument: k x k identity matrix
ds

# solve
# 3x + 4y - 2z = 5
# 4x - 5y + z = -3
# 10x - 6y + 5z = 13
xvec <- c(3, 4, -2, 4, -5, 1, 10, -6, 5)
A <- matrix(xvec, nrow = 3, ncol = 3, byrow = TRUE)
A
b <- c(5, -3, 13)
X <- solve(A, b)
X

# For a square matrix 'A', the function call solve(A) returns the inverse
# of 'A':
A
invA <- solve(A)
invA

# Row/column summaries ----
xx <- c(3.2, 5.9, 4.5, 6.8, 2.3, 1.1, 8.2, 3.9, 9.6, 6.7, 8.1, 1.5)
A <- matrix(xx, nrow = 4, ncol = 3)
A
rowMeans(A)
colMeans(A)
rowSums(A)
colSums(A)

# Eigen decomposition ----
ax <- c(2, 5, 3, 7, 1, 8, 9, 10, 1, 12, 5, 10, 4, 17, 15, 11)
A <- matrix(ax, nrow = 4, ncol = 4)
res <- eigen(A)
res$values
res$vectors

# Dataframe is very useful for combining vectors of same length with
# different data types into a single data structure
data1 <- c("Iron", "Sulphur", "Calcium", "Magnecium", "Copper")
data2 <- c(12.5, 32.6, 16.7, 20.6, 7.5)
data3 <- c(1122, 1123, 1124, 1125, 1126)
frm1 <- data.frame(data1, data2, data3)
frm1
names(frm1)
rname <- rownames(frm1)
rname
cname <- colnames(frm1)
cname
names(frm1) <- c("Element", "Proportion", "Product_ID")
frm1
rownames(frm1) <- c("elmt-1", "elmt-2", "elmt-3", "elmt-4", "elmt-5")
frm1

# Accessing the elements of a data frame by index
frm1[1, 3]
frm1[1, ]
frm1[1:3, ]

# Accessing a column of a data frame by name
frm1$Proportion
frm1$Element
1000 * frm1$Proportion

# Adding a new column to the data frame
frm1$symbol <- c("Fe", "S", "Ca", "Mg", "Cu")
frm1

# Removing a column by name from a data frame
frm1
frm1$Product_ID <- NULL
frm1

# To attach a data frame
frm1
# Before attach(), a bare `symbol` would stop the script with
# "object 'symbol' not found", so that lookup is only described here.
attach(frm1)
symbol
# Detach again so the columns do not linger on the search path and mask
# variables defined later in the script.
detach(frm1)

##----------------------------
# Writing an R function
# defining a function called normalize
# Scale a numeric vector by a number and take the square root.
#
# avec: numeric vector of data values.
# anum: number each element of `avec` is divided by.
# Returns a vector the same length as `avec` holding (avec / anum)^0.5.
normalize <- function(avec, anum) {
  (avec / anum)^0.5
}

# Defining a vector and a number for data.
vec <- c(45.0, 67.0, 81.0, 57.0, 103.0, 122.0, 68.0, 98.0)
anumber <- 21.5

# function call
normalvec <- normalize(vec, anumber)

# print the resulting vector returned by the function
print(normalvec)

###---------------------------------------------
### To run an R script : a different file
# Guarded so that a missing companion script does not abort this walkthrough.
if (file.exists("test.R")) {
  source("test.R")
} else {
  message("test.R not found; skipping source(\"test.R\")")
}

####--------------------
# Creating subsets of data frames

# creating a vector of gene names
genes <- c("gene-1", "gene-2", "gene-3", "gene-4", "gene-5", "gene-5",
           "gene-6")

# creating a vector of gender
gender <- c("M", "M", "F", "M", "F", "F", "M")

# creating 7 data vectors with experimental results
result1 <- c(12.3, 11.5, 13.6, 15.4, 9.4, 8.1, 10.0)
result2 <- c(22.1, 25.7, 32.5, 42.5, 12.6, 15.5, 17.6)
result3 <- c(15.5, 13.4, 11.5, 21.7, 14.5, 16.5, 12.1)
result4 <- c(14.4, 16.6, 45.0, 11.0, 9.7, 10.0, 12.5)
result51 <- c(12.2, 15.5, 17.4, 19.4, 10.2, 9.8, 9.0)
result52 <- c(13.3, 14.5, 21.6, 17.9, 15.6, 14.4, 12.0)
result6 <- c(11.0, 10.0, 12.2, 14.3, 23.3, 19.8, 13.4)

# creating a data frame with this data.
# Data-frame subsetting, duplicate detection, frequency tables, which(),
# and basic plotting. Uses the gene/gender/result vectors defined earlier
# in the script.

# genes along rows, results along columns
datframe <- data.frame(genes, gender, result1, result2, result3, result4,
                       result51, result52, result6)

# adding column names to data frame
names(datframe) <- c("GeneName", "Gender", "expt1", "expt2", "expt3",
                     "expt4", "expt51", "expt52", "expt6")
datframe

# creating subset of data with expt2 values above 20
# (inside subset() the columns can be referred to directly by name)
subframe1 <- subset(datframe, expt2 > 20)
subframe1

# creating a subset of data with only Female gender
subframe2 <- subset(datframe, Gender == "F")
subframe2

# creating a subset with male gender for which expt2 is less than 30
subframe3 <- subset(datframe, (Gender == "M") & (expt2 < 30.0))
subframe3

# printing the data frames
print("subframe1 : Rows with expt2 > 20")
print(subframe1)
print("subframe2 : Rows with gender Female")
print(subframe2)
print("subframe3 : Rows with Male gender and expt2 < 30.0")
print(subframe3)

###-----------------------------------------
## Finding duplicated elements of a vector
x <- c("a", "b", "a", "c", "e", "f", "c", "g", "h")
duplicated(x)
x[duplicated(x)]
x[!duplicated(x)]

### Creating a frequency table
xx <- c("A", "T", "A", "G", "T", "A", "T", "C", "C", "A", "T", "T", "G")
tab <- table(xx)
tab

## If a particular element is present in the vector more than once,
## the which() function returns a vector containing the indices of all
## the locations of that element in the input vector:
dat <- c("ATG", "TAG", "ATG", "TTA", "TGC", "ATT", "ATG", "GGG")
d <- which(dat == "ATG")
d

## PLOTS
xval <- seq(1, 10, 0.5)
yval <- 30 * xval / (2 + xval)
plot(xval, yval, pch = 20, cex = 1, col = "red")
plot(xval, yval, pch = 14, cex = 1.4, col = "#99AA28")
# pch 0-25 diff symbols
# pch=20, cex=1.2, col="blue"
# pch=14, cex=1.4, col="#99AA28"
# col = "#A9F3BB" colour corresponding to Red=A9, Green=F3, Blue=BB

xval <- seq(1, 10, 0.5)
yval <- 30 * xval / (2 + xval)
plot(xval, yval, pch = 20, cex = 1, col = "red", type = "b")
plot(xval, yval, pch = 20, cex = 1, col = "red", type = "h")
# type="p" plots points
# type="l" plots lines
# type="b" plots points and lines
# type="o" plots points overlaid by lines
# type="h" plot with histogram like vertical lines
# type="s" plot with stair steps
# type="n" no plotting - blank plot with axis marked (x,y)

###-------------
## Setting the ranges of the X,Y axes
Xvalue <- c(5, 6, 7, 8, 9, 10, 11, 12, 13, 14)
Yvalue <- c(12, 23, 36, 48, 53, 64, 78, 89, 91, 110)

# No range selection. Default range from data
plot(Xvalue, Yvalue, col = "blue", type = "o")

# Range is set using xlim and ylim parameters
plot(Xvalue, Yvalue, col = "blue", type = "o",
     xlim = c(1, 20), ylim = c(1, 150))

###--------------------- Log scale y axis
Xval <- c(10, 20, 30, 40, 50, 60)
Yval <- 3 * Xval^2
plot(Xval, Yval, log = "y", type = "o", col = "blue", lwd = 2)
Yval

###------------------
## scatter plot between these two sets of random Gaussian deviates
# Generate 1000 standard-normal deviates, scaled by 10 (mean 0, sd 10)
Xrandom <- 10 * rnorm(1000)
# Generate another 1000 deviates with the same scaling
Yrandom <- 10 * rnorm(1000)

# plot the scatter plot. We choose color in Hexadecimal system
plot(Xrandom, Yrandom, cex = 0.2, col = "#FF5533",
     main = "Scatter plot between 2 Gaussian deviates",
     xlim = c(-40, 40), ylim = c(-40, 40))

# NOTE(review): DMwR may no longer be installable from CRAN - confirm that
# it is available in the target environment.
library(DMwR)
head(algae)

# Re-read the data from a local text file; this replaces the `algae` object
# shown by head() above. The string 'XXXXXXX' marks missing values.
algae <- read.table("Analysis.txt",
                    header = FALSE,
                    dec = ".",
                    col.names = c("season", "size", "speed", "mxPH", "mnO2",
                                  "Cl", "NO3", "NH4", "oPO4", "PO4", "Chla",
                                  "a1", "a2", "a3", "a4", "a5", "a6", "a7"),
                    na.strings = c("XXXXXXX"))
algae[1:5, ]

### Data Visualization and Summarization
summary(algae) ### for statistical properties

# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, which would silently change these calls.
hist(algae$mxPH, probability = TRUE)

library(car) ## q-q plot in car package
par(mfrow = c(1, 2)) ## to set 2 figures in 2 columns in 1 row
hist(algae$mxPH, probability = TRUE, xlab = "",
     main = "Histogram of maximum pH value", ylim = 0:1)
lines(density(algae$mxPH, na.rm = TRUE))
rug(jitter(algae$mxPH)) ## vertical lines at bottom to detect outlier

# qqPlot() is the current name of car's former qq.plot() function.
qqPlot(algae$mxPH, main = "Normal QQ plot of maximum pH")
## The QQ plot highlights values that fall outside the 95% confidence
## (dashed lines)
## ... interval of the normal distribution (end of the QQ-plot note).
## The solid black line in the QQ plot is the normal distribution.

# Restore the single-panel layout after the 1 x 2 figure.
par(mfrow = c(1, 1)) # make figure back to 1 column