Presentation is loading. Please wait.

Presentation is loading. Please wait.

Ch.4 Data Manipulation.

Similar presentations


Presentation on theme: "Ch.4 Data Manipulation."— Presentation transcript:

1 Ch.4 Data Manipulation

2 4.1 Vector subscripts # x[subscript] for a vector x x <- c(6,7,2,4)
x[1] [1] 6 x[length(x)] [1] 4

3 4.1 Vector subscripts # A vector of positive natural numbers x
[1] x[1:3] [1] x[c(4,1,4)] [1] x[3] <- 4 x[1:3] <- 4 x[1:3] <- c(1,2)

4 4.1 Vector subscripts # A logical vector x <- c(10,4,6,7,8)
y <- x>9 y [1] TRUE FALSE FALSE FALSE FALSE x[y] [1] 10 x[x>9] [1] 10 x[x>9] <- 9

5 4.1 Vector subscripts # A vector of negative natural numbers
x <- c(1,2,3,6) x[-(1:2)] # gives (x[3], x[4]) [1] 3 6 x <- c(1,2,3,4,5) x[7] [1] NA x[1:6] [1] 1 2 3 4 5 NA

6 4.1 Vector subscripts # Some useful functions
length(x); sum(x); prod(x); max(x); min(x); x <- 10:71 y <- 45:21 sum(x,y); prod(x,y); max(x,y); min(x,y) # chop off last part of a vector x <- 10:100 length(x) = 20 cumsum(rep(2,10)) [1] 2 4 6 8 10 12 14 16 18 20

7 4.1 Vector subscripts # Some useful functions
x <- c(2,6,4,5,5,8,8,1,3,0) length(x) [1] 10 sort(x) [1] 0 1 2 3 4 5 5 6 8 8 sort(x, decr = TRUE) [1] 8 8 6 5 5 4 3 2 1 0

8 4.1 Vector subscripts # Some useful functions
x <- rnorm(10) # create 10 random numbers y <- 1:10 # create the numbers 1,2,3,..,10 z <- order(x) # create a permutation vector sort(x) # sort x [1] … y[z] # change the order of elements of y [1] x <- rnorm(10) round(rev(sort(x)),2) [1] …

9 4.1 Vector subscripts # Some useful functions
x <- c(2,6,4,5,5,8,8,1,3,0) unique(x) [1] 2 6 4 5 8 1 3 0 duplicated(x) [1] FALSE FALSE FALSE FALSE TRUE FALSE TRUE ... x <- c(1,3,5,8,15) diff(x) [1] 2 2 3 7 diff(x, lag=2) [1] 4 5 10

10 4.2 Matrix subscripts # x[subscript] x <- matrix(1:36, ncol=6)
# the element in row 2 and column 6 of x x[2,6] [1] 32 # the third row of x x[3, ] [1] 3 9 15 21 27 33

11 4.2 Matrix subscripts # the elements in row 3 and columns 1 and 5
x[3,c(1,5)] [1] 3 27 # show x, except for the first column x[,-1] [,1] [,2] [,3] [,4] [,5] [1,] [2,] [3,] [4,] [5,] [6,]

12 4.2 Matrix subscripts x[-3,-4] [,1] [,2] [,3] [,4] [,5]
[1,] [2,] [3,] [4,] [5,] x <- x[-3,4] x[4,5] <- 5 x <- matrix(rnorm(100),ncol=10) x[ ,1] <- 1:10

13 4.2 Matrix subscripts x <- matrix(1:36,ncol=6) y <- x>19 y
[,1] [,2] [,3] [,4] [,5] [,6] [1,] FALSE FALSE FALSE FALSE TRUE TRUE [2,] FALSE FALSE FALSE TRUE TRUE TRUE [3,] FALSE FALSE FALSE TRUE TRUE TRUE [4,] FALSE FALSE FALSE TRUE TRUE TRUE [5,] FALSE FALSE FALSE TRUE TRUE TRUE [6,] FALSE FALSE FALSE TRUE TRUE TRUE x[y] [1] … x <- matrix(rnorm(100),ncol=10) x[x>0] <- 0

14 4.2 Matrix subscripts x <- matrix(1:36,ncol=6) x
[,1] [,2] [,3] [,4] [,5] [,6] [1,] [2,] [3,] [4,] [5,] [6,] r <- cbind( c(1,2,5), c(3,4,4)) r [,1] [,2] [1,] [2,] [3,]

15 4.2 Matrix subscripts x <- matrix(1:36,ncol=6) x[3];x[9];x[36]
[1] 3 [1] 9 [1] 36 x[21:30] [1] 21 22 23 24 25 26 27 28 29 30

16 4.3 manipulating Data frames
4.3.1 Extracting data from data frames test <- read.csv(file.choose(),header=F) test V1 V2 V3 1 1 a 1.2 2 2 b 1.5 3 3 c 1.3 4 c 1.2 V1 <- test$V1 V2 <- test[["V2"]] # column factor V2 <- test["V2"] # data.frame V3 <- test[["V3"]] # numeric vector V4 <- test[c("V1", "V3")] # data.frame

17 4.3 manipulating Data frames
4.3.4 Merging data frames x <- data.frame(k1=c(NA,NA,3,4,5), k2=c(1,NA,NA,4,5), data=1:5) y <- data.frame(k1=c(NA,2,NA,4,5), k2=c(NA,NA,3,4,5), data=1:5) x k1 k2 data 1 NA 2 NA NA 2 3 3 NA 3 y k1 k2 data 1 NA NA 1 2 2 NA 2 3 NA

18 4.3 manipulating Data frames
4.3.4 Merging data frames merge(x, y, by=c("k1","k2")) # NA's match k1 k2 data.x data.y 3 NA NA x k1 k2 data 1 NA 2 NA NA 2 3 3 NA 3 y k1 k2 data 1 NA NA 1 2 2 NA 2 3 NA

19 4.3 manipulating Data frames
4.3.4 Merging data frames merge(x, y, by="k1") # NA's match, so 6 rows k1 k2.x data.x k2.y data.y 3 NA NA 4 NA 5 NA NA NA 6 NA NA x k1 k2 data 1 NA 2 NA NA 2 3 3 NA 3 y k1 k2 data 1 NA NA 1 2 2 NA 2 3 NA

20 4.3 manipulating Data frames
Aggregating data frames gr <- c("A","A","B","B") x <- c(1,2,3,4) y <- c(4,3,2,1) myf <- data.frame(gr, x, y) aggregate(myf, list(myf$gr), mean) Group.1 gr x y A NA B NA Warning messages: 1: In mean.default(X[[1L]], ...) : argument is not numeric or logical: returning NA 2: In mean.default(X[[2L]], ...) : aggregate(myf[,c("x","y")],list(myf$gr),mean)

21 4.3 manipulating Data frames
4.3.6 Stacking columns of data frames group1 <- rnorm(3); group2 <- rnorm(3) df <- data.frame(group1,group2) stack(df) # data.frame values ind group1 group1 group1 group2 group2 group2 cbind(group1,group2) # numeric matrix group1 group2 [1,] [2,] [3,]

22 4.3 manipulating Data frames
4.3.7 Reshaping data df.wide <- data.frame(Subject = c(1, 2), m1 = c(4, 5), m2 = c(5.6, 7.8), m3 = c(3.6, 6.7) ) df.wide Subject m1 m2 m3

23 4.3 manipulating Data frames
4.3.7 Reshaping data df.long <- reshape(df.wide, varying = list(c("m1", "m2", "m3")), idvar = "Subject", direction = "long", v.names = "Measurement" ) df.long Subject time Measurement

24 4.4 Attributes x <- rnorm(10) attributes(x) NULL
attr(x, "description") <- "The unit is month" x [1] [6] attr(, "description") [1] "The unit is month"

25 4.4 Attributes x <- structure(x, atr1=8,atr2="test") x
[1] [6] attr(, "description"): [1] "The unit is month" attr(, "atr1"): [1] 8 attr(, "atr2"): [1] "test"

26 4.4 Attributes attributes(x) $description: [1] "The unit is month"
$atr1: [1] 8 $atr2: [1] "test" attributes(x)$description attr(x,"description") [1] "The unit is month"

27 4.5 Character manipulation
4.5.1 nchar, substring and paste x <- c("a","b","c") mychar1 <- "This is a test" mychar2 <- "This is another test" charvector <- c("a", "b", "c", "test") nchar(mychar1) [1] 14 nchar(charvector) [1] 1 1 1 4 x <- c("Gose", "Longhow", "David") substring(x,first=2,last=4) [1] "ose" "ong" "avi"

28 4.5 Character manipulation
4.5.1 nchar, substring and paste paste("number",1:10, sep=".") [1] "number.1" "number.2" "number.3" "number.4" [5] "number.5" "number.6" "number.7" "number.8" [9] "number.9" "number.10" paste("number",1:10, sep="-") [1] "number-1" "number-2" "number-3" "number-4" [5] "number-5" "number-6" "number-7" "number-8" [9] "number-9" "number-10"

29 4.6 Creating factors from continuous data
x <- 1:15 breaks <- c(0,5,10,15,20) cut(x,breaks) [1] (0,5] (0,5] (0,5] (0,5] (0,5] (5,10] (5,10] (5,10] (5,10] [10] (5,10] (10,15] (10,15] (10,15] (10,15] (10,15] Levels: (0,5] (5,10] (10,15] (15,20] cut(x,breaks=5) [1] (0.986,3.79] (0.986,3.79] (0.986,3.79] (3.79,6.6] (3.79,6.6] [6] (3.79,6.6] (6.6,9.4] (6.6,9.4] (6.6,9.4] (9.4,12.2] [11] (9.4,12.2] (9.4,12.2] (12.2,15] (12.2,15] (12.2,15] Levels: (0.986,3.79] (3.79,6.6] (6.6,9.4] (9.4,12.2] (12.2,15]

30 4.6 Creating factors from continuous data
x <- rnorm(15) cut(x, breaks=3, labels=c("low","medium","high")) [1] high medium medium medium medium high low high low low [11] high low low medium high Levels: low medium high


Download ppt "Ch.4 Data Manipulation."

Similar presentations


Ads by Google