dplyr
is a package for manipulating data in R
- It is an essential tool for working with ROI structural data
- Requires that you get your data into a data frame before starting
April 14, 2017
dplyr
is a package for manipulating data in R
magrittr
is a package that is very useful along with dplyr
install.packages("magrittr")
a = f(b)
a = g(a, c)
a = h(a, d)
with code like this
a = f(b) %>% g(c) %>% h(d)
dplyr
. This is a package for manipulating and wrangling data frames
dplyr
includes a collection of verbs for manipulating data; we’ll go through them one at a time
filter
, select
, mutate
, summarise
data/
dir("../data")
[1] "kirby127a_3_1_ax_283Labels_M2_corrected_stats.txt" [2] "kirby142a_3_1_ax_283Labels_M2_corrected_stats.txt"
## Only if you don't have the package installed
#source("https://neuroconductor.org/neurocLite.R")
#neuro_install("MRIcloudT1volumetrics")
require(devtools)
devtools::install_github("bcaffo/MRIcloudT1volumetrics")
library(magrittr); library(dplyr); library(MRIcloudT1volumetrics)
roiDir = "../data/"
fileList = dir(roiDir)
dat = readSubject(paste(roiDir, fileList[1], sep = "")) %>%
  subject2df()
head(dat, 10)
rawid roi volume min max mean std 1 kirby127a_3_1_ax.img Telencephalon_L 531111 0 374 128.3013 51.8593 2 kirby127a_3_1_ax.img Telencephalon_R 543404 0 300 135.0683 53.6471 3 kirby127a_3_1_ax.img Diencephalon_L 9683 15 295 193.5488 32.2733 4 kirby127a_3_1_ax.img Diencephalon_R 9678 10 335 193.7051 32.7869 5 kirby127a_3_1_ax.img Mesencephalon 10268 55 307 230.8583 29.2249 6 kirby127a_3_1_ax.img Metencephalon 159402 2 299 138.5200 52.2241 7 kirby127a_3_1_ax.img Myelencephalon 4973 12 286 199.8497 36.6501 8 kirby127a_3_1_ax.img CSF 109776 0 258 33.0193 26.3262 9 kirby127a_3_1_ax.img CerebralCortex_L 276965 0 249 88.5014 27.7510 10 kirby127a_3_1_ax.img CerebralCortex_R 282858 0 295 93.7283 28.1726 type level 1 1 1 2 1 1 3 1 1 4 1 1 5 1 1 6 1 1 7 1 1 8 1 1 9 1 2 10 1 2
select
will keep the columns that you specify (prefix a column name with - to drop that column instead, as below)
dat = select(dat, -rawid)
head(dat, 10)
roi volume min max mean std type level 1 Telencephalon_L 531111 0 374 128.3013 51.8593 1 1 2 Telencephalon_R 543404 0 300 135.0683 53.6471 1 1 3 Diencephalon_L 9683 15 295 193.5488 32.2733 1 1 4 Diencephalon_R 9678 10 335 193.7051 32.7869 1 1 5 Mesencephalon 10268 55 307 230.8583 29.2249 1 1 6 Metencephalon 159402 2 299 138.5200 52.2241 1 1 7 Myelencephalon 4973 12 286 199.8497 36.6501 1 1 8 CSF 109776 0 258 33.0193 26.3262 1 1 9 CerebralCortex_L 276965 0 249 88.5014 27.7510 1 2 10 CerebralCortex_R 282858 0 295 93.7283 28.1726 1 2
filter
selects rows of a data frame based on criteria
dat = filter(dat, type == 1, level == 1)
dat
roi volume min max mean std type level 1 Telencephalon_L 531111 0 374 128.3013 51.8593 1 1 2 Telencephalon_R 543404 0 300 135.0683 53.6471 1 1 3 Diencephalon_L 9683 15 295 193.5488 32.2733 1 1 4 Diencephalon_R 9678 10 335 193.7051 32.7869 1 1 5 Mesencephalon 10268 55 307 230.8583 29.2249 1 1 6 Metencephalon 159402 2 299 138.5200 52.2241 1 1 7 Myelencephalon 4973 12 286 199.8497 36.6501 1 1 8 CSF 109776 0 258 33.0193 26.3262 1 1
dat = mutate(dat, icv = sum(volume))
dat
roi volume min max mean std type level icv 1 Telencephalon_L 531111 0 374 128.3013 51.8593 1 1 1378295 2 Telencephalon_R 543404 0 300 135.0683 53.6471 1 1 1378295 3 Diencephalon_L 9683 15 295 193.5488 32.2733 1 1 1378295 4 Diencephalon_R 9678 10 335 193.7051 32.7869 1 1 1378295 5 Mesencephalon 10268 55 307 230.8583 29.2249 1 1 1378295 6 Metencephalon 159402 2 299 138.5200 52.2241 1 1 1378295 7 Myelencephalon 4973 12 286 199.8497 36.6501 1 1 1378295 8 CSF 109776 0 258 33.0193 26.3262 1 1 1378295
Everything we’ve done so far
dat = readSubject(paste(roiDir, fileList[1], sep = "")) %>%
  subject2df() %>%
  filter(type == 1, level == 1) %>%
  mutate(icv = sum(volume))
fullData = readSubject(paste(roiDir, fileList[1], sep = "")) %>%
  subject2df()
head(fullData)
rawid roi volume min max mean std 1 kirby127a_3_1_ax.img Telencephalon_L 531111 0 374 128.3013 51.8593 2 kirby127a_3_1_ax.img Telencephalon_R 543404 0 300 135.0683 53.6471 3 kirby127a_3_1_ax.img Diencephalon_L 9683 15 295 193.5488 32.2733 4 kirby127a_3_1_ax.img Diencephalon_R 9678 10 335 193.7051 32.7869 5 kirby127a_3_1_ax.img Mesencephalon 10268 55 307 230.8583 29.2249 6 kirby127a_3_1_ax.img Metencephalon 159402 2 299 138.5200 52.2241 type level 1 1 1 2 1 1 3 1 1 4 1 1 5 1 1 6 1 1
summarise
creates summaries
group_by
applies functions over groups
fullData %>%
  group_by(type, level) %>%
  summarise(nvol = n(), sum = sum(volume), mean = mean(volume)) %>%
  as.data.frame() #just for display
   type level nvol     sum       mean
1     1     1    8 1378295 172286.875
2     1     2   19 1378299  72542.053
3     1     3   54 1378314  25524.333
4     1     4  136 1378345  10134.890
5     1     5  276 1378405   4994.221
6     2     1    5 1378295 275659.000
7     2     2   18 1378299  76572.167
8     2     3   52 1378312  26506.000
9     2     4   70 1378318  19690.257
10    2     5  198 1378373   6961.480