#------------------------------------------------
# 1. Data frames
#
# Things to learn:
#   read.csv
#   to extract elements, specify which rows you want, then which columns
#     (using any of the three types of indexing)
#   to add columns, use $<-

# Load the CSV file into a data frame.
df <- read.csv(file = 'c:/documents/netperf/notes/rbook/data/peering-stats.csv')

# Row/column extraction: rows come first, columns second.
df[seq_len(5), c('source', 'dest', 'latency')]  # rows 1-5, three named columns
df[seq_len(5), ]                                # rows 1-5, every column
df[seq_len(5), 'latency']                       # rows 1-5 of a single column:
                                                # the result collapses to a vector

# Working with a single column as a plain vector.
df[['latency']]                # the whole latency column
df[['latency']][seq_len(3)]    # its first three elements

# Logical indexing: build a TRUE/FALSE mask, then use it to pick rows.
df[['source']] == 'Cogent'                      # one TRUE/FALSE per row
df[df[['source']] == 'Cogent', ]                # only the rows whose source is 'Cogent'
df[df[['source']] == 'Cogent', ][seq_len(5), c('dest', 'pktdrop')]
                                                # of those rows, the first 5,
                                                # restricted to two columns

# Size and shape of the data frame.
nrow(df)      # number of rows in the data frame
ncol(df)      # number of columns
length(df)    # also the number of columns (a data frame is a list of columns)
names(df)     # column names

# Evaluate 1 / (latency * sqrt(pktdrop)) for every row at once (the arithmetic
# is vectorized) and store the result in a new column called 'throughput'.
df$throughput <- with(df, 1 / (latency * sqrt(pktdrop)))
df[seq_len(5), ]