# session2.R

#------------------------------------------------
# 1. Data frames
#
# Things to learn:
#    read.csv
#    to extract elements, specify which rows you want, then which columns
#    (using any of the three types of indexing)
#    to add columns, use $<-

# Load the peering statistics CSV; read.csv() returns a data frame.
df <- read.csv(
  file = "c:/documents/netperf/notes/rbook/data/peering-stats.csv"
)

# Indexing is always [rows, columns]; leaving one side empty means "all".
df[1:5, c("source", "dest", "latency")]  # rows 1-5, three columns picked by name
df[1:5, ]                                # rows 1-5, every column
df[1:5, "latency"]   # rows 1-5 of one column: the result drops to a plain vector
df$latency           # the whole 'latency' column, as a vector
df$latency[1:3]      # first three entries of that vector

# Logical indexing: build a TRUE/FALSE mask, then use it to pick rows.
df$source == "Cogent"      # one TRUE/FALSE per row: is the source Cogent?
df[df$source == "Cogent", ]  # keep only the rows where the mask is TRUE
# Chain a second subset onto the filtered rows: first 5 of them, two columns.
df[df$source == "Cogent", ][1:5, c("dest", "pktdrop")]

nrow(df)  # number of rows in the data frame

# A data frame is a list of columns, so both of these give the column count.
ncol(df)
length(df)

names(df)  # the column names

# Evaluate the throughput formula 1 / (latency * sqrt(pktdrop)) for every row
# at once (the arithmetic is element-wise), and store the result in a new
# column called 'throughput'.
df$throughput <- 1 / (df$latency * sqrt(df$pktdrop))
df[1:5, ]  # first 5 rows again, now including the new column