This template data frame provides a general structure for travel data that integrates with data synthesis and modeling functions. Stays (individuals reported as not traveling outside their home location) should also be included in this data frame, with the origin and destination set to the same location. Note that models fitted and then extrapolated using other data assume that the same method for defining population size is used throughout. Either the dates (date_start and date_stop) or the time span (date_span) must be filled.
travel_data_template
a data frame with empty columns and generalized column names
date: beginning of the time interval for the trip count
date: end of the time interval for the trip count
integer: time span in days
integer: unique individual identifier
numeric: age of participant
logical: gender of participant
factor: if individual participants belong to different groups
character: name of highest administration level of origin location (Country)
character: name of administration level 1 of origin location (e.g. Division, State)
character: name of administration level 2 of origin location (e.g. District, County)
character: name of administration level 3 of origin location (e.g. Sub-district, Province)
character: name of administration level 4 of origin location (e.g. City, Municipality)
character: name of administration level 5 of origin location (e.g. Town, Village, Community, Ward)
character: administrative type for the origin location (e.g. sub-district, community vs town, or urban vs rural)
numeric: longitude of origin location centroid in decimal degrees (centroid of smallest admin unit)
numeric: latitude of origin location centroid in decimal degrees (centroid of smallest admin unit)
numeric: population size of lowest administrative unit for origin location
character: name of highest administration level of destination location (Country)
character: name of administration level 1 of destination location (e.g. Division, State)
character: name of administration level 2 of destination location (e.g. District, County)
character: name of administration level 3 of destination location (e.g. Sub-district, Province)
character: name of administration level 4 of destination location (e.g. City, Municipality)
character: name of administration level 5 of destination location (e.g. Town, Village, Community, Ward)
character: administrative type for the destination location (e.g. sub-district, community vs town, or urban vs rural)
numeric: longitude of destination location centroid in decimal degrees (centroid of smallest admin unit)
numeric: latitude of destination location centroid in decimal degrees (centroid of smallest admin unit)
numeric: population size of lowest administrative unit for destination location
numeric: total number of observed trips made from origin to destination during time span
John Giles
# Worked examples for `travel_data_template`: build (1) observed trips among
# locations, (2) stays in the home location, and (3) a data set over which a
# fitted model can be extrapolated. Lines starting with `#>` are printed
# output; values are randomly generated and will differ between runs.

#--------------------------------
# Travel among locations
#--------------------------------

trip <- travel_data_template
n <- 3 # Add some observations
trip[seq_len(n), ] <- NA

# Time span of travel survey
trip$date_start <- as.Date("2020-01-01")
trip$date_stop <- trip$date_start + 30
trip$date_span <- difftime(trip$date_stop, trip$date_start, units = 'days')

# Participant info
trip$indiv_id <- sample(1:100, n)
trip$indiv_age <- round(runif(n, 5, 80))
trip$indiv_sex <- rbinom(n, 1, 0.5)

# Origin info
trip$orig_adm0 <- 'A'
trip$orig_adm1 <- 'A'
trip$orig_adm2 <- 'A'
trip$orig_adm3 <- LETTERS[1:n]
trip$orig_type <- 'Sub-district' # Type of admin unit for lowest admin level
trip$orig_x <- rnorm(n, 100, 5)
trip$orig_y <- rnorm(n, 20, 2)
trip$orig_pop <- rpois(n, 10000)

# Destination info
trip$dest_adm0 <- 'A'
trip$dest_adm1 <- 'A'
trip$dest_adm2 <- 'B'
trip$dest_adm3 <- LETTERS[(n + 1):(n * 2)]
trip$dest_type <- 'Sub-district' # Type of admin unit for lowest admin level
trip$dest_x <- rnorm(n, 100, 5)
trip$dest_y <- rnorm(n, 20, 2)
trip$dest_pop <- rpois(n, 5000)

# Number of reported trips
trip$trips <- rpois(n, 10)

head(trip)
#>   date_start  date_stop date_span indiv_id indiv_age indiv_sex indiv_type
#> 1 2020-01-01 2020-01-31   30 days       13        17         1       <NA>
#> 2 2020-01-01 2020-01-31   30 days       10         9         1       <NA>
#> 3 2020-01-01 2020-01-31   30 days        5        39         1       <NA>
#>   orig_adm0 orig_adm1 orig_adm2 orig_adm3 orig_adm4 orig_adm5    orig_type
#> 1         A         A         A         A      <NA>      <NA> Sub-district
#> 2         A         A         A         B      <NA>      <NA> Sub-district
#> 3         A         A         A         C      <NA>      <NA> Sub-district
#>      orig_x   orig_y orig_pop dest_adm0 dest_adm1 dest_adm2 dest_adm3 dest_adm4
#> 1 100.30449 20.22459    10215         A         A         B         D      <NA>
#> 2  89.11212 20.01577    10070         A         A         B         E      <NA>
#> 3  99.41070 23.75549    10076         A         A         B         F      <NA>
#>   dest_adm5    dest_type    dest_x   dest_y dest_pop trips
#> 1      <NA> Sub-district  98.45894 21.12676     5079    11
#> 2      <NA> Sub-district 105.06001 20.64497     4933     2
#> 3      <NA> Sub-district  95.40474 20.73335     5073     9

#-----------------------
# Stays in home location
#-----------------------

stay <- travel_data_template
n <- 3 # add some observations
stay[seq_len(n), ] <- NA

# Time span of travel survey
stay$date_start <- as.Date("2020-01-01")
stay$date_stop <- stay$date_start + 30
# Derive the span from the stay records themselves (not from `trip`), so this
# section remains correct if the two surveys cover different periods.
stay$date_span <- difftime(stay$date_stop, stay$date_start, units = 'days')

# Participant info
stay$indiv_id <- sample(100:200, n)
stay$indiv_age <- round(runif(n, 5, 80))
stay$indiv_sex <- rbinom(n, 1, 0.5)

# Origin and destination info (identical, because a stay never leaves home)
stay$orig_adm0 <- stay$dest_adm0 <- 'A'
stay$orig_adm1 <- stay$dest_adm1 <- 'A'
stay$orig_adm2 <- stay$dest_adm2 <- 'A'
stay$orig_adm3 <- stay$dest_adm3 <- LETTERS[1:n]
stay$orig_type <- stay$dest_type <- 'Sub-district'
stay$orig_x <- stay$dest_x <- rnorm(n, 100, 5)
stay$orig_y <- stay$dest_y <- rnorm(n, 20, 2)
stay$orig_pop <- stay$dest_pop <- rpois(n, 10000)

stay$trips <- NA

head(stay)
#>   date_start  date_stop date_span indiv_id indiv_age indiv_sex indiv_type
#> 1 2020-01-01 2020-01-31   30 days      107        44         0       <NA>
#> 2 2020-01-01 2020-01-31   30 days      102        76         1       <NA>
#> 3 2020-01-01 2020-01-31   30 days      168        68         0       <NA>
#>   orig_adm0 orig_adm1 orig_adm2 orig_adm3 orig_adm4 orig_adm5    orig_type
#> 1         A         A         A         A      <NA>      <NA> Sub-district
#> 2         A         A         A         B      <NA>      <NA> Sub-district
#> 3         A         A         A         C      <NA>      <NA> Sub-district
#>      orig_x   orig_y orig_pop dest_adm0 dest_adm1 dest_adm2 dest_adm3 dest_adm4
#> 1  94.45019 19.53901    10098         A         A         A         A      <NA>
#> 2 100.74703 20.78421     9928         A         A         A         B      <NA>
#> 3  98.06929 20.93987     9959         A         A         A         C      <NA>
#>   dest_adm5    dest_type    dest_x   dest_y dest_pop trips
#> 1      <NA> Sub-district  94.45019 19.53901    10098    NA
#> 2      <NA> Sub-district 100.74703 20.78421     9928    NA
#> 3      <NA> Sub-district  98.06929 20.93987     9959    NA

# NOTE(review): the call that printed the combined table below is not visible
# in this extract; presumably the `trip` and `stay` rows were bound together
# (e.g. with rbind) -- confirm against the package source.
#>
#>   date_start  date_stop date_span indiv_id indiv_age indiv_sex indiv_type
#> 1 2020-01-01 2020-01-31   30 days       13        17         1       <NA>
#> 2 2020-01-01 2020-01-31   30 days       10         9         1       <NA>
#> 3 2020-01-01 2020-01-31   30 days        5        39         1       <NA>
#> 4 2020-01-01 2020-01-31   30 days      107        44         0       <NA>
#> 5 2020-01-01 2020-01-31   30 days      102        76         1       <NA>
#> 6 2020-01-01 2020-01-31   30 days      168        68         0       <NA>
#>   orig_adm0 orig_adm1 orig_adm2 orig_adm3 orig_adm4 orig_adm5    orig_type
#> 1         A         A         A         A      <NA>      <NA> Sub-district
#> 2         A         A         A         B      <NA>      <NA> Sub-district
#> 3         A         A         A         C      <NA>      <NA> Sub-district
#> 4         A         A         A         A      <NA>      <NA> Sub-district
#> 5         A         A         A         B      <NA>      <NA> Sub-district
#> 6         A         A         A         C      <NA>      <NA> Sub-district
#>      orig_x   orig_y orig_pop dest_adm0 dest_adm1 dest_adm2 dest_adm3 dest_adm4
#> 1 100.30449 20.22459    10215         A         A         B         D      <NA>
#> 2  89.11212 20.01577    10070         A         A         B         E      <NA>
#> 3  99.41070 23.75549    10076         A         A         B         F      <NA>
#> 4  94.45019 19.53901    10098         A         A         A         A      <NA>
#> 5 100.74703 20.78421     9928         A         A         A         B      <NA>
#> 6  98.06929 20.93987     9959         A         A         A         C      <NA>
#>   dest_adm5    dest_type    dest_x   dest_y dest_pop trips
#> 1      <NA> Sub-district  98.45894 21.12676     5079    11
#> 2      <NA> Sub-district 105.06001 20.64497     4933     2
#> 3      <NA> Sub-district  95.40474 20.73335     5073     9
#> 4      <NA> Sub-district  94.45019 19.53901    10098    NA
#> 5      <NA> Sub-district 100.74703 20.78421     9928    NA
#> 6      <NA> Sub-district  98.06929 20.93987     9959    NA

#----------------------------------------
# Dataset with which to extrapolate model
#----------------------------------------

pred <- travel_data_template
n <- 6 # Add some observations
pred[seq_len(n), ] <- NA

# Time span of the interval over which to extrapolate the fitted model
pred$date_span <- as.difftime(7, units = 'days')

# Origin info
pred$orig_adm0 <- 'A'
pred$orig_adm1 <- 'A'
pred$orig_adm2 <- LETTERS[1:n]
pred$orig_type <- 'District' # Type of admin unit for lowest admin level
pred$orig_x <- rnorm(n, 100, 5)
pred$orig_y <- rnorm(n, 20, 2)
pred$orig_pop <- rpois(n, 1e+05)

# Number of reported trips (unobserved for extrapolation data).
# Assign to `pred`, not `trip`, so the observed trip counts built above are
# left intact.
pred$trips <- NA

head(pred)
#>   date_start date_stop date_span indiv_id indiv_age indiv_sex indiv_type
#> 1       <NA>      <NA>         7       NA        NA        NA       <NA>
#> 2       <NA>      <NA>         7       NA        NA        NA       <NA>
#> 3       <NA>      <NA>         7       NA        NA        NA       <NA>
#> 4       <NA>      <NA>         7       NA        NA        NA       <NA>
#> 5       <NA>      <NA>         7       NA        NA        NA       <NA>
#> 6       <NA>      <NA>         7       NA        NA        NA       <NA>
#>   orig_adm0 orig_adm1 orig_adm2 orig_adm3 orig_adm4 orig_adm5 orig_type
#> 1         A         A         A      <NA>      <NA>      <NA>  District
#> 2         A         A         B      <NA>      <NA>      <NA>  District
#> 3         A         A         C      <NA>      <NA>      <NA>  District
#> 4         A         A         D      <NA>      <NA>      <NA>  District
#> 5         A         A         E      <NA>      <NA>      <NA>  District
#> 6         A         A         F      <NA>      <NA>      <NA>  District
#>      orig_x   orig_y orig_pop dest_adm0 dest_adm1 dest_adm2 dest_adm3 dest_adm4
#> 1 105.23461 20.56950    99961      <NA>      <NA>      <NA>      <NA>      <NA>
#> 2  93.94760 20.85719   100205      <NA>      <NA>      <NA>      <NA>      <NA>
#> 3 101.90075 19.75807   100108      <NA>      <NA>      <NA>      <NA>      <NA>
#> 4  99.66307 20.91648   100467      <NA>      <NA>      <NA>      <NA>      <NA>
#> 5 101.40050 21.23307   100022      <NA>      <NA>      <NA>      <NA>      <NA>
#> 6 101.65154 19.64990   100672      <NA>      <NA>      <NA>      <NA>      <NA>
#>   dest_adm5 dest_type dest_x dest_y dest_pop trips
#> 1      <NA>      <NA>     NA     NA       NA    NA
#> 2      <NA>      <NA>     NA     NA       NA    NA
#> 3      <NA>      <NA>     NA     NA       NA    NA
#> 4      <NA>      <NA>     NA     NA       NA    NA
#> 5      <NA>      <NA>     NA     NA       NA    NA
#> 6      <NA>      <NA>     NA     NA       NA    NA