This template data frame provides a general structure for travel data that integrates with data synthesis and modeling functions. Stays (individuals reported as not traveling outside their home location) should be included in this data frame as rows where the origin and destination are the same. Note that models fitted and then extrapolated using other data assume that the same method for defining population size is used throughout. Either the dates (date_start and date_stop) or the time span (date_span) must be filled.

travel_data_template

Format

a data frame with empty columns and generalized column names

date_start

date: beginning of the time interval for the trip count

date_stop

date: end of the time interval for the trip count

date_span

integer: time span in days

indiv_id

integer: unique individual identifier

indiv_age

numeric: age of participant

indiv_sex

logical: gender of participant

indiv_type

factor: group membership of the participant, if individual participants belong to different groups

orig_adm0

character: name of highest administration level of origin location (Country)

orig_adm1

character: name of administration level 1 of origin location (e.g. Division, State)

orig_adm2

character: name of administration level 2 of origin location (e.g. District, County)

orig_adm3

character: name of administration level 3 of origin location (e.g. Sub-district, Province)

orig_adm4

character: name of administration level 4 of origin location (e.g. City, Municipality)

orig_adm5

character: name of administration level 5 of origin location (e.g. Town, Village, Community, Ward)

orig_type

character: administrative type for the origin location (e.g. sub-district, community vs town, or urban vs rural)

orig_x

numeric: longitude of origin location centroid in decimal degrees (centroid of smallest admin unit)

orig_y

numeric: latitude of origin location centroid in decimal degrees (centroid of smallest admin unit)

orig_pop

numeric: population size of lowest administrative unit for origin location

dest_adm0

character: name of highest administration level of destination location (Country)

dest_adm1

character: name of administration level 1 of destination location (e.g. Division, State)

dest_adm2

character: name of administration level 2 of destination location (e.g. District, County)

dest_adm3

character: name of administration level 3 of destination location (e.g. Sub-district, Province)

dest_adm4

character: name of administration level 4 of destination location (e.g. City, Municipality)

dest_adm5

character: name of administration level 5 of destination location (e.g. Town, Village, Community, Ward)

dest_type

character: administrative type for the destination location (e.g. sub-district, community vs town, or urban vs rural)

dest_x

numeric: longitude of destination location centroid in decimal degrees (centroid of smallest admin unit)

dest_y

numeric: latitude of destination location centroid in decimal degrees (centroid of smallest admin unit)

dest_pop

numeric: population size of lowest administrative unit for destination location

trips

numeric: total number of observed trips made from origin to destination during time span

Author

John Giles

Examples

#--------------------------------
# Travel among locations
#--------------------------------

# Start from the empty template and add n rows of missing values,
# which are then filled in column by column below.
trip <- travel_data_template
n <- 3 # Add some observations
trip[seq_len(n), ] <- NA

# Time span of travel survey
trip$date_start <- as.Date("2020-01-01")
trip$date_stop <- trip$date_start + 30
trip$date_span <- difftime(trip$date_stop, trip$date_start, units = 'days')

# Participant info
trip$indiv_id <- sample(1:100, n)
trip$indiv_age <- round(runif(n, 5, 80))
trip$indiv_sex <- rbinom(n, 1, 0.5)

# Origin info
trip$orig_adm0 <- 'A'
trip$orig_adm1 <- 'A'
trip$orig_adm2 <- 'A'
trip$orig_adm3 <- LETTERS[1:n]
trip$orig_type <- 'Sub-district' # Type of admin unit for lowest admin level
trip$orig_x <- rnorm(n, 100, 5)
trip$orig_y <- rnorm(n, 20, 2)
trip$orig_pop <- rpois(n, 10000)

# Destination info
trip$dest_adm0 <- 'A'
trip$dest_adm1 <- 'A'
trip$dest_adm2 <- 'B'
trip$dest_adm3 <- LETTERS[(n + 1):(n * 2)]
trip$dest_type <- 'Sub-district' # Type of admin unit for lowest admin level
trip$dest_x <- rnorm(n, 100, 5)
trip$dest_y <- rnorm(n, 20, 2)
trip$dest_pop <- rpois(n, 5000)

# Number of reported trips
trip$trips <- rpois(n, 10)

head(trip)
#> date_start date_stop date_span indiv_id indiv_age indiv_sex indiv_type #> 1 2020-01-01 2020-01-31 30 days 13 17 1 <NA> #> 2 2020-01-01 2020-01-31 30 days 10 9 1 <NA> #> 3 2020-01-01 2020-01-31 30 days 5 39 1 <NA> #> orig_adm0 orig_adm1 orig_adm2 orig_adm3 orig_adm4 orig_adm5 orig_type #> 1 A A A A <NA> <NA> Sub-district #> 2 A A A B <NA> <NA> Sub-district #> 3 A A A C <NA> <NA> Sub-district #> orig_x orig_y orig_pop dest_adm0 dest_adm1 dest_adm2 dest_adm3 dest_adm4 #> 1 100.30449 20.22459 10215 A A B D <NA> #> 2 89.11212 20.01577 10070 A A B E <NA> #> 3 99.41070 23.75549 10076 A A B F <NA> #> dest_adm5 dest_type dest_x dest_y dest_pop trips #> 1 <NA> Sub-district 98.45894 21.12676 5079 11 #> 2 <NA> Sub-district 105.06001 20.64497 4933 2 #> 3 <NA> Sub-district 95.40474 20.73335 5073 9
#-----------------------
# Stays in home location
#-----------------------

# Start from the empty template and add n rows of missing values.
stay <- travel_data_template
n <- 3 # add some observations
stay[seq_len(n), ] <- NA

# Time span of travel survey
stay$date_start <- as.Date("2020-01-01")
stay$date_stop <- stay$date_start + 30
# Computed from the stay dates themselves, not from the `trip` object
stay$date_span <- difftime(stay$date_stop, stay$date_start, units = 'days')

# Participant info
stay$indiv_id <- sample(100:200, n)
stay$indiv_age <- round(runif(n, 5, 80))
stay$indiv_sex <- rbinom(n, 1, 0.5)

# Origin and destination info: a stay has the same origin and destination,
# so each pair of columns is assigned the same values.
stay$orig_adm0 <- stay$dest_adm0 <- 'A'
stay$orig_adm1 <- stay$dest_adm1 <- 'A'
stay$orig_adm2 <- stay$dest_adm2 <- 'A'
stay$orig_adm3 <- stay$dest_adm3 <- LETTERS[1:n]
stay$orig_type <- stay$dest_type <- 'Sub-district'
stay$orig_x <- stay$dest_x <- rnorm(n, 100, 5)
stay$orig_y <- stay$dest_y <- rnorm(n, 20, 2)
stay$orig_pop <- stay$dest_pop <- rpois(n, 10000)

# Trip counts are left missing for stays
stay$trips <- NA

head(stay)
#> date_start date_stop date_span indiv_id indiv_age indiv_sex indiv_type #> 1 2020-01-01 2020-01-31 30 days 107 44 0 <NA> #> 2 2020-01-01 2020-01-31 30 days 102 76 1 <NA> #> 3 2020-01-01 2020-01-31 30 days 168 68 0 <NA> #> orig_adm0 orig_adm1 orig_adm2 orig_adm3 orig_adm4 orig_adm5 orig_type #> 1 A A A A <NA> <NA> Sub-district #> 2 A A A B <NA> <NA> Sub-district #> 3 A A A C <NA> <NA> Sub-district #> orig_x orig_y orig_pop dest_adm0 dest_adm1 dest_adm2 dest_adm3 dest_adm4 #> 1 94.45019 19.53901 10098 A A A A <NA> #> 2 100.74703 20.78421 9928 A A A B <NA> #> 3 98.06929 20.93987 9959 A A A C <NA> #> dest_adm5 dest_type dest_x dest_y dest_pop trips #> 1 <NA> Sub-district 94.45019 19.53901 10098 NA #> 2 <NA> Sub-district 100.74703 20.78421 9928 NA #> 3 <NA> Sub-district 98.06929 20.93987 9959 NA
# Combine trips and stays into a single survey data set.
# No `by` argument is given, so full_join() matches on all shared columns
# and reports them in a "Joining, by = ..." message.
survey_data <- dplyr::full_join(trip, stay)
#> Joining, by = c("date_start", "date_stop", "date_span", "indiv_id", "indiv_age", "indiv_sex", "indiv_type", "orig_adm0", "orig_adm1", "orig_adm2", "orig_adm3", "orig_adm4", "orig_adm5", "orig_type", "orig_x", "orig_y", "orig_pop", "dest_adm0", "dest_adm1", "dest_adm2", "dest_adm3", "dest_adm4", "dest_adm5", "dest_type", "dest_x", "dest_y", "dest_pop", "trips")
# Show the first rows of the combined trips-and-stays data
head(survey_data)
#> date_start date_stop date_span indiv_id indiv_age indiv_sex indiv_type #> 1 2020-01-01 2020-01-31 30 days 13 17 1 <NA> #> 2 2020-01-01 2020-01-31 30 days 10 9 1 <NA> #> 3 2020-01-01 2020-01-31 30 days 5 39 1 <NA> #> 4 2020-01-01 2020-01-31 30 days 107 44 0 <NA> #> 5 2020-01-01 2020-01-31 30 days 102 76 1 <NA> #> 6 2020-01-01 2020-01-31 30 days 168 68 0 <NA> #> orig_adm0 orig_adm1 orig_adm2 orig_adm3 orig_adm4 orig_adm5 orig_type #> 1 A A A A <NA> <NA> Sub-district #> 2 A A A B <NA> <NA> Sub-district #> 3 A A A C <NA> <NA> Sub-district #> 4 A A A A <NA> <NA> Sub-district #> 5 A A A B <NA> <NA> Sub-district #> 6 A A A C <NA> <NA> Sub-district #> orig_x orig_y orig_pop dest_adm0 dest_adm1 dest_adm2 dest_adm3 dest_adm4 #> 1 100.30449 20.22459 10215 A A B D <NA> #> 2 89.11212 20.01577 10070 A A B E <NA> #> 3 99.41070 23.75549 10076 A A B F <NA> #> 4 94.45019 19.53901 10098 A A A A <NA> #> 5 100.74703 20.78421 9928 A A A B <NA> #> 6 98.06929 20.93987 9959 A A A C <NA> #> dest_adm5 dest_type dest_x dest_y dest_pop trips #> 1 <NA> Sub-district 98.45894 21.12676 5079 11 #> 2 <NA> Sub-district 105.06001 20.64497 4933 2 #> 3 <NA> Sub-district 95.40474 20.73335 5073 9 #> 4 <NA> Sub-district 94.45019 19.53901 10098 NA #> 5 <NA> Sub-district 100.74703 20.78421 9928 NA #> 6 <NA> Sub-district 98.06929 20.93987 9959 NA
#----------------------------------------
# Dataset with which to extrapolate model
#----------------------------------------

# Start from the empty template and add n rows of missing values.
pred <- travel_data_template
n <- 6 # Add some observations
pred[seq_len(n), ] <- NA

# Time span of the interval over which to extrapolate the fitted model
pred$date_span <- as.difftime(7, units = 'days')

# Origin info
pred$orig_adm0 <- 'A'
pred$orig_adm1 <- 'A'
pred$orig_adm2 <- LETTERS[1:n]
pred$orig_type <- 'District' # Type of admin unit for lowest admin level
pred$orig_x <- rnorm(n, 100, 5)
pred$orig_y <- rnorm(n, 20, 2)
pred$orig_pop <- rpois(n, 1e+05)

# Number of reported trips (unobserved for extrapolation data).
# Set on `pred` itself so the `trip` example data are not overwritten.
pred$trips <- NA

head(pred)
#> date_start date_stop date_span indiv_id indiv_age indiv_sex indiv_type #> 1 <NA> <NA> 7 NA NA NA <NA> #> 2 <NA> <NA> 7 NA NA NA <NA> #> 3 <NA> <NA> 7 NA NA NA <NA> #> 4 <NA> <NA> 7 NA NA NA <NA> #> 5 <NA> <NA> 7 NA NA NA <NA> #> 6 <NA> <NA> 7 NA NA NA <NA> #> orig_adm0 orig_adm1 orig_adm2 orig_adm3 orig_adm4 orig_adm5 orig_type #> 1 A A A <NA> <NA> <NA> District #> 2 A A B <NA> <NA> <NA> District #> 3 A A C <NA> <NA> <NA> District #> 4 A A D <NA> <NA> <NA> District #> 5 A A E <NA> <NA> <NA> District #> 6 A A F <NA> <NA> <NA> District #> orig_x orig_y orig_pop dest_adm0 dest_adm1 dest_adm2 dest_adm3 dest_adm4 #> 1 105.23461 20.56950 99961 <NA> <NA> <NA> <NA> <NA> #> 2 93.94760 20.85719 100205 <NA> <NA> <NA> <NA> <NA> #> 3 101.90075 19.75807 100108 <NA> <NA> <NA> <NA> <NA> #> 4 99.66307 20.91648 100467 <NA> <NA> <NA> <NA> <NA> #> 5 101.40050 21.23307 100022 <NA> <NA> <NA> <NA> <NA> #> 6 101.65154 19.64990 100672 <NA> <NA> <NA> <NA> <NA> #> dest_adm5 dest_type dest_x dest_y dest_pop trips #> 1 <NA> <NA> NA NA NA NA #> 2 <NA> <NA> NA NA NA NA #> 3 <NA> <NA> NA NA NA NA #> 4 <NA> <NA> NA NA NA NA #> 5 <NA> <NA> NA NA NA NA #> 6 <NA> <NA> NA NA NA NA