Generalized template for travel data — travel_data

This template data frame provides a general structure for travel data that integrates with data synthesis and modeling functions. Stays (individuals reported as not traveling outside their home location) must also be included in this data frame, as rows where the origin and destination are the same. Note that models fitted and then extrapolated using other data assume that the same method for defining population size is used throughout. Either the dates (date_start and date_stop) or the time span (date_span) must be filled.

travel_data_template

Format

a data frame with empty columns and generalized column names

date_start: date: beginning of the time interval for the trip count
date_stop: date: end of the time interval for the trip count
date_span: integer: time span in days
indiv_id: integer: unique individual identifier
indiv_age: numeric: age of participant
indiv_sex: logical: gender of participant
indiv_type: factor: if individual participants belong to different groups
orig_adm0: character: name of highest administration level of origin location (Country)
orig_adm1: character: name of administration level 1 of origin location (e.g. Division, State)
orig_adm2: character: name of administration level 2 of origin location (e.g. District, County)
orig_adm3: character: name of administration level 3 of origin location (e.g. Sub-district, Province)
orig_adm4: character: name of administration level 4 of origin location (e.g. City, Municipality)
orig_adm5: character: name of administration level 5 of origin location (e.g. Town, Village, Community, Ward)
orig_type: character: administrative type for the origin location (e.g. sub-district, community vs town, or urban vs rural)
orig_x: numeric: longitude of origin location centroid in decimal degrees (centroid of smallest admin unit)
orig_y: numeric: latitude of origin location centroid in decimal degrees (centroid of smallest admin unit)
orig_pop: numeric: population size of lowest administrative unit for origin location
dest_adm0: character: name of highest administration level of destination location (Country)
dest_adm1: character: name of administration level 1 of destination location (e.g. Division, State)
dest_adm2: character: name of administration level 2 of destination location (e.g. District, County)
dest_adm3: character: name of administration level 3 of destination location (e.g. Sub-district, Province)
dest_adm4: character: name of administration level 4 of destination location (e.g. City, Municipality)
dest_adm5: character: name of administration level 5 of destination location (e.g. Town, Village, Community, Ward)
dest_type: character: administrative type for the destination location (e.g. sub-district, community vs town, or urban vs rural)
dest_x: numeric: longitude of destination location centroid in decimal degrees (centroid of smallest admin unit)
dest_y: numeric: latitude of destination location centroid in decimal degrees (centroid of smallest admin unit)
dest_pop: numeric: population size of lowest administrative unit for destination location
trips: numeric: total number of observed trips made from origin to destination during time span

Author

John Giles

Examples

#--------------------------------
# Travel among locations
#--------------------------------

# Start from the empty template and create n all-NA rows to fill in
trip <- travel_data_template
n <- 3
trip[seq_len(n), ] <- NA

# Survey window: starts 2020-01-01 and stops 30 days later
trip$date_start <- as.Date("2020-01-01")
trip$date_stop <- trip$date_start + 30
trip$date_span <- difftime(trip$date_stop, trip$date_start, units = "days")

# Simulated participant attributes
trip$indiv_id <- sample(seq_len(100), n)
trip$indiv_age <- round(runif(n, min = 5, max = 80))
trip$indiv_sex <- rbinom(n, size = 1, prob = 0.5)

# Where each trip starts (admin level 3 is the lowest level filled in)
trip$orig_adm0 <- "A"
trip$orig_adm1 <- "A"
trip$orig_adm2 <- "A"
trip$orig_adm3 <- LETTERS[seq_len(n)]
trip$orig_type <- "Sub-district" # admin type of the lowest level used
trip$orig_x <- rnorm(n, mean = 100, sd = 5)
trip$orig_y <- rnorm(n, mean = 20, sd = 2)
trip$orig_pop <- rpois(n, lambda = 10000)

# Where each trip ends (the next n letters, so no destination equals an origin)
trip$dest_adm0 <- "A"
trip$dest_adm1 <- "A"
trip$dest_adm2 <- "B"
trip$dest_adm3 <- LETTERS[n + seq_len(n)]
trip$dest_type <- "Sub-district" # admin type of the lowest level used
trip$dest_x <- rnorm(n, mean = 100, sd = 5)
trip$dest_y <- rnorm(n, mean = 20, sd = 2)
trip$dest_pop <- rpois(n, lambda = 5000)

# Simulated count of reported trips for each origin-destination row
trip$trips <- rpois(n, lambda = 10)

head(trip)
#>   date_start  date_stop date_span indiv_id indiv_age indiv_sex indiv_type
#> 1 2020-01-01 2020-01-31   30 days       13        17         1       <NA>
#> 2 2020-01-01 2020-01-31   30 days       10         9         1       <NA>
#> 3 2020-01-01 2020-01-31   30 days        5        39         1       <NA>
#>   orig_adm0 orig_adm1 orig_adm2 orig_adm3 orig_adm4 orig_adm5    orig_type
#> 1         A         A         A         A      <NA>      <NA> Sub-district
#> 2         A         A         A         B      <NA>      <NA> Sub-district
#> 3         A         A         A         C      <NA>      <NA> Sub-district
#>      orig_x   orig_y orig_pop dest_adm0 dest_adm1 dest_adm2 dest_adm3 dest_adm4
#> 1 100.30449 20.22459    10215         A         A         B         D      <NA>
#> 2  89.11212 20.01577    10070         A         A         B         E      <NA>
#> 3  99.41070 23.75549    10076         A         A         B         F      <NA>
#>   dest_adm5    dest_type    dest_x   dest_y dest_pop trips
#> 1      <NA> Sub-district  98.45894 21.12676     5079    11
#> 2      <NA> Sub-district 105.06001 20.64497     4933     2
#> 3      <NA> Sub-district  95.40474 20.73335     5073     9



#-----------------------
# Stays in home location
#-----------------------

# A stay is recorded as a row whose origin and destination are the same
stay <- travel_data_template
n <- 3 # add some observations
stay[seq_len(n), ] <- NA

# Time span of travel survey, computed from this data frame's own dates
stay$date_start <- as.Date("2020-01-01")
stay$date_stop <- stay$date_start + 30
stay$date_span <- difftime(stay$date_stop, stay$date_start, units='days')

# Participant info
stay$indiv_id <- sample(100:200, n)
stay$indiv_age <- round(runif(n, 5, 80))
stay$indiv_sex <- rbinom(n, 1, 0.5)

# Origin and destination info: each chained assignment writes the same
# values to both sides, so every row stays in its home location
stay$orig_adm0 <- stay$dest_adm0 <- 'A'
stay$orig_adm1 <- stay$dest_adm1 <- 'A'
stay$orig_adm2 <- stay$dest_adm2 <- 'A'
stay$orig_adm3 <- stay$dest_adm3 <- LETTERS[seq_len(n)]
stay$orig_type <- stay$dest_type <- 'Sub-district'
stay$orig_x <- stay$dest_x <- rnorm(n, 100, 5)
stay$orig_y <- stay$dest_y <- rnorm(n, 20, 2)
stay$orig_pop <- stay$dest_pop <- rpois(n, 10000)

# Trip count is left missing for the stay rows in this example
stay$trips <- NA

head(stay)
#>   date_start  date_stop date_span indiv_id indiv_age indiv_sex indiv_type
#> 1 2020-01-01 2020-01-31   30 days      107        44         0       <NA>
#> 2 2020-01-01 2020-01-31   30 days      102        76         1       <NA>
#> 3 2020-01-01 2020-01-31   30 days      168        68         0       <NA>
#>   orig_adm0 orig_adm1 orig_adm2 orig_adm3 orig_adm4 orig_adm5    orig_type
#> 1         A         A         A         A      <NA>      <NA> Sub-district
#> 2         A         A         A         B      <NA>      <NA> Sub-district
#> 3         A         A         A         C      <NA>      <NA> Sub-district
#>      orig_x   orig_y orig_pop dest_adm0 dest_adm1 dest_adm2 dest_adm3 dest_adm4
#> 1  94.45019 19.53901    10098         A         A         A         A      <NA>
#> 2 100.74703 20.78421     9928         A         A         A         B      <NA>
#> 3  98.06929 20.93987     9959         A         A         A         C      <NA>
#>   dest_adm5    dest_type    dest_x   dest_y dest_pop trips
#> 1      <NA> Sub-district  94.45019 19.53901    10098    NA
#> 2      <NA> Sub-district 100.74703 20.78421     9928    NA
#> 3      <NA> Sub-district  98.06929 20.93987     9959    NA

# Combine the trip and stay rows into a single survey data set. No `by` is
# supplied, so full_join() matches on every column the two data frames share
# (listed in the "Joining, by" message below).
survey_data <- dplyr::full_join(trip, stay)
#> Joining, by = c("date_start", "date_stop", "date_span", "indiv_id", "indiv_age", "indiv_sex", "indiv_type", "orig_adm0", "orig_adm1", "orig_adm2", "orig_adm3", "orig_adm4", "orig_adm5", "orig_type", "orig_x", "orig_y", "orig_pop", "dest_adm0", "dest_adm1", "dest_adm2", "dest_adm3", "dest_adm4", "dest_adm5", "dest_type", "dest_x", "dest_y", "dest_pop", "trips")
head(survey_data)
#>   date_start  date_stop date_span indiv_id indiv_age indiv_sex indiv_type
#> 1 2020-01-01 2020-01-31   30 days       13        17         1       <NA>
#> 2 2020-01-01 2020-01-31   30 days       10         9         1       <NA>
#> 3 2020-01-01 2020-01-31   30 days        5        39         1       <NA>
#> 4 2020-01-01 2020-01-31   30 days      107        44         0       <NA>
#> 5 2020-01-01 2020-01-31   30 days      102        76         1       <NA>
#> 6 2020-01-01 2020-01-31   30 days      168        68         0       <NA>
#>   orig_adm0 orig_adm1 orig_adm2 orig_adm3 orig_adm4 orig_adm5    orig_type
#> 1         A         A         A         A      <NA>      <NA> Sub-district
#> 2         A         A         A         B      <NA>      <NA> Sub-district
#> 3         A         A         A         C      <NA>      <NA> Sub-district
#> 4         A         A         A         A      <NA>      <NA> Sub-district
#> 5         A         A         A         B      <NA>      <NA> Sub-district
#> 6         A         A         A         C      <NA>      <NA> Sub-district
#>      orig_x   orig_y orig_pop dest_adm0 dest_adm1 dest_adm2 dest_adm3 dest_adm4
#> 1 100.30449 20.22459    10215         A         A         B         D      <NA>
#> 2  89.11212 20.01577    10070         A         A         B         E      <NA>
#> 3  99.41070 23.75549    10076         A         A         B         F      <NA>
#> 4  94.45019 19.53901    10098         A         A         A         A      <NA>
#> 5 100.74703 20.78421     9928         A         A         A         B      <NA>
#> 6  98.06929 20.93987     9959         A         A         A         C      <NA>
#>   dest_adm5    dest_type    dest_x   dest_y dest_pop trips
#> 1      <NA> Sub-district  98.45894 21.12676     5079    11
#> 2      <NA> Sub-district 105.06001 20.64497     4933     2
#> 3      <NA> Sub-district  95.40474 20.73335     5073     9
#> 4      <NA> Sub-district  94.45019 19.53901    10098    NA
#> 5      <NA> Sub-district 100.74703 20.78421     9928    NA
#> 6      <NA> Sub-district  98.06929 20.93987     9959    NA



#----------------------------------------
# Dataset with which to extrapolate model
#----------------------------------------

# Only the time span and origin columns are filled in below; every other
# column keeps the NA it receives when the rows are created
pred <- travel_data_template
n <- 6 # Add some observations
pred[seq_len(n), ] <- NA

# Time span of the interval over which to extrapolate the fitted model
pred$date_span <- as.difftime(7, units='days')

# Origin info
pred$orig_adm0 <- 'A'
pred$orig_adm1 <- 'A'
pred$orig_adm2 <- LETTERS[seq_len(n)]
pred$orig_type <- 'District' # Type of admin unit for lowest admin level
pred$orig_x <- rnorm(n, 100, 5)
pred$orig_y <- rnorm(n, 20, 2)
pred$orig_pop <- rpois(n, 1e+05)

# Number of reported trips (unobserved for extrapolation data).
# Assign to `pred`, not `trip`, so the observed trip counts are not erased.
pred$trips <- NA

head(pred)
#>   date_start date_stop date_span indiv_id indiv_age indiv_sex indiv_type
#> 1       <NA>      <NA>         7       NA        NA        NA       <NA>
#> 2       <NA>      <NA>         7       NA        NA        NA       <NA>
#> 3       <NA>      <NA>         7       NA        NA        NA       <NA>
#> 4       <NA>      <NA>         7       NA        NA        NA       <NA>
#> 5       <NA>      <NA>         7       NA        NA        NA       <NA>
#> 6       <NA>      <NA>         7       NA        NA        NA       <NA>
#>   orig_adm0 orig_adm1 orig_adm2 orig_adm3 orig_adm4 orig_adm5 orig_type
#> 1         A         A         A      <NA>      <NA>      <NA>  District
#> 2         A         A         B      <NA>      <NA>      <NA>  District
#> 3         A         A         C      <NA>      <NA>      <NA>  District
#> 4         A         A         D      <NA>      <NA>      <NA>  District
#> 5         A         A         E      <NA>      <NA>      <NA>  District
#> 6         A         A         F      <NA>      <NA>      <NA>  District
#>      orig_x   orig_y orig_pop dest_adm0 dest_adm1 dest_adm2 dest_adm3 dest_adm4
#> 1 105.23461 20.56950    99961      <NA>      <NA>      <NA>      <NA>      <NA>
#> 2  93.94760 20.85719   100205      <NA>      <NA>      <NA>      <NA>      <NA>
#> 3 101.90075 19.75807   100108      <NA>      <NA>      <NA>      <NA>      <NA>
#> 4  99.66307 20.91648   100467      <NA>      <NA>      <NA>      <NA>      <NA>
#> 5 101.40050 21.23307   100022      <NA>      <NA>      <NA>      <NA>      <NA>
#> 6 101.65154 19.64990   100672      <NA>      <NA>      <NA>      <NA>      <NA>
#>   dest_adm5 dest_type dest_x dest_y dest_pop trips
#> 1      <NA>      <NA>     NA     NA       NA    NA
#> 2      <NA>      <NA>     NA     NA       NA    NA
#> 3      <NA>      <NA>     NA     NA       NA    NA
#> 4      <NA>      <NA>     NA     NA       NA    NA
#> 5      <NA>      <NA>     NA     NA       NA    NA
#> 6      <NA>      <NA>     NA     NA       NA    NA