R实现地图绘制

科研
发布日期: 2025-02-02
文章字数: 2.4k
阅读时长: 14 分
# ---- Step 1: Install and Load Necessary Packages ----
# Ensure these packages are installed
# install.packages(c("sf", "ggplot2", "dplyr", "readr", "purrr", "ggspatial", "grid", "officer", "rvg"))

library(sf)
library(ggplot2)
library(dplyr)
library(readr)     # For read_csv
library(purrr)     # For map functional programming
library(ggspatial)
library(grid)
# library(showtext) # Removed showtext dependency
library(officer)   # Optional
library(rvg)       # Optional
# library(readxl)    # Optional

# ---- Step 2: Set Font ----
# Specify the desired font family directly in ggplot elements
# Ensure "Times New Roman" is available on your system
base_font_family <- "Times New Roman"

# ---- Step 3: Read geojson Files ----
cities_data <- st_read("China Map/中国_市.geojson")
districts_data <- st_read("China Map/中国_县.geojson")

# ---- Step 4: Define Target Cities and Districts ----
target_areas_cities_cn <- c("成都市", "绵阳市", "宜宾市", "德阳市", "南充市", "达州市", "泸州市", "乐山市", "自贡市", "眉山市", "遂宁市", "内江市", "广安市", "雅安市", "资阳市")
target_areas_districts_cn <- c("渝北区", "渝中区", "九龙坡区", "沙坪坝区", "南岸区", "江津区", "北碚区", "巴南区", "涪陵区", "璧山区", "万州区", "永川区", "长寿区", "合川区", "大渡口区", "铜梁区", "大足区", "綦江区", "荣창区", "开州区", "潼南区", "梁平区", "南川区", "云阳县", "忠县", "垫江县", "黔江区", "丰都县", "石柱土家族自治县", "彭水苗族土家族自治县", "武隆区", "城口县", "秀山土家族苗族自治县", "酉阳土家族苗族自治县", "江北区") # Ensure Jiangbei is here
target_areas_cities_en <- c("Chengdu", "Mianyang", "Yibin", "Deyang", "Nanchong", "Dazhou", "Luzhou", "Leshan", "Zigong", "Meishan", "Suining", "Neijiang", "Guang'an", "Ya'an", "Ziyang")
target_areas_districts_en <- c("Yubei", "Yuzhong", "Jiulongpo", "Shapingba", "Nan'an", "Jiangjin", "Beibei", "Banan", "Fuling", "Bishan", "Wanzhou", "Yongchuan", "Changshou", "Hechuan", "Dadukou", "Tongliang", "Dazu", "Qijiang", "Rongchang", "Kaizhou", "Tongnan", "Liangping", "Nanchuan", "Yunyang", "Zhongxian", "Dianjiang", "Qianjiang", "Fengdu", "Shizhu", "Pengshui", "Wulong", "Chengkou", "Xiushan", "Youyang", "Jiangbei") # Add Jiangbei EN
gb_code_for_jiangbei <- '156500105'

# ---- Step 5: Filter Geographic Data ----
sichuan_cities <- cities_data %>% filter(name %in% target_areas_cities_cn)
chongqing_districts <- districts_data %>% filter((name %in% target_areas_districts_cn & name != "江北区") | (name == "江北区" & gb == gb_code_for_jiangbei))
sichuan_chongqing <- rbind(sichuan_cities, chongqing_districts) %>% distinct(name, .keep_all = TRUE) %>% mutate(name = trimws(name)) # Trim names early

# ---- Step 6: Calculate Average Node Strength (2020-2024) ----
avg_node_strength_data <- NULL # Initialize
tryCatch({
  years <- 2020:2024
  base_path <- "Python/Output Data" # Base path for data folders
  source_col_name <- "Source"
  target_col_name <- "Target"
  prob_col_name   <- "Weight"
  
  # Function to read and process data for a single year
  read_edge_data <- function(year, base_path, src_col, trg_col, prob_col) {
    file_path <- file.path(base_path, as.character(year), "边数据.csv")
    if (!file.exists(file_path)) {
      warning("File not found for year ", year, ": ", file_path)
      return(NULL) # Return NULL if file doesn't exist
    }
    tryCatch({
      df <- read_csv(file_path, show_col_types = FALSE)
      # Check for required columns
      if (!all(c(src_col, trg_col, prob_col) %in% names(df))) {
        warning("Missing required columns in file for year ", year, ": ", file_path)
        return(NULL)
      }
      df %>%
        rename(source = !!sym(src_col), target = !!sym(trg_col), prob = !!sym(prob_col)) %>%
        mutate(source = trimws(source), target = trimws(target)) %>%
        # Ensure probability is numeric, coerce non-numeric to NA
        mutate(prob = as.numeric(prob)) %>%
        filter(!is.na(prob)) %>% # Remove rows where probability couldn't be read as numeric
        select(source, target, prob) %>%
        mutate(year = year) %>% # Add year column
        # Create a unique pair ID, sorting source/target to handle directionality
        mutate(pair_id = map2_chr(source, target, ~ paste(sort(c(.x, .y)), collapse = "-")))
    }, error = function(e) {
      warning("Error reading or processing file for year ", year, ": ", file_path, " - ", e$message)
      return(NULL)
    })
  }
  
  # Read all yearly data into a single data frame
  all_years_edges <- map_dfr(years, ~read_edge_data(.x, base_path, source_col_name, target_col_name, prob_col_name))
  
  if (nrow(all_years_edges) == 0) {
    stop("No valid edge data could be read from any year.")
  }
  
  message("Read data for ", length(unique(all_years_edges$year)), " years.")
  
  # Calculate the average probability for each unique edge pair across all years
  # Ensure source and target are consistent for each pair_id before averaging
  consistent_pairs <- all_years_edges %>%
    select(pair_id, source, target) %>%
    distinct(pair_id, .keep_all = TRUE) # Get one consistent source/target for each pair_id
  
  average_edges <- all_years_edges %>%
    group_by(pair_id) %>%
    # Calculate the mean probability for each pair, handling potential NA values if a pair doesn't exist in a year
    summarise(average_prob = mean(prob, na.rm = TRUE), .groups = 'drop') %>%
    # Replace NaN with 0 (can happen if a pair exists but only has NA probabilities, though filter step should prevent this)
    mutate(average_prob = ifelse(is.nan(average_prob), 0, average_prob)) %>%
    filter(average_prob > 0) %>% # Keep only edges with positive average strength
    # Join back the consistent source/target names
    left_join(consistent_pairs, by = "pair_id") %>%
    select(source, target, average_prob) # Keep relevant columns
  
  
  if (nrow(average_edges) == 0) {
    stop("No edges with positive average probability found.")
  }
  
  message("Calculated average probability for ", nrow(average_edges), " unique edges.")
  
  # Calculate Average Node Strength
  # Sum the average strength for each node (appearing as source or target)
  strength_source <- average_edges %>% select(node = source, strength = average_prob)
  strength_target <- average_edges %>% select(node = target, strength = average_prob)
  
  avg_node_strength_data <- bind_rows(strength_source, strength_target) %>%
    group_by(node) %>%
    summarise(avg_node_strength = sum(strength), .groups = 'drop') %>%
    rename(name = node) # Rename column to 'name' for joining
  
  message("Successfully calculated average node strength (2020-2024) for ", nrow(avg_node_strength_data), " nodes.")
  
}, error = function(e) {
  warning("Error calculating average node strength (2020-2024): ", e$message, "\nMap fill will be missing.")
  avg_node_strength_data <- NULL # Ensure it's NULL on error
})

# ---- Step 7: Merge Average Node Strength to Geographical Data ----
if (!is.null(avg_node_strength_data)) {
  sichuan_chongqing <- sichuan_chongqing %>%
    left_join(avg_node_strength_data, by = "name")
} else {
  # Add a dummy column if calculation failed, to avoid errors later
  sichuan_chongqing$avg_node_strength <- NA_real_ # Use numeric NA
  warning("Using NA for node strength due to calculation error.")
}


# ---- Step 8: Calculate Centroids and Prepare Labels ----
# Use st_point_on_surface for potentially better placement inside polygons
sichuan_chongqing_centroids <- suppressWarnings(st_point_on_surface(sichuan_chongqing))

# Manual Adjustments (keep if needed)
suppressWarnings({
  # Find indices safely
  idx_luzhou <- which(sichuan_chongqing_centroids$name == "泸州市")
  idx_yubei <- which(sichuan_chongqing_centroids$name == "渝北区")
  idx_meishan <- which(sichuan_chongqing_centroids$name == "眉山市")
  
  if(length(idx_luzhou) > 0) sichuan_chongqing_centroids[idx_luzhou, ]$geometry <- st_geometry(sichuan_chongqing_centroids[idx_luzhou, ]) + c(-0.23, 0.23)
  if(length(idx_yubei) > 0) sichuan_chongqing_centroids[idx_yubei, ]$geometry <- st_geometry(sichuan_chongqing_centroids[idx_yubei, ]) + c(0.03, -0.03)
  if(length(idx_meishan) > 0) sichuan_chongqing_centroids[idx_meishan, ]$geometry <- st_geometry(sichuan_chongqing_centroids[idx_meishan, ]) + c(0, 0.1)
})

# ---- Step 9: Define and Process Labels (English with Numbered Core) ----
# Combine CN and EN names for mapping (ensure Jiangbei is included)
name_map_cn <- c(target_areas_cities_cn, target_areas_districts_cn, "江北区")
name_map_en <- c(target_areas_cities_en, target_areas_districts_en, "Jiangbei") # Make sure Jiangbei is here
name_mapping <- setNames(name_map_en, name_map_cn) %>% .[!duplicated(names(.))] # Create named vector, remove duplicate CN names if any

# Define the core Chongqing districts for numbering
chongqing_core_cn <- c("江北区", "沙坪坝区", "九龙坡区", "渝中区", "大渡口区", "南岸区")
chongqing_core_labels <- c("1", "2", "3", "4", "5", "6")
core_mapping <- setNames(chongqing_core_labels, chongqing_core_cn)

# Apply mapping: use number if core, else use English name from mapping, else original name
sichuan_chongqing_centroids <- sichuan_chongqing_centroids %>%
  mutate(label = case_when(
    name %in% chongqing_core_cn ~ recode(name, !!!core_mapping),
    name %in% names(name_mapping) ~ recode(name, !!!name_mapping),
    TRUE ~ name # Fallback to original name if no mapping found
  ))

# ---- Step 10: Calculate Shared Boundary ----
shared_boundary <- NULL
tryCatch({
  # Ensure input layers are valid before proceeding
  if(!all(st_is_valid(sichuan_cities))) sichuan_cities <- st_make_valid(sichuan_cities)
  if(!all(st_is_valid(chongqing_districts))) chongqing_districts <- st_make_valid(chongqing_districts)
  
  sichuan_cities_proj <- st_transform(sichuan_cities, crs = 3857) # Use a projected CRS for geometric operations
  chongqing_districts_proj <- st_transform(chongqing_districts, crs = 3857)
  
  sichuan_boundary_proj <- suppressMessages(suppressWarnings(st_union(sichuan_cities_proj) %>% st_boundary()))
  chongqing_boundary_proj <- suppressMessages(suppressWarnings(st_union(chongqing_districts_proj) %>% st_boundary()))
  
  # Perform intersection only if boundaries are valid and not empty
  if(!st_is_empty(sichuan_boundary_proj) && !st_is_empty(chongqing_boundary_proj) && st_is_valid(sichuan_boundary_proj) && st_is_valid(chongqing_boundary_proj)){
    shared_boundary_proj <- suppressMessages(suppressWarnings(st_intersection(sichuan_boundary_proj, chongqing_boundary_proj)))
    if (length(shared_boundary_proj) > 0 && !st_is_empty(shared_boundary_proj)) {
      shared_boundary <- st_transform(shared_boundary_proj, crs = 4326) # Transform back to WGS84
      message("Shared boundary calculated.")
    } else {
      message("No shared boundary found or intersection empty.")
    }
  } else {
    message("Could not calculate shared boundary due to invalid or empty input boundaries.")
  }
}, error = function(e){
  warning("Could not calculate shared boundary: ", e$message)
})

# ---- Step 11: Ensure Correct CRS for Plotting ----
sichuan_chongqing <- st_transform(sichuan_chongqing, crs = 4326)
sichuan_chongqing_centroids <- st_transform(sichuan_chongqing_centroids, crs = 4326)
# shared_boundary is already transformed if calculated successfully

# ---- Step 12: Plot the Map ----
map_plot <- ggplot() +
  # Base Map: Fill by Average Node Strength
  geom_sf(data = sichuan_chongqing,
          aes(fill = avg_node_strength), # Use calculated average node strength
          color = "gray50", # Boundary color for regions
          linewidth = 0.2) +
  
  # Shared Boundary (if calculated)
  {
    if (!is.null(shared_boundary) && inherits(shared_boundary, "sf") && nrow(shared_boundary) > 0 && !st_is_empty(shared_boundary))
      geom_sf(data = shared_boundary, color = "black", linewidth = 0.8)
  } +
  
  # Text Labels
  geom_sf_text(data = sichuan_chongqing_centroids,
               aes(label = label),
               size = 2.5, # Adjust size as needed
               color = "black",
               family = base_font_family, # Use Times New Roman
               check_overlap = FALSE) + # Or use ggrepel::geom_text_repel
  
  # North Arrow
  annotation_north_arrow(location = "tl", which_north = "true",
                         pad_x = unit(0.3, "cm"), pad_y = unit(0.9, "cm"),
                         height = unit(1.2, "cm"), width = unit(1.2, "cm"),
                         style = north_arrow_fancy_orienteering) +
  
  # Scale Bar
  annotation_scale(location = "bl", width_hint = 0.3, # Bottom-left
                   pad_x = unit(0.6, "cm"), pad_y = unit(0.6, "cm"),
                   text_family = base_font_family) + # Use Times New Roman
  
  # Color Scale for Node Strength
  scale_fill_gradient(name = "Average Node Strength\n(2020-2024)", # Legend Title
                      low = "#FDE7D7", high = "#F08B5D", # Orange gradient
                      na.value = "grey90", # Color for NA values
                      n.breaks = 5) + # Adjust number of breaks in legend
  
  # Labels and Theme
  labs(title = "Sichuan-Chongqing Area: Average Node Strength (2020-2024)") +
  theme_minimal(base_family = base_font_family) + # Use Times New Roman
  theme(
    plot.title = element_text(hjust = 0.5, size=14, face="bold"),
    panel.grid.major = element_line(color = "grey85", linetype = "dashed", linewidth = 0.3),
    panel.grid.minor = element_blank(),
    axis.title = element_blank(),
    axis.text = element_text(color = "black", size = 9, family=base_font_family), # Font for axis text
    axis.ticks = element_line(color = "grey70"),
    legend.title = element_text(size = 9, face = "bold", family=base_font_family), # Font for legend title
    legend.text = element_text(size = 8, family=base_font_family), # Font for legend text
    legend.position = "right",
    plot.margin = unit(c(0.5, 2.5, 0.5, 0.5), "cm") # Adjust right margin for annotation
  )

# ---- Step 13: Add Custom Annotation for Numbered Labels ----
# Define annotation text (ensure names match numbered labels 1-6)
annotation_text <- "Numbered Districts:\n1: Jiangbei\n2: Shapingba\n3: Jiulongpo\n4: Yuzhong\n5: Dadukou\n6: Nan'an"

# Create grob with Times New Roman font
annotation_grob <- textGrob(
  label = annotation_text,
  hjust = 0, vjust = 0,
  gp = gpar(fontsize = 9, fontfamily = base_font_family, lineheight = 1.3) # Use Times New Roman
)

# Add annotation outside plot area
# Using annotation_custom with Inf/-Inf and clip="off"
final_plot <- map_plot +
  annotation_custom(
    grob = annotation_grob,
    xmin = Inf, xmax = Inf, ymin = -Inf, ymax = -Inf # Place outside data range
  ) +
  coord_sf(crs = 4326, clip = "off", expand = TRUE) # Allow drawing outside, expand=TRUE might help positioning annotation


# ---- Step 14: Print and Save ----
# Print to RStudio plotting window
print(final_plot)

# Save to PPT (Optional)
# Make sure 'final_plot' object is used
# try({
#   ppt_path <- "images/Sichuan_Chongqing_Avg_Node_Strength_Map_2020_2024.pptx"
#   ppt <- read_pptx()
#   ppt <- add_slide(ppt, layout = "Blank", master = "Office Theme")
#   # Ensure rvg is loaded if using dml
#   plot_dml <- rvg::dml(ggobj = final_plot)
#   ppt <- ph_with(ppt, value = plot_dml, location = ph_location_fullsize())
#   print(ppt, target = ppt_path)
#   message("Plot saved to: ", ppt_path)
# }, error = function(e){
#   warning("Could not save to PPT: ", e$message)
# })

# Save as PNG (Optional)
# ggsave("images/Sichuan_Chongqing_Avg_Node_Strength_Map_2020_2024.png", plot = final_plot, width = 11, height = 8, dpi = 300, bg = "white")