R Polygon Grouping – Grouping Polygons That Touch and Sum to ~1 Using R

polygonrsf

Looking for a solution in R to group polygons that (1) touch each other AND (2) sum to 1 (or as close to 1 as possible given the data). Understandably the data may not allow for perfect grouping, but I need to identify the greatest number of groupings that follow the above two rules.

If there are "left-over" polygons that don't quite sum to 1, they can simply be added into the nearest group. I understand that it may be a bit of a vague rule-set, but it's a real-world example so I seek some advice.

My spatial polygon object is in sf format.

enter image description here

Reproducible object (which I simplified a bit to reduce the size of the object):

structure(list(lprd_offtk = c(0.576595614111913, 0.470596431324882, 
0.313013556438852, 0.291847827347769, 0.246893506907835, 0.185238421883191, 
0.182961201600402, 0.0411038710600083), groups = c(1, 1, 1, 1, 
1, 1, 1, 1), geometry = structure(list(structure(list(structure(c(249875.059541146, 
252012.035024516, 258685.257224514, 265376.684035709, 267738.088198923, 
275908.662976789, 281064.254433625, 279786.132666357, 272904.492138655, 
269709.525939596, 267311.523306026, 253821.487885182, 248648.505625685, 
232693.413456437, 219199.876598215, 213428.24184839, 218463.195819764, 
217154.422178502, 217499.473797489, 219490.715872187, 225421.030054584, 
227755.855655569, 234327.352083757, 239973.434780013, 246457.027469004, 
249875.059541146, -2494700.2653827, -2496056.45867386, -2489409.8295265, 
-2488249.04572909, -2492688.35003626, -2496804.08657781, -2503136.56846075, 
-2504891.76738967, -2504886.38095311, -2506784.91500093, -2511347.68966855, 
-2519728.1790659, -2529913.10281212, -2540847.16443872, -2539940.8520852, 
-2538048.37233691, -2532106.29000754, -2529129.92905356, -2517321.76768076, 
-2512825.00939204, -2512522.86185769, -2509495.51025011, -2508897.25719341, 
-2506040.65741496, -2500724.41107138, -2494700.2653827), .Dim = c(26L, 
2L))), class = c("XY", "POLYGON", "sfg")), structure(list(structure(c(281064.254433625, 
284721.06606518, 279569.016927732, 282094.14898332, 281549.481661996, 
284157.142691013, 282305.321387715, 283204.573710021, 273726.267363972, 
269374.744023395, 267870.238560712, 266222.58758153, 256514.840665134, 
258248.422578011, 256043.736801008, 255083.684369923, 247588.829402245, 
242499.566458413, 239089.579903093, 235412.606442071, 232693.413456437, 
248648.505625685, 253821.487885182, 267311.523306026, 271048.972508793, 
279786.132666357, 281064.254433625, -2503136.56846075, -2516760.32569946, 
-2518872.10133995, -2520773.76435034, -2524275.20063014, -2530448.22436733, 
-2532136.38788482, -2537039.64229238, -2538755.21593081, -2538507.46831983, 
-2536692.85843521, -2538552.82296956, -2539217.17692106, -2543393.3685468, 
-2543839.44393153, -2548626.33101899, -2548550.97475263, -2551056.56970986, 
-2548313.03575587, -2548248.7328639, -2540847.16443872, -2529913.10281212, 
-2519728.1790659, -2511347.68966855, -2505846.46760906, -2504891.76738967, 
-2503136.56846075), .Dim = c(27L, 2L))), class = c("XY", "POLYGON", 
"sfg")), structure(list(structure(c(232693.413456437, 235412.606442071, 
239089.579903093, 244827.215235134, 242031.464922637, 231806.447551693, 
228328.092368238, 225160.026743837, 225673.702796975, 223775.699904158, 
222347.166277312, 218647.66260595, 214552.65515603, 204583.843831019, 
192032.247618289, 200080.060096766, 200277.787113699, 191486.447821016, 
189540.547330367, 188622.34734284, 191180.019984731, 191764.263346018, 
194439.83928237, 193200.560979109, 197259.461397278, 203059.868545647, 
205319.554316276, 210196.492259697, 209272.229830089, 213428.24184839, 
219199.876598215, 232693.413456437, -2540847.16443872, -2548248.7328639, 
-2548313.03575587, -2554139.64623601, -2562157.82691972, -2572652.06161707, 
-2580661.46857492, -2582586.73282374, -2590361.64630144, -2600768.83961439, 
-2601118.96791017, -2596549.23096386, -2595908.84828633, -2587558.31098132, 
-2582008.47163027, -2578878.15330717, -2576213.00214426, -2576060.33555871, 
-2573428.41342399, -2566314.3667436, -2565176.09049602, -2562132.22863948, 
-2562197.32229417, -2559373.61809678, -2555845.10698301, -2553173.01189052, 
-2554712.86185243, -2550544.36669981, -2545872.33950869, -2538048.37233691, 
-2539940.8520852, -2540847.16443872), .Dim = c(32L, 2L))), class = c("XY", 
"POLYGON", "sfg")), structure(list(structure(c(293014.518153712, 
302663.623878803, 299614.736129523, 299387.143977218, 300713.765833193, 
296012.630953997, 296414.646404753, 291613.202872935, 290101.086700019, 
288351.994932675, 280050.141744657, 276348.36159945, 268239.242818116, 
266664.696781647, 261329.775210368, 260803.076643104, 255043.118691357, 
256043.736801008, 258248.422578011, 256514.840665134, 266222.58758153, 
267870.238560712, 269719.998476136, 278115.141692842, 279027.238226489, 
277046.169367799, 278663.587566502, 278390.166162141, 282776.00453892, 
293014.518153712, -2545938.99223406, -2547984.25248741, -2552335.30469973, 
-2556748.93411025, -2557918.93443286, -2563255.41718524, -2568141.48940461, 
-2573873.67048764, -2580713.55216644, -2582358.27226944, -2575126.00563387, 
-2567008.30844017, -2561285.47027227, -2554370.63011131, -2552687.15762284, 
-2548859.05347352, -2548368.6934926, -2543839.44393153, -2543393.3685468, 
-2539217.17692106, -2538552.82296956, -2536692.85843521, -2538627.15944761, 
-2538204.23532964, -2539662.47951233, -2542253.48922496, -2543160.74876303, 
-2545303.54715771, -2547900.42101953, -2545938.99223406), .Dim = c(30L, 
2L))), class = c("XY", "POLYGON", "sfg")), structure(list(structure(c(306327.164679465, 
312157.87829059, 315727.813056016, 322247.863289479, 324734.234123038, 
322280.899985046, 313282.956380406, 313182.865420436, 308893.020782966, 
297419.243993161, 296690.990207211, 294621.852014004, 291701.900610509, 
291614.741438732, 287419.223157528, 287044.692168127, 283344.246238729, 
282305.321387715, 284157.142691013, 281549.481661996, 282094.14898332, 
279695.910810639, 296955.864084469, 300889.267765777, 306327.164679465, 
-2508039.61053055, -2510727.11046957, -2509609.55377647, -2510558.39775827, 
-2513719.50300778, -2516967.49656351, -2518999.15235832, -2524753.05450383, 
-2529273.04419919, -2529592.16669518, -2525868.72839632, -2531644.56754877, 
-2533950.26964505, -2536931.8001659, -2537502.94552915, -2534657.51917214, 
-2536843.47016035, -2532136.38788482, -2530448.22436733, -2524275.20063014, 
-2520773.76435034, -2518693.05659809, -2515926.86985414, -2511082.96444285, 
-2508039.61053055), .Dim = c(25L, 2L))), class = c("XY", "POLYGON", 
"sfg")), structure(list(structure(c(242499.566458413, 247588.829402245, 
258286.862425668, 260803.076643104, 261329.775210368, 264273.044612322, 
258473.97206371, 259983.568923105, 255154.98928701, 256902.238131262, 
255753.861067199, 247571.580948403, 235663.721895586, 225742.204227351, 
225435.119550552, 228328.092368238, 231806.447551693, 242144.157713471, 
244827.215235134, 242499.566458413, -2551056.56970986, -2548550.97475263, 
-2548187.135643, -2548859.05347352, -2552687.15762284, -2554049.32231806, 
-2560167.55876203, -2568074.91322253, -2577506.8627014, -2589015.02513639, 
-2590721.649419, -2587110.19199153, -2585872.89272702, -2588552.69485312, 
-2582111.24615956, -2580661.46857492, -2572652.06161707, -2561940.41176137, 
-2554139.64623601, -2551056.56970986), .Dim = c(20L, 2L))), class = c("XY", 
"POLYGON", "sfg")), structure(list(structure(c(288351.994932675, 
287495.305756137, 276680.392697904, 270919.365058707, 269340.771068311, 
265346.889957803, 255230.477628228, 254108.340437253, 256902.238131262, 
255154.98928701, 259983.568923105, 258651.712140395, 263994.051303531, 
264273.044612322, 267106.535022276, 268239.242818116, 276348.36159945, 
280050.141744657, 288351.994932675, -2582358.27226944, -2585779.41018566, 
-2591654.24876776, -2591005.29640292, -2594986.78395064, -2598683.40780428, 
-2605133.21307845, -2593026.46124507, -2589015.02513639, -2577506.8627014, 
-2568074.91322253, -2559477.35553307, -2555735.53509041, -2554049.32231806, 
-2554955.3087209, -2561285.47027227, -2567008.30844017, -2575126.00563387, 
-2582358.27226944), .Dim = c(19L, 2L))), class = c("XY", "POLYGON", 
"sfg")), structure(list(structure(c(283344.246238729, 287308.169406312, 
287085.111647096, 289727.343964804, 290109.939547828, 292235.963342529, 
291094.720439021, 293014.518153712, 288473.592209144, 284340.809701151, 
278585.400462918, 278663.587566502, 277046.169367799, 279027.238226489, 
278150.997236807, 283344.246238729, -2536843.47016035, -2534707.65334712, 
-2537145.99703628, -2537864.30653648, -2540981.9151418, -2541866.91487573, 
-2544021.83441239, -2545938.99223406, -2546010.51264318, -2548264.35775681, 
-2545525.57453531, -2543160.74876303, -2542253.48922496, -2539662.47951233, 
-2537406.872217, -2536843.47016035), .Dim = c(16L, 2L))), class = c("XY", 
"POLYGON", "sfg"))), n_empty = 0L, precision = 0, crs = structure(list(
    input = "+proj=tmerc +lat_0=0 +lon_0=27 +k=1 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs", 
    wkt = "PROJCRS[\"unknown\",\n    BASEGEOGCRS[\"unknown\",\n        DATUM[\"World Geodetic System 1984\",\n            ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n                LENGTHUNIT[\"metre\",1]],\n            ID[\"EPSG\",6326]],\n        PRIMEM[\"Greenwich\",0,\n            ANGLEUNIT[\"degree\",0.0174532925199433],\n            ID[\"EPSG\",8901]]],\n    CONVERSION[\"unknown\",\n        METHOD[\"Transverse Mercator\",\n            ID[\"EPSG\",9807]],\n        PARAMETER[\"Latitude of natural origin\",0,\n            ANGLEUNIT[\"degree\",0.0174532925199433],\n            ID[\"EPSG\",8801]],\n        PARAMETER[\"Longitude of natural origin\",27,\n            ANGLEUNIT[\"degree\",0.0174532925199433],\n            ID[\"EPSG\",8802]],\n        PARAMETER[\"Scale factor at natural origin\",1,\n            SCALEUNIT[\"unity\",1],\n            ID[\"EPSG\",8805]],\n        PARAMETER[\"False easting\",0,\n            LENGTHUNIT[\"metre\",1],\n            ID[\"EPSG\",8806]],\n        PARAMETER[\"False northing\",0,\n            LENGTHUNIT[\"metre\",1],\n            ID[\"EPSG\",8807]]],\n    CS[Cartesian,2],\n        AXIS[\"(E)\",east,\n            ORDER[1],\n            LENGTHUNIT[\"metre\",1,\n                ID[\"EPSG\",9001]]],\n        AXIS[\"(N)\",north,\n            ORDER[2],\n            LENGTHUNIT[\"metre\",1,\n                ID[\"EPSG\",9001]]]]"), class = "crs"), class = c("sfc_POLYGON", 
"sfc"), bbox = structure(c(xmin = 188622.34734284, ymin = -2605133.21307845, 
xmax = 324734.234123038, ymax = -2488249.04572909), class = "bbox"))), row.names = c(NA, 
8L), sf_column = "geometry", agr = structure(c(lprd_offtk = NA_integer_, 
groups = NA_integer_), class = "factor", .Label = c("constant", 
"aggregate", "identity")), class = c("sf", "data.frame"))

Best Answer

Now that we have indicated all of the issues you could try some clustering approaches using optimizations such as Simulated Annealing. Here is a quick worked example using Max-p Simulated Annealing. The use of queen_weights is defining first order neighbors (those that touch) and the optimization target is 10% of the population which would be similar to your "sum to 1 target". Keep in mind that this clustering approach uses simulated annealing so, changes in the heating parameter can result in very different solutions.

library(sf)
library(rgeoda)

guerry <- st_read(system.file("extdata", "Guerry.shp", 
                  package = "rgeoda"))
  guerry <- guerry[c('Crm_prs','Crm_prp','Pop1831')]

ijw <- queen_weights(guerry)
  mpc <- maxp_sa(ijw, guerry, guerry['Pop1831'], 3236.67, 
                 cooling_rate=0.85, sa_maxit=1)
    guerry$clust <- mpc$Clusters
      plot(guerry["clust"])

Here we check the solution(s)

for(i in sort(unique(guerry$clust))) {
  cat("sum of cluster", i, sum(guerry[guerry$clust == i,]$Pop1831),
      "with target of 3236.67",  "\n")
}   

example cluster solution

Now, lets look at your data (p sf polygon object was created from the structure output in the original post).

ijw <- queen_weights(p)
  mpc <- maxp_sa(ijw, p, p["lprd_offtk"], 1, 
                 cooling_rate=0.85, sa_maxit=1)
    p$clust <- mpc$Clusters
      plot(p["clust"])

Here we can check how close to target sum we get (in my run it was 2 cluster solutions with 1.261058 and 1.047192).

for(i in sort(unique(p$clust))) {
  cat("sum of cluster", i, sum(p[p$clust == i,]$lprd_offtk),
      "with target of 1",  "\n")
}

your data cluster solution

Related Question