Looking for a solution in R to group polygons that (1) touch each other AND (2) sum to 1 (or as close to 1 as possible given the data). Understandably the data may not allow for perfect grouping, but I need to identify the greatest number of groupings that follow the above two rules.
If there are "left-over" polygons that don't quite sum to 1, they can simply be added into the nearest group. I understand that it may be a bit of a vague rule-set, but it's a real-world example so I seek some advice.
My spatial polygon object is in sf format.
Reproducible object (which I simplified a bit to reduce the size of the object):
structure(list(lprd_offtk = c(0.576595614111913, 0.470596431324882,
0.313013556438852, 0.291847827347769, 0.246893506907835, 0.185238421883191,
0.182961201600402, 0.0411038710600083), groups = c(1, 1, 1, 1,
1, 1, 1, 1), geometry = structure(list(structure(list(structure(c(249875.059541146,
252012.035024516, 258685.257224514, 265376.684035709, 267738.088198923,
275908.662976789, 281064.254433625, 279786.132666357, 272904.492138655,
269709.525939596, 267311.523306026, 253821.487885182, 248648.505625685,
232693.413456437, 219199.876598215, 213428.24184839, 218463.195819764,
217154.422178502, 217499.473797489, 219490.715872187, 225421.030054584,
227755.855655569, 234327.352083757, 239973.434780013, 246457.027469004,
249875.059541146, -2494700.2653827, -2496056.45867386, -2489409.8295265,
-2488249.04572909, -2492688.35003626, -2496804.08657781, -2503136.56846075,
-2504891.76738967, -2504886.38095311, -2506784.91500093, -2511347.68966855,
-2519728.1790659, -2529913.10281212, -2540847.16443872, -2539940.8520852,
-2538048.37233691, -2532106.29000754, -2529129.92905356, -2517321.76768076,
-2512825.00939204, -2512522.86185769, -2509495.51025011, -2508897.25719341,
-2506040.65741496, -2500724.41107138, -2494700.2653827), .Dim = c(26L,
2L))), class = c("XY", "POLYGON", "sfg")), structure(list(structure(c(281064.254433625,
284721.06606518, 279569.016927732, 282094.14898332, 281549.481661996,
284157.142691013, 282305.321387715, 283204.573710021, 273726.267363972,
269374.744023395, 267870.238560712, 266222.58758153, 256514.840665134,
258248.422578011, 256043.736801008, 255083.684369923, 247588.829402245,
242499.566458413, 239089.579903093, 235412.606442071, 232693.413456437,
248648.505625685, 253821.487885182, 267311.523306026, 271048.972508793,
279786.132666357, 281064.254433625, -2503136.56846075, -2516760.32569946,
-2518872.10133995, -2520773.76435034, -2524275.20063014, -2530448.22436733,
-2532136.38788482, -2537039.64229238, -2538755.21593081, -2538507.46831983,
-2536692.85843521, -2538552.82296956, -2539217.17692106, -2543393.3685468,
-2543839.44393153, -2548626.33101899, -2548550.97475263, -2551056.56970986,
-2548313.03575587, -2548248.7328639, -2540847.16443872, -2529913.10281212,
-2519728.1790659, -2511347.68966855, -2505846.46760906, -2504891.76738967,
-2503136.56846075), .Dim = c(27L, 2L))), class = c("XY", "POLYGON",
"sfg")), structure(list(structure(c(232693.413456437, 235412.606442071,
239089.579903093, 244827.215235134, 242031.464922637, 231806.447551693,
228328.092368238, 225160.026743837, 225673.702796975, 223775.699904158,
222347.166277312, 218647.66260595, 214552.65515603, 204583.843831019,
192032.247618289, 200080.060096766, 200277.787113699, 191486.447821016,
189540.547330367, 188622.34734284, 191180.019984731, 191764.263346018,
194439.83928237, 193200.560979109, 197259.461397278, 203059.868545647,
205319.554316276, 210196.492259697, 209272.229830089, 213428.24184839,
219199.876598215, 232693.413456437, -2540847.16443872, -2548248.7328639,
-2548313.03575587, -2554139.64623601, -2562157.82691972, -2572652.06161707,
-2580661.46857492, -2582586.73282374, -2590361.64630144, -2600768.83961439,
-2601118.96791017, -2596549.23096386, -2595908.84828633, -2587558.31098132,
-2582008.47163027, -2578878.15330717, -2576213.00214426, -2576060.33555871,
-2573428.41342399, -2566314.3667436, -2565176.09049602, -2562132.22863948,
-2562197.32229417, -2559373.61809678, -2555845.10698301, -2553173.01189052,
-2554712.86185243, -2550544.36669981, -2545872.33950869, -2538048.37233691,
-2539940.8520852, -2540847.16443872), .Dim = c(32L, 2L))), class = c("XY",
"POLYGON", "sfg")), structure(list(structure(c(293014.518153712,
302663.623878803, 299614.736129523, 299387.143977218, 300713.765833193,
296012.630953997, 296414.646404753, 291613.202872935, 290101.086700019,
288351.994932675, 280050.141744657, 276348.36159945, 268239.242818116,
266664.696781647, 261329.775210368, 260803.076643104, 255043.118691357,
256043.736801008, 258248.422578011, 256514.840665134, 266222.58758153,
267870.238560712, 269719.998476136, 278115.141692842, 279027.238226489,
277046.169367799, 278663.587566502, 278390.166162141, 282776.00453892,
293014.518153712, -2545938.99223406, -2547984.25248741, -2552335.30469973,
-2556748.93411025, -2557918.93443286, -2563255.41718524, -2568141.48940461,
-2573873.67048764, -2580713.55216644, -2582358.27226944, -2575126.00563387,
-2567008.30844017, -2561285.47027227, -2554370.63011131, -2552687.15762284,
-2548859.05347352, -2548368.6934926, -2543839.44393153, -2543393.3685468,
-2539217.17692106, -2538552.82296956, -2536692.85843521, -2538627.15944761,
-2538204.23532964, -2539662.47951233, -2542253.48922496, -2543160.74876303,
-2545303.54715771, -2547900.42101953, -2545938.99223406), .Dim = c(30L,
2L))), class = c("XY", "POLYGON", "sfg")), structure(list(structure(c(306327.164679465,
312157.87829059, 315727.813056016, 322247.863289479, 324734.234123038,
322280.899985046, 313282.956380406, 313182.865420436, 308893.020782966,
297419.243993161, 296690.990207211, 294621.852014004, 291701.900610509,
291614.741438732, 287419.223157528, 287044.692168127, 283344.246238729,
282305.321387715, 284157.142691013, 281549.481661996, 282094.14898332,
279695.910810639, 296955.864084469, 300889.267765777, 306327.164679465,
-2508039.61053055, -2510727.11046957, -2509609.55377647, -2510558.39775827,
-2513719.50300778, -2516967.49656351, -2518999.15235832, -2524753.05450383,
-2529273.04419919, -2529592.16669518, -2525868.72839632, -2531644.56754877,
-2533950.26964505, -2536931.8001659, -2537502.94552915, -2534657.51917214,
-2536843.47016035, -2532136.38788482, -2530448.22436733, -2524275.20063014,
-2520773.76435034, -2518693.05659809, -2515926.86985414, -2511082.96444285,
-2508039.61053055), .Dim = c(25L, 2L))), class = c("XY", "POLYGON",
"sfg")), structure(list(structure(c(242499.566458413, 247588.829402245,
258286.862425668, 260803.076643104, 261329.775210368, 264273.044612322,
258473.97206371, 259983.568923105, 255154.98928701, 256902.238131262,
255753.861067199, 247571.580948403, 235663.721895586, 225742.204227351,
225435.119550552, 228328.092368238, 231806.447551693, 242144.157713471,
244827.215235134, 242499.566458413, -2551056.56970986, -2548550.97475263,
-2548187.135643, -2548859.05347352, -2552687.15762284, -2554049.32231806,
-2560167.55876203, -2568074.91322253, -2577506.8627014, -2589015.02513639,
-2590721.649419, -2587110.19199153, -2585872.89272702, -2588552.69485312,
-2582111.24615956, -2580661.46857492, -2572652.06161707, -2561940.41176137,
-2554139.64623601, -2551056.56970986), .Dim = c(20L, 2L))), class = c("XY",
"POLYGON", "sfg")), structure(list(structure(c(288351.994932675,
287495.305756137, 276680.392697904, 270919.365058707, 269340.771068311,
265346.889957803, 255230.477628228, 254108.340437253, 256902.238131262,
255154.98928701, 259983.568923105, 258651.712140395, 263994.051303531,
264273.044612322, 267106.535022276, 268239.242818116, 276348.36159945,
280050.141744657, 288351.994932675, -2582358.27226944, -2585779.41018566,
-2591654.24876776, -2591005.29640292, -2594986.78395064, -2598683.40780428,
-2605133.21307845, -2593026.46124507, -2589015.02513639, -2577506.8627014,
-2568074.91322253, -2559477.35553307, -2555735.53509041, -2554049.32231806,
-2554955.3087209, -2561285.47027227, -2567008.30844017, -2575126.00563387,
-2582358.27226944), .Dim = c(19L, 2L))), class = c("XY", "POLYGON",
"sfg")), structure(list(structure(c(283344.246238729, 287308.169406312,
287085.111647096, 289727.343964804, 290109.939547828, 292235.963342529,
291094.720439021, 293014.518153712, 288473.592209144, 284340.809701151,
278585.400462918, 278663.587566502, 277046.169367799, 279027.238226489,
278150.997236807, 283344.246238729, -2536843.47016035, -2534707.65334712,
-2537145.99703628, -2537864.30653648, -2540981.9151418, -2541866.91487573,
-2544021.83441239, -2545938.99223406, -2546010.51264318, -2548264.35775681,
-2545525.57453531, -2543160.74876303, -2542253.48922496, -2539662.47951233,
-2537406.872217, -2536843.47016035), .Dim = c(16L, 2L))), class = c("XY",
"POLYGON", "sfg"))), n_empty = 0L, precision = 0, crs = structure(list(
input = "+proj=tmerc +lat_0=0 +lon_0=27 +k=1 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs",
wkt = "PROJCRS[\"unknown\",\n BASEGEOGCRS[\"unknown\",\n DATUM[\"World Geodetic System 1984\",\n ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n LENGTHUNIT[\"metre\",1]],\n ID[\"EPSG\",6326]],\n PRIMEM[\"Greenwich\",0,\n ANGLEUNIT[\"degree\",0.0174532925199433],\n ID[\"EPSG\",8901]]],\n CONVERSION[\"unknown\",\n METHOD[\"Transverse Mercator\",\n ID[\"EPSG\",9807]],\n PARAMETER[\"Latitude of natural origin\",0,\n ANGLEUNIT[\"degree\",0.0174532925199433],\n ID[\"EPSG\",8801]],\n PARAMETER[\"Longitude of natural origin\",27,\n ANGLEUNIT[\"degree\",0.0174532925199433],\n ID[\"EPSG\",8802]],\n PARAMETER[\"Scale factor at natural origin\",1,\n SCALEUNIT[\"unity\",1],\n ID[\"EPSG\",8805]],\n PARAMETER[\"False easting\",0,\n LENGTHUNIT[\"metre\",1],\n ID[\"EPSG\",8806]],\n PARAMETER[\"False northing\",0,\n LENGTHUNIT[\"metre\",1],\n ID[\"EPSG\",8807]]],\n CS[Cartesian,2],\n AXIS[\"(E)\",east,\n ORDER[1],\n LENGTHUNIT[\"metre\",1,\n ID[\"EPSG\",9001]]],\n AXIS[\"(N)\",north,\n ORDER[2],\n LENGTHUNIT[\"metre\",1,\n ID[\"EPSG\",9001]]]]"), class = "crs"), class = c("sfc_POLYGON",
"sfc"), bbox = structure(c(xmin = 188622.34734284, ymin = -2605133.21307845,
xmax = 324734.234123038, ymax = -2488249.04572909), class = "bbox"))), row.names = c(NA,
8L), sf_column = "geometry", agr = structure(c(lprd_offtk = NA_integer_,
groups = NA_integer_), class = "factor", .Label = c("constant",
"aggregate", "identity")), class = c("sf", "data.frame"))
Best Answer
Now that we have indicated all of the issues you could try some clustering approaches using optimizations such as Simulated Annealing. Here is a quick worked example using Max-p Simulated Annealing. The use of
queen_weights
is defining first order neighbors (those that touch) and the optimization target is 10% of the population which would be similar to your "sum to 1 target". Keep in mind that this clustering approach uses simulated annealing so, changes in the heating parameter can result in very different solutions.Here we check the solution(s)
Now, lets look at your data (p sf polygon object was created from the structure output in the original post).
Here we can check how close to target sum we get (in my run it was 2 cluster solutions with 1.261058 and 1.047192).