Quantcast

Updated the sort code to handle very large sets of data

Kevin Lyles [10-07-12 - 19:09]
Updated the sort code to handle very large sets of data

This sidesteps the Lua 5.1 limit on the number of constants a single
function can contain by splitting large tables across multiple
functions.
Filename
sort.lua
diff --git a/sort.lua b/sort.lua
index 1f9633b..b7d9e86 100644
--- a/sort.lua
+++ b/sort.lua
@@ -1,3 +1,5 @@
+local CHUNK_SIZE = 20000
+
 local function separateNumericAndTextPortions(str)
 	local result = {}
 	local i, count = 1, 0
@@ -118,7 +120,7 @@ local function varName(name)
 	end
 end

-function sort(tbl, name, indent)
+function sort(tbl, name, indent, incremental)
 	local indentStr
 	if indent then
 		indentStr = string.rep("\t", indent)
@@ -127,32 +129,71 @@ function sort(tbl, name, indent)
 		indentStr = ""
 	end

-	output(string.format("%s%s = {", indentStr, varName(name)))
-
 	local indexTable = {}
 	for index, value in pairs(tbl) do
 		table.insert(indexTable, index)
 	end
 	table.sort(indexTable, mixedSort)

-	for _, index in ipairs(indexTable) do
-		local value = tbl[index]
-		if type(value) == "string" then
-			output(string.format("%s\t%s = %s,", indentStr, varName(index), quotedString(value)))
-		elseif type(value) == "number" then
-			value = string.gsub(string.format("%f", value), "%.?0+$", "")
-			output(string.format("%s\t%s = %s,", indentStr, varName(index), value))
-		elseif type(value) == "table" then
-			sort(value, index, indent + 1)
-		elseif type(value) == "boolean" then
-			output(string.format("%s\t%s = %s,", indentStr, varName(index), value and "true" or "false"))
-		else
-			output(string.format("%s\tUnhandled value type: %q", indentStr, type(value)))
+	if #(indexTable) > CHUNK_SIZE then
+		output(string.format("%s = {}", name))
+		for i = 1, #(indexTable), CHUNK_SIZE do
+			output(string.format("\nlocal function chunk%d()", math.ceil(i / 20000)))
+			output(string.format("\tlocal t = %s\n", name))
+			for j = i, i + CHUNK_SIZE - 1 do
+				local index = indexTable[j]
+				if not index then
+					break
+				end
+				local value = tbl[index]
+				if type(value) == "string" then
+					output(string.format("%s\tt[%s] = %s,", indentStr, quotedString(index), quotedString(value)))
+				elseif type(value) == "number" then
+					value = string.gsub(string.format("%f", value), "%.?0+$", "")
+					output(string.format("%s\tt[%s] = %s,", indentStr, quotedString(index), value))
+				elseif type(value) == "table" then
+					output(string.format("%s\tt[%s] = {", indentStr, quotedString(index)))
+					sort(value, nil, indent + 1, true)
+				elseif type(value) == "boolean" then
+					output(string.format("%s\tt[%s] = %s,", indentStr, quotedString(index), value and "true" or "false"))
+				else
+					output(string.format("%s\tUnhandled value type: %q", indentStr, type(value)))
+				end
+			end
+			output("end")
+		end
+		output()
+		for i = 1, #(indexTable), CHUNK_SIZE do
+			output(string.format("chunk%d()", math.ceil(i / 20000)))
 		end
-	end
-	if indent > 0 then
-		output(string.format("%s},", indentStr))
 	else
-		output("}")
+		if not incremental then
+			output(string.format("%s%s = {", indentStr, varName(name)))
+		end
+
+		for _, index in ipairs(indexTable) do
+			local value = tbl[index]
+			if type(value) == "string" then
+				output(string.format("%s\t%s = %s,", indentStr, varName(index), quotedString(value)))
+			elseif type(value) == "number" then
+				value = string.gsub(string.format("%f", value), "%.?0+$", "")
+				output(string.format("%s\t%s = %s,", indentStr, varName(index), value))
+			elseif type(value) == "table" then
+				sort(value, index, indent + 1)
+			elseif type(value) == "boolean" then
+				output(string.format("%s\t%s = %s,", indentStr, varName(index), value and "true" or "false"))
+			else
+				output(string.format("%s\tUnhandled value type: %q", indentStr, type(value)))
+			end
+		end
+		if indent > 0 then
+			if incremental then
+				output(string.format("%s}", indentStr))
+			else
+				output(string.format("%s},", indentStr))
+			end
+		else
+			output("}")
+		end
 	end
 end