Files
2026-03-29 21:41:17 +03:00

1002 lines
40 KiB
Lua

--[[
A graphing and formatting utility script for the lmprof 'graph' output.
This script introduces two classes:
1. LMNode - A graph node that represents a single function instance or
the accumulation of all instances of the same function.
2. LMGraph - A graph composed of LMNode instances, measurements about
them, and functions for generating human readable and application
specific outputs.
Three output formats are currently supported:
1. Flat - A tabular representation that may optionally be delimited, e.g., a CSV.
2. PepperFish - A layout similar to lua-users.org/wiki/PepperfishProfiler
3. Callgrind - A callgrind/kcachegrind compatible graph representation.
The 'header' table of the lmprof 'graph' output supports the additional
fields related to formatting and output of the graph:
[BOOLEAN]
csv - "Flat" layout is comma delimited [BOOLEAN]
show_lines - Show line frequencies for flat output
[STRING]
sort - Sort the LMGraph output by a given category:
'count',
'time',
'total_time',
'allocated',
'total_allocated'
@NOTES
Instead of using io.write, or the 'io' library in general, a simplified
writer API is used. For example, stdout:
StdOut = {
flush = function() end,
write_line = function(_, str) print(str) end,
}
The final argument to each 'format' method will the writer object. Defaulting
to StdOut on nil.
@EXAMPLE
-- local result = ... -- some lmprof operation.
result.header.sort = "count" -- Append additional graphing properties
result.header.csv = false
-- Create a new format instance and produce a graph structure from the
-- result table.
local graph = Citizen.Graph(result.header, result.records)
-- Generate a Flat representation
graph:Flat(result.header)
-- Generate a Pepperfish representation
graph:Pepperfish(result.header)
-- Generate a Callgrind representation
graph:Callgrind(result.header)
@LICENSE
See Copyright Notice in lmprof_lib.h
--]]
local LMGraph = nil
local LMNode = nil
local Binary = nil
local Decimal = nil
---------------------------------------
---------- Utility Functions ----------
---------------------------------------
-- Attempt to use the UTF-8 representation of mu, otherwise fallback to u.
local mu = (utf8 and utf8.char(0x03BC)) or "u"
-- When aligning columns use the utf8 length when possible.
local utf8_len = (utf8 and utf8.len) or string.len
--[[
Formatting labels, If "_VERSION <= Lua 5.2", then the UTF-8 code must be
replaced with "u".
--]]
local MetrixPrefix = { "n", mu, "m", "", "k", "M", "G", "T", "P", "E", "Z", "Y" }
--[[ Pad a (UTF8) string with left or right justification. --]]
local function string_pad(str, width, padStr)
local padStr = string.rep(padStr or ' ', math.abs(width))
local width = width - (utf8_len(str) - str:len())
return (width < 0 and string.sub(padStr .. str, width))
or string.sub(str .. padStr, 1, width)
end
--[[ Check a string prefix --]]
local function string_starts(str, start)
return str:sub(1, #start) == start
end
--[[ Binary (base 2) representation. --]]
Binary = {
Exp = function(value, degree) return value * (1024 ^ -degree) end,
Log = function(value)
if value == 0 then return 0 end
return math.floor((math.log(math.abs(value)) / math.log(2)) / 10)
end,
-- Format the given value according to a fixed degree (see Exp)
Format = function(value, degree)
return Binary.MetricFormat(Binary.Exp(value, degree))
end,
-- Create a human readable column header.
Readable = function(degree, offset, suffix)
return ("%s%s"):format(MetrixPrefix[degree + 1 + offset], suffix or "")
end,
-- Return a format string that limits the number of decimal values based on
-- the size of the value.
MetricFormat = function(value)
local formatStr = "%.4f"
if value > 1000 then
formatStr = "%.2f"
elseif value > 100 then
formatStr = "%.3f"
elseif value > 10 then
formatStr = "%.4f"
end
return string.format(formatStr, value)
end
}
--[[ Decimal (base 10) representation --]]
Decimal = {
Exp = function(value, degree) return value * (1000 ^ -degree) end,
Log = function(value)
if value == 0 then return 0 end
return math.floor((math.log(math.abs(value)) / math.log(10)) / 3)
end,
-- Format the given value according to a fixed degree (see Exp)
Format = function(value, degree)
return Binary.MetricFormat(Decimal.Exp(value, degree))
end,
-- Create a human readable column header.
Readable = function(degree, offset, suffix)
return ("%s%s"):format(MetrixPrefix[degree + 1 + offset], suffix or "")
end,
}
----------------------------------------
------------- Profile Node -------------
----------------------------------------
--[[
A function node that represents:
(1) A single 'aggregated' activation record instance, e.g., a <func, parent>
tuple or <func, parent, parent_line> triple.
(2) The accumulated statistics of all instances of the same function, its
sources of invocation (parents), and decadents (children).
--]]
LMNode = setmetatable({
--[[
Fields specified by lmprof_report.h mapped to categories, labels, and
functions:
Label - A human readable name.
Sort - A (total) ordering function.
Cat - Category for post processing: Specifying the unit of measurement,
how values are aggregated, and their format.
--]]
Fields = {
name = { Label = "Name", Cat = "String", Sort = function(fa, fb) if fa.name == fb.name then return 0 end ; return (fa.name < fb.name and -1) or 1 end },
count = { Label = "Count", Cat = "Count", Sort = function(fa, fb) return fa.count - fb.count end },
time = { Label = "Self-Time", Cat = "Time", Sort = function(fa, fb) return fa.time - fb.time end },
total_time = { Label = "Total-Time", Cat = "Time", Sort = function(fa, fb) return fa.total_time - fb.total_time end },
allocated = { Label = "Alloc", Cat = "Binary", Sort = function(fa, fb) return fa.allocated - fb.allocated end },
total_allocated = { Label = "Total", Cat = "Binary", Sort = function(fa, fb) return fa.total_allocated - fb.total_allocated end },
deallocated = { Label = "Dealloc", Cat = "Binary" },
total_deallocated = { Label = "Dealloc", Cat = "Binary" },
lines = { Label = "Lines", Cat = "Table" },
id = { Label = "ID", Cat = "String" },
parent = { Label = "PID", Cat = "String" },
depth = { Label = "Depth", Cat = "Count", Sort = function(fa, fb) return fa.depth - fb.depth end },
-- Extended fields
timePerCount = { Label = "Time/Call", Cat = "TimeAverage" },
totalTimePerCount = { Label = "Total/Call", Cat = "TimeAverage" },
allocPerCount = { Label = "Alloc/Call", Cat = "BinaryAverage" },
totalAllocPerCount = { Label = "Total/Call", Cat = "BinaryAverage" },
timePercent = { Label = "PctTotal", Cat = "Percent" },
allocPercent = { Label = "PctTotal", Cat = "Percent" },
childTimePercent = { Label = "Pct Time Child", Cat = "Percent" },
childSizePercent = { Label = "Pct Alloc Child", Cat = "Percent" },
},
}, {
__tostring = function() return "Node" end,
__call = function(_, ...)
return LMNode.New(...)
end
})
LMNode.__index = LMNode
--[[ Create a new function record. --]]
function LMNode.New(id, record, previous)
local result = previous or setmetatable({
id = id,
record = record,
name = (record == nil and "") or record.source,
-- Statistics
count = 0,
child_count = 0, -- Total number of sampler hits in all descendants
-- Graph Structure
rechable = false,
depth = math.huge, -- Distance from root
parents = { },
children = { },
}, LMNode)
-- Ensure a record exists.
if result.record == nil then
result.record = record
end
-- Time profiling statistics
if record ~= nil and record.time ~= nil then
result.time = 0.0
result.total_time = 0.0
result.timePerCount = 0.0
result.totalTimePerCount = 0.0
result.timePercent = 0.0
result.childTimePercent = 0.0
end
-- Memory profiling statistics
if record ~= nil and record.allocated ~= nil then
result.allocated = 0.0
result.deallocated = 0.0
result.total_allocated = 0.0
result.total_deallocated = 0.0
result.allocPerCount = 0.0
result.totalAllocPerCount = 0.0
result.allocPercent = 0.0
result.childSizePercent = 0.0
end
-- Line Frequency
if record ~= nil and record.lines ~= nil then
result.lines = { }
end
return result
end
--[[
Merge the profile statistics of 'record' into this node, while associating
an additional parent node.
In other words, the 'parent' function invoked 'this' function and had
accumulated a set amount of statistics.
--]]
function LMNode:Append(header, record, parentNode)
local tcp = "%(%.%.%.tail calls%.%.%.%)"
local name = self.name
local r_name = record.source
local r_id = (header.compress_graph and record.func) or record.id
-- Ensure parent/child relationship is set.
if parentNode ~= nil then
self.parents[record.parent] = parentNode
parentNode.children[r_id] = self
end
-- always try to use the function name instead of (...tail calls...)
if (name:match(tcp) and not r_name:match(tcp))
or (string_starts(name, "?") and not string_starts(r_name, "?"))
or (not r_name:match(tcp) and utf8_len(r_name) > utf8_len(name)) then
self.name = r_name
end
-- Update statistics. Do not update the 'total' fields for self calls as
-- they will have been handled by 'measured_pop'.
--
-- @NOTE: When compress_graph is enabled, record.parent is its unique
-- record identifier, not the function identifier.
if r_id ~= record.parent then
for k,_ in pairs(self) do
local f = LMNode.Fields[k]
if record[k] and f and (f.Cat == "Count" or f.Cat == "Time" or f.Cat == "Binary") then
self[k] = self[k] + record[k]
end
end
else
self.count = self.count + record.count
if self.time then self.time = self.time + record.time end
if self.allocated then
self.allocated = self.allocated + record.allocated
self.deallocated = self.deallocated + record.deallocated
end
end
-- If line_freq was enabled, combine frequencies
if record.lines ~= nil then
local funcLines = self.lines
for i=1,#record.lines do
if funcLines[i] ~= nil then
funcLines[i] = funcLines[i] + record.lines[i]
else
funcLines[i] = record.lines[i]
end
end
end
end
--[[
Depth-first iterate through the reduced graph structure, setting a reachable
field to each processed node to true.
--]]
function LMNode:DepthFirstReachable(depth)
if not self.reachable then
local child_count = 0
self.depth = depth
self.reachable = true
for _,childNode in pairs(self.children) do
childNode:DepthFirstReachable(depth + 1)
child_count = child_count + childNode.child_count + childNode.count
end
self.child_count = child_count
else
self.depth = math.min(self.depth or 0, depth)
end
end
----------------------------------------
---------------- Reduce ----------------
----------------------------------------
--[[
A graph composed of LMNode instances, measurements about all nodes in the
graph, and generating a human readable output.
--]]
LMGraph = setmetatable({
--[[ IO Interface --]]
StdOut = {
flush = function() end,
write_line = function(_, str)
Citizen.Trace(str)
Citizen.Trace("\n")
end,
},
--[[
Sorting categories: an ordered list of fields ordered by priority. Where
successive fields are used to handle tie breaks.
--]]
Sorting = {
count = { "count", "depth", "name" },
time = { "time", "count", "depth", "name" },
total_time = { "total_time", "count", "depth", "name" },
allocated = { "allocated", "count", "depth", "name" },
total_allocated = { "total_allocated", "count", "depth", "name" },
},
}, {
__tostring = function() return "Profile Graph" end,
__call = function(_, ...)
return LMGraph.New(...)
end
})
LMGraph.__index = LMGraph
--[[ Create a new format graph --]]
function LMGraph.New(header, outputTable)
local self = setmetatable({
root = nil,
functions = { }, -- A 'node' referencing each profiled function.
global = {
count = 0, -- Total number of function calls.
size = 0, -- Total amount of memory allocated,
time = 0, -- Total execution time
max_count = 0, -- Large record count
max_size = 0, -- Largest allocation record
max_time = 0, -- Largest timing record
},
}, LMGraph)
-- Create function records,: store all records of the same function, or
-- identifier, in one object.
for i=1,#outputTable do
local record = outputTable[i]
local recordId = (header.compress_graph and record.func) or record.id
local parentRecordId = record.parent
local node = self.functions[recordId]
if not node or node.record == nil then -- Create a node
node = LMNode.New(recordId, record, node)
self.functions[recordId] = node
if record.func == "0" then -- root of the graph
self.root = node
end
end
local parentNode = nil
if record.func ~= "0" then
parentNode = self.functions[parentRecordId]
if not parentNode then -- Preallocate the parent node.
parentNode = LMNode.New(parentRecordId, nil, nil)
self.functions[parentRecordId] = parentNode
if record.parent == "0" then
self.root = parentNode
end
end
end
-- Update function statistics
node:Append(header, record, parentNode)
-- Update global maximums
self.global.count = self.global.count + record.count
self.global.time = self.global.time + ((header.instrument and record.time) or 0)
self.global.size = self.global.size + ((header.memory and record.allocated) or 0)
end
-- Ensure a graph exists.
if #outputTable == 0 then
error("Script requires load_stack or reduced instructions count")
elseif self.root == nil then
error("Root node/record does not exist!")
end
-- Ensure the graph is completely connected, i.e., an undirected path exists
-- between all funcNodes. ... This script doesn't support multiple subgraphs
-- at the moment although a trivial extension.
self.root:DepthFirstReachable(0)
for id,node in pairs(self.functions) do
if not node.reachable then
error(("%s is not reachable!"):format(node.name))
end
-- Ensure the child table if each node is properly initialized
for pid,parentNode in pairs(node.parents) do
parentNode.children[id] = node
if not header.compress_graph and not node.parent then
node.parent = parentNode.id
end
end
end
-- Calculate relative statistics based on the global accumulated values.
local global_size = self.global.size
local global_time = self.global.time
for id,node in pairs(self.functions) do
self.global.max_count = math.max(self.global.max_count, node.count)
if header.instrument and node.record ~= nil and node.time ~= nil then
self.global.max_time = math.max(self.global.max_time, node.time)
node.timePercent = 0.0
node.childTimePercent = 0.0
node.timePerCount = 0
node.totalTimePerCount = 0
if node.count ~= 0 then
node.timePerCount = node.time / node.count
node.totalTimePerCount = node.total_time / node.count
end
if global_time ~= 0 then
node.timePercent = 100.0 * (node.time / global_time)
node.childTimePercent = 100.0 * ((node.total_time - node.time) / global_time)
end
end
if header.memory and node ~= nil and node.record ~= nil and node.allocated ~= nil then
self.global.max_size = math.max(self.global.max_size, node.allocated or 0)
node.allocPercent = 0.0
node.childSizePercent = 0.0
node.allocPerCount = 0
node.totalAllocPerCount = 0
if node.count ~= 0 then
node.allocPerCount = (node.allocated / node.count)
node.totalAllocPerCount = (node.total_allocated / node.count)
end
if global_size ~= 0 then
node.allocPercent = 100.0 * (node.allocated / global_size)
node.childSizePercent = 100.0 * ((node.total_allocated - node.allocated) / global_size)
end
end
end
return self
end
--[[ Compute the base SI unit for timing. --]]
function LMGraph:CreateTimeUnits(header, max_time, max_count)
-- lmprof has the ability to change the base measurement of time, parse the
-- data from the header.
local timeOffset = 0
if header.clockid == "micro" or header.clockid == 'Krdtsc' then
timeOffset = 1
end
local timeDegree,timeSuffix,timeUnit = nil,nil,nil
if header.clockid == "rdtsc" or header.clockid == 'Krdtsc' then
timeUnit = "Tick"
timeDegree = Decimal.Log(max_time)
timeSuffix = Decimal.Readable(timeDegree, timeOffset, timeUnit)
else
timeUnit = "s"
timeDegree = Decimal.Log(max_time)
timeSuffix = Decimal.Readable(timeDegree, timeOffset, timeUnit)
end
local timePerDegree = timeDegree - Decimal.Log(max_count)
local timePerSuffix = Decimal.Readable(timePerDegree, timeOffset, timeUnit)
return timeDegree,timeSuffix,timePerDegree,timePerSuffix
end
--[[ Compute the base SI unit for memory usage. --]]
function LMGraph:CreateByteUnits(header, max_size, max_count)
local byteDegree = Binary.Log(max_size)
local bytePerDegree = byteDegree - Binary.Log(max_count)
local byteSuffix = Binary.Readable(byteDegree, 3, "B")
local bytePerSuffix = Decimal.Readable(bytePerDegree, 3, "B")
return byteDegree,byteSuffix,bytePerDegree,bytePerSuffix
end
--[[ Output debug statistics of the profile --]]
function LMGraph:Verbose(header, outfile)
local outfile = outfile or LMGraph.StdOut
local timeDegree,timeSuffix,_,_ = self:CreateTimeUnits(header, header.profile_overhead, 1)
local buff = { }
buff[#buff + 1] = { "Header", "" }
buff[#buff + 1] = { "Clock ID", tostring(header.clockid)}
buff[#buff + 1] = { "Factored Profile Overhead", ("%s %s"):format(Decimal.Format(header.profile_overhead, timeDegree), timeSuffix)}
buff[#buff + 1] = { "Calibration", ("%s %s"):format(Decimal.Format(header.calibration, timeDegree), timeSuffix)}
buff[#buff + 1] = { "Instruction Counter", tostring(header.instr_count)}
buff[#buff + 1] = { "", "" }
if header.callback then -- Append Trace Event statistics
buff[#buff + 1] = { "Trace Events", "" }
buff[#buff + 1] = { "Compressed Events", tostring(header.compress) }
buff[#buff + 1] = { "Compression Threshold", tostring(header.threshold) }
buff[#buff + 1] = { "Initial Page Size" , tostring(header.pagesize) }
buff[#buff + 1] = { "Initial Page Limit", tostring(header.pagelimit) }
buff[#buff + 1] = { "Total Page Count" , tostring(header.totalpages) }
buff[#buff + 1] = { "Page Count", tostring(header.usedpages) }
buff[#buff + 1] = { "Page Size" , tostring(header.pagesize) }
buff[#buff + 1] = { "Events per Page", tostring(header.eventpages) }
buff[#buff + 1] = { "Buffer Usage", (header.pagelimit > 0 and ("%2.3f"):format(header.pageusage)) or "Inf" }
end
if header.debug then -- Debug hashing statistics.
buff[#buff + 1] = { "Hashing", "" }
buff[#buff + 1] = { "Buckets" , tostring(header.debug.buckets) }
buff[#buff + 1] = { "Used Buckets", tostring(header.debug.used_buckets) }
buff[#buff + 1] = { "Record Count", tostring(header.debug.record_count) }
buff[#buff + 1] = { "Min Bucket Count", tostring(header.debug.min) }
buff[#buff + 1] = { "Max Bucket Count", tostring(header.debug.max) }
buff[#buff + 1] = { "Mean (all)", string.format("%2.3f", header.debug.mean) }
buff[#buff + 1] = { "Variance (all)", string.format("%2.3f", header.debug.var) }
buff[#buff + 1] = { "Mean (hits)", string.format("%2.3f", header.debug.mean_hits) }
buff[#buff + 1] = { "Variance (hits)", string.format("%2.3f", header.debug.var_hits) }
buff[#buff + 1] = { "", "" }
end
local longestLabel = 0
for i=1,#buff do longestLabel = math.max(utf8_len(buff[i][1]), longestLabel) end
for i=1,#buff do
outfile:write_line(("%s %s"):format(string_pad(buff[i][1], longestLabel + 1), buff[i][2] or ""))
end
end
--[[ Create a table.sort function for ordering funcNodes (NewFunctionRecord). --]]
local function CreatePrioritySort(self, priority)
priority = priority or LMGraph.Sorting["count"]
return function(a, b)
local fa,fb = self.functions[a],self.functions[b]
for i=1,#priority do
local sort = LMNode.Fields[priority[i]].Sort
local comparator = sort(fa, fb)
if comparator ~= 0 then
return comparator > 0
end
end
return fa.name > fb.name
end
end
--[[ Flat/Table representation --]]
function LMGraph:Flat(header, outfile)
local flatColumns = {
"count",
"timePercent", "time", "total_time", "timePerCount", "totalTimePerCount",
"allocPercent", "allocated", "total_allocated", "allocPerCount", "totalAllocPerCount",
"name", "lines",
}
-- For non-compressed graphs include the nodes 'depth' (i.e., distance from
-- root), identifier, and parent identifier.
if not header.compress_graph then
table.insert(flatColumns, #flatColumns, "depth")
table.insert(flatColumns, #flatColumns, "id")
table.insert(flatColumns, #flatColumns, "parent")
end
local max_time = self.global.max_time
local max_size = self.global.max_size
local max_count = self.global.max_count
local output_delim = (header.csv and ", ") or " "
local timeDegree,timeSuffix,timePerDegree,timePerSuffix = self:CreateTimeUnits(header, max_time, max_count)
local byteDegree,byteSuffix,bytePerDegree,bytePerSuffix = self:CreateByteUnits(header, max_size, max_count)
-- Layout the functions and then sort them. Ensuring each record has each
-- key in 'flatColumns', otherwise remove it from the flatColumns table.
local functions = { }
for f,funcNode in pairs(self.functions) do
if funcNode.record ~= nil then
functions[#functions + 1] = f
for j=#flatColumns,1,-1 do
local field = LMNode.Fields[flatColumns[j]]
if field.Cat ~= "Table" and funcNode[flatColumns[j]] == nil then
table.remove(flatColumns, j)
elseif field.Cat == "Table" and not header.show_lines then
table.remove(flatColumns, j)
end
end
end
end
-- Sort rows by some column value.
table.sort(functions, CreatePrioritySort(self, LMGraph.Sorting[header.sort or "count"]))
-- Column header label to string.format() key, caching the lengths of each
-- string to compute padding.
local lengths,stringTable = { },{ }
for i=1,#functions do
local funcNode = self.functions[functions[i]]
local row = { }
for j=1,#flatColumns do
local value = funcNode[flatColumns[j]]
local field = LMNode.Fields[flatColumns[j]]
if field.Cat == "Time" then
row[j] = Decimal.Format(value, timeDegree)
elseif field.Cat == "Binary" then
row[j] = Binary.Format(value, byteDegree)
elseif field.Cat == "TimeAverage" then
row[j] = Decimal.Format(value, timePerDegree)
elseif field.Cat == "BinaryAverage" then
row[j] = Binary.Format(value, bytePerDegree)
elseif field.Cat == "Percent" then
row[j] = ("%1.3f"):format(value)
elseif field.Cat == "Table" then
local concat = table.concat(value or { }, ", ")
row[j] = (header.csv and ("\"%s\""):format(concat)) or concat
elseif header.csv and field.Cat == "String" then
row[j] = ("\"%s\""):format(value)
else
row[j] = tostring(value)
end
end
stringTable[#stringTable + 1] = row
for j=1,#row do
lengths[j] = math.max(lengths[j] or 1, utf8_len(row[j]) + 1)
end
end
--[[ Pad the string table & columns --]]
local columnLabels,columnUnits = { }, { }
for i=1,#flatColumns do
local field = LMNode.Fields[flatColumns[i]]
local unit = ""
if field.Cat == "Count" then
unit = "count"
elseif field.Cat == "Time" then
unit = ("%s"):format(timeSuffix)
elseif field.Cat == "Binary" then
unit = ("%s"):format(byteSuffix)
elseif field.Cat == "TimeAverage" then
unit = ("%s/call"):format(timePerSuffix)
elseif field.Cat == "BinaryAverage" then
unit = ("%s/call"):format(bytePerSuffix)
elseif field.Cat == "Percent" then
unit = "%"
end
lengths[i] = math.max(lengths[i] or 1, utf8_len(field.Label), utf8_len(unit) + 1)
if header.csv then
columnUnits[i] = ("\"%s\""):format(unit)
columnLabels[i] = ("\"%s\""):format(field.Label)
else
columnUnits[i] = string_pad(unit, lengths[i])
columnLabels[i] = string_pad(field.Label, lengths[i])
end
end
outfile = outfile or LMGraph.StdOut
outfile:write_line(table.concat(columnLabels, output_delim))
outfile:write_line(table.concat(columnUnits, output_delim))
for i=1,#stringTable do
local stringRecord = stringTable[i]
for j=1,#flatColumns do
if j ~= #flatColumns and not header.csv then
stringRecord[j] = string_pad(stringRecord[j], lengths[j])
end
end
outfile:write_line(table.concat(stringRecord, output_delim))
end
end
--[[ Format that resembles Pepperfish --]]
function LMGraph:Pepperfish(header, outfile)
outfile = outfile or LMGraph.StdOut
local max_time = self.global.max_time
local max_size = self.global.max_size
local max_count = self.global.max_count
-- Create SI units
local timeDegree,timeSuffix,timePerDegree,timePerSuffix = self:CreateTimeUnits(header, max_time, max_count)
local byteDegree,byteSuffix,bytePerDegree,bytePerSuffix = self:CreateByteUnits(header, max_size, max_count)
local prioritySort = CreatePrioritySort(self, LMGraph.Sorting[header.sort or "count"])
-- Layout the functions and then sort them
local functions = { }
for f,funcNode in pairs(self.functions) do
if funcNode.record ~= nil then
functions[#functions + 1] = f
end
end
table.sort(functions, prioritySort)
-- To emulate the Pepperfish layout, a shared table for format strings is
-- used. The 'funcNode' fields are then mapped to array indexes of the
-- shared table by a lookup table.
local childLayout,recordLayout,recordLookup = { },{ },{ }
local function recordAppend(key, value) recordLayout[recordLookup[key]][2] = value end
local function recordAppendUnit(key, value) recordLayout[recordLookup[key]][3] = value end
recordLayout[#recordLayout + 1] = { "Sample count:" } ; recordLookup.count = #recordLayout
childLayout[#childLayout + 1] = "Child "
childLayout[#childLayout + 1] = "" -- 2, recordName
childLayout[#childLayout + 1] = "sampled "
childLayout[#childLayout + 1] = "" -- 4, childRecordCount
childLayout[#childLayout + 1] = " times. "
if header.memory then
recordLayout[#recordLayout + 1] = { "", "", "" } ;
recordLayout[#recordLayout + 1] = { "Memory persists:", "", "suffix" } ; recordLookup.persists = #recordLayout
recordLayout[#recordLayout + 1] = { "Memory allocated:", "", "suffix" } ; recordLookup.malloc = #recordLayout
recordLayout[#recordLayout + 1] = { "Memory deallocated:", "", "suffix" } ; recordLookup.mfree = #recordLayout
recordLayout[#recordLayout + 1] = { "Memory allocated in children:", "", "suffix" } ; recordLookup.chmalloc = #recordLayout
recordLayout[#recordLayout + 1] = { "Memory allocated total:", "", "suffix" } ; recordLookup.chmalloctotal = #recordLayout
recordLayout[#recordLayout + 1] = { "Total percent allocated in function:", "", "%" } ; recordLookup.fpctalloc = #recordLayout
recordLayout[#recordLayout + 1] = { "Total percent allocated in children:", "", "%" } ; recordLookup.cpctalloc = #recordLayout
end
if header.instrument then
recordLayout[#recordLayout + 1] = { "", "", "" } ;
recordLayout[#recordLayout + 1] = { "Time spent:", "", "" } ; recordLookup.time = #recordLayout
recordLayout[#recordLayout + 1] = { "Time spent in function:", "", "", } ; recordLookup.ftime = #recordLayout
recordLayout[#recordLayout + 1] = { "Time spent in children:", "", "", } ; recordLookup.ctime = #recordLayout
recordLayout[#recordLayout + 1] = { "Time spent per sample:", "", "", "/sample" } ; recordLookup.timesample = #recordLayout
recordLayout[#recordLayout + 1] = { "Time spent in function per sample:", "", "", "/sample" } ; recordLookup.ftimesample = #recordLayout
recordLayout[#recordLayout + 1] = { "Time spent in children per sample:", "", "", "/sample"} ; recordLookup.ctimesample = #recordLayout
recordLayout[#recordLayout + 1] = { "Percent time spent in function:", "", "%" } ; recordLookup.pftime = #recordLayout
recordLayout[#recordLayout + 1] = { "Percent time spent in children:", "", "%" } ; recordLookup.pctime = #recordLayout
childLayout[#childLayout + 1] = "Took "
childLayout[#childLayout + 1] = "" -- childRecordTime
childLayout[#childLayout + 1] = " "
childLayout[#childLayout + 1] = "" -- childTimeUnit
childLayout[#childLayout + 1] = ". Averaging "
childLayout[#childLayout + 1] = "" -- childRecordTotalTime/childRecordCount
childLayout[#childLayout + 1] = " "
childLayout[#childLayout + 1] = "" -- timeunit
childLayout[#childLayout + 1] = "/exec"
end
for i=1,#functions do
local funcNode = self.functions[functions[i]]
funcNode.children[funcNode.id] = nil
-- Setup and sort child nodes
local sortedChildren = { }
for cid,_ in pairs(funcNode.children) do
sortedChildren[#sortedChildren + 1] = cid
end
table.sort(sortedChildren, prioritySort)
-- Populate format table
recordAppend("count", string.format("%4d", funcNode.count))
if header.memory then
recordAppendUnit("persists", byteSuffix)
recordAppendUnit("malloc", byteSuffix)
recordAppendUnit("mfree", byteSuffix)
recordAppendUnit("chmalloc", byteSuffix)
recordAppendUnit("chmalloctotal", byteSuffix)
recordAppend("persists", Binary.Format(funcNode.allocated - funcNode.deallocated, byteDegree))
recordAppend("malloc", Binary.Format(funcNode.allocated, byteDegree))
recordAppend("mfree", Binary.Format(funcNode.deallocated, byteDegree))
if #sortedChildren > 0 then
recordAppend("chmalloc", Binary.Format(funcNode.total_allocated - funcNode.allocated, byteDegree))
recordAppend("chmalloctotal", Binary.Format(funcNode.total_allocated, byteDegree))
recordAppend("fpctalloc", string.format("%1.3f", funcNode.allocPercent))
recordAppend("cpctalloc", string.format("%1.3f", funcNode.childSizePercent))
else
recordAppend("chmalloc", "")
recordAppend("chmalloctotal", "")
recordAppend("fpctalloc", string.format("%1.3f", funcNode.allocPercent))
recordAppend("cpctalloc", "")
end
end
if header.instrument then
recordAppendUnit("time", timeSuffix)
recordAppendUnit("ftime", timeSuffix)
recordAppendUnit("ctime", timeSuffix)
recordAppendUnit("timesample", timePerSuffix)
recordAppendUnit("ftimesample", timePerSuffix)
recordAppendUnit("ctimesample", timePerSuffix)
if #sortedChildren > 0 then
local delta_time = funcNode.total_time - funcNode.time
local ctimesample = 0
if funcNode.count ~= 0 then
ctimesample = (delta_time) / funcNode.count
end
recordAppend("time", string.format("%4.3f", Decimal.Format(funcNode.total_time, timeDegree)))
recordAppend("ftime", string.format("%4.3f", Decimal.Format(funcNode.time, timeDegree)))
recordAppend("ctime", string.format("%4.3f", Decimal.Format(delta_time, timeDegree)))
recordAppend("timesample", string.format("%4.5f", Decimal.Format(funcNode.totalTimePerCount, timePerDegree)))
recordAppend("ftimesample", string.format("%4.5f", Decimal.Format(funcNode.timePerCount, timePerDegree)))
recordAppend("ctimesample", string.format("%4.5f", Decimal.Format(ctimesample, timePerDegree)))
else
recordAppend("time", "")
recordAppend("ftime", string.format("%4.3f", Decimal.Format(funcNode.time, timeDegree)))
recordAppend("ctime", "")
recordAppend("timesample", string.format("%4.3f", Decimal.Format(funcNode.totalTimePerCount, timePerDegree)))
recordAppend("ftimesample", "")
recordAppend("ctimesample", "")
end
recordAppend("pftime", string.format("%1.3f", funcNode.timePercent))
recordAppend("pctime", string.format("%1.3f", funcNode.childTimePercent))
end
-- Setup column alignment
local longestLabel,longestUnit = 0,0
for i=1,#recordLayout do
longestUnit = math.max(utf8_len(recordLayout[i][2]), longestUnit)
longestLabel = math.max(utf8_len(recordLayout[i][1]), longestLabel)
end
-- Dump everything
local funcName = (" %s "):format(funcNode.name)
if funcName:len() < 42 then
local left = math.ceil((42 - funcName:len()) / 2)
local right = 42 - funcName:len()
funcName = ("%s%s%s"):format(string.rep("-", left), funcName, string.rep("-", right))
end
outfile:write_line(("%s%s%s"):format(string.rep("-", 19), funcName, string.rep("-", 19)))
for i=1,#recordLayout do
local tuple = recordLayout[i]
if tuple[2] ~= "" then
outfile:write_line(string.format("%s %s %s",
string_pad(tuple[1], longestLabel + 1),
string_pad(tuple[2], -(longestUnit + 1)), (tuple[3] or "")
))
elseif tuple[1] == "" then
outfile:write_line("")
end
end
local longestChildLabel = 0
for i=1,#sortedChildren do
local childNode = funcNode.children[sortedChildren[i]]
longestChildLabel = math.max(longestChildLabel, utf8_len(childNode.name))
end
local added_blank = 0
for i=1,#sortedChildren do
local childNode = funcNode.children[sortedChildren[i]]
if added_blank == 0 then
added_blank = 1
outfile:write_line("") -- extra separation line
end
childLayout[2] = string_pad(childNode.name, longestChildLabel + 1)
childLayout[4] = string.format("%6d", childNode.count)
if header.instrument then
childLayout[7] = string.format("%4.2f", Decimal.Format(childNode.total_time, timeDegree))
childLayout[9] = timeSuffix
childLayout[11] = string.format("%4.5f", Decimal.Format(childNode.total_time, timePerDegree) / childNode.count)
childLayout[13] = timeSuffix
end
outfile:write_line(table.concat(childLayout))
end
outfile:write_line("") -- extra separation line
outfile:flush()
end
outfile:write_line("\nEND")
outfile:flush()
end
--[[
Generate a callgrind/kcachegrind compatible graph representation. Note this
representation is generally used for when compress_graph is false.
--]]
function LMGraph:Callgrind(header, outfile)
outfile = outfile or LMGraph.StdOut
local sample = header.sample
local instrument = header.instrument
if not instrument and not sample then
error("Callgrind format requires instrumentation")
end
-- Layout the functions and then sort them
local functions = { }
for f,funcNode in pairs(self.functions) do
if funcNode.record ~= nil then
functions[#functions + 1] = f
end
end
table.sort(functions, CreatePrioritySort(self, LMGraph.Sorting[header.sort or "count"]))
-- Depth-first callgrind output function.
local CallgrindOutput = nil
CallgrindOutput = function(node)
if node.callgrind_reached then return end
node.callgrind_reached = true
local lineDefined = (node.record.linedefined < 0 and 0) or node.record.linedefined
outfile:write_line(("fn=(%d)"):format(node.callgrind_id))
if instrument then
outfile:write_line(("%d %d"):format(lineDefined, node.time))
else
outfile:write_line(("%d %d"):format(lineDefined, node.count))
end
if header.compress_graph then -- Ensure cycles don't exist
node.children[node.id] = nil
end
for cid,childNode in pairs(node.children) do
local count = (childNode.count <= 0 and 1) or childNode.count
outfile:write_line(("cfn=(%d)"):format(childNode.callgrind_id))
outfile:write_line(("calls=%d %d"):format(count, 0))
if instrument then
outfile:write_line(("%d %d"):format(childNode.record.parent_line, childNode.total_time))
else
outfile:write_line(("%d %d"):format(childNode.record.parent_line, childNode.child_count))
end
end
outfile:write_line("")
for cid,childNode in pairs(node.children) do
CallgrindOutput(childNode)
end
end
outfile:write_line(("events: %s"):format((instrument and "Time") or "Samples"))
outfile:write_line("")
outfile:write_line("# define function ID mapping")
for i=1,#functions do
local funcNode = self.functions[functions[i]]
local funcName = (funcNode.record.func == "0" and "main") or funcNode.name
local funcSource = funcNode.name:match("(%b())")
if funcSource ~= nil then
funcNode.callgrind_source = funcSource:sub(2, funcSource:len() - 1)
end
-- Generate a 'pseudo' name to ensure KCachegrind doesn't confuse or
-- unnecessarily compress nodes. Functions are first-class values so
-- emulate multiple export symbols.
if not header.compress_graph then
funcName = ("%s:%s"):format(funcName, funcNode.id)
end
funcNode.callgrind_id = i
outfile:write_line(("fn=(%d) %s"):format(funcNode.callgrind_id, funcName))
outfile:write_line(("fl=(%d) %s"):format(funcNode.callgrind_id, funcNode.callgrind_source or "[C]"))
end
outfile:write_line("")
outfile:write_line("# define callgraph")
CallgrindOutput(self.root)
end
Citizen.Graph = LMGraph -- cfxLua places LMGraph into "Citizen" instead of returning.