summary refs log tree commit diff stats
path: root/library/preprocessor.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'library/preprocessor.cpp')
-rw-r--r-- library/preprocessor.cpp 334
1 files changed, 334 insertions, 0 deletions
diff --git a/library/preprocessor.cpp b/library/preprocessor.cpp
new file mode 100644
index 0000000..31614f3
--- /dev/null
+++ b/library/preprocessor.cpp
@@ -0,0 +1,334 @@
+/**
+ * This is ported from Preprocessor.php for performance.
+ */
+
// #include <Winsock2.h>
#include <arpa/inet.h>
#include <algorithm>
#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <map>
#include <queue>
#include <string>
#include <vector>
+
/**
 * Version number of the preprocessed file format. It is the first number
 * written to the output so that readers can check it.
 */
#define FILE_FORMAT_VERSION 7

// Number layout: NR_FORMAT = 'V', i.e. unsigned 32 bit, little endian

/**
 * Byte size of a single number in the layout described above
 */
#define NR_SIZE 4

/**
 * Name under which the main function appears in the input
 */
#define ENTRY_POINT "{main}"
+
/**
 * A call made from inside a proxy function. The parser queues these per
 * proxy and later hands them to whoever calls the proxy, so the proxied
 * call is recorded in place of the call to the proxy itself.
 */
struct ProxyData
{
    int calledIndex; // function number of the function the proxy called
    int lnr;         // line number taken from the cost line of that call
    int cost;        // cost taken from the cost line of that call

    ProxyData(int _calledIndex, int _lnr, int _cost)
    {
        calledIndex = _calledIndex;
        lnr = _lnr;
        cost = _cost;
    }
};
+
/**
 * Accumulated statistics for calls identified by a (function number, line)
 * pair. New records start with zero calls and zero cost; the parser
 * increments callCount and adds to summedCallCost for every matching call.
 */
struct CallData
{
    CallData(int _functionNr, int _line) :
        functionNr(_functionNr), line(_line), callCount(0), summedCallCost(0)
    {}

    int functionNr;
    int line;
    int callCount;
    int summedCallCost;
};

/**
 * Return the record for (functionNr, line), creating it if necessary.
 *
 * Records live in data in insertion order; keyMap maps a hash of the pair
 * to the position of the record in data. The hash is not unique, e.g.
 * (0, 1) and (65536, 0) produce the same value, so every hit is checked
 * against the stored pair and, on a mismatch, the next key is tried. The
 * same probe sequence is used for inserting and for finding, and entries
 * are never removed, so a pair always resolves to its own record.
 *
 * @param functionNr Function number of the record
 * @param line Line number of the record
 * @param keyMap Index from hash key to position in data; updated on insert
 * @param data Records in insertion order; appended to on insert
 * @return Reference to the record inside data. It is invalidated by the
 *         next insertion into the same vector.
 */
inline CallData& insertGetOrderedMap(int functionNr, int line, std::map<int, size_t>& keyMap, std::vector<CallData>& data)
{
    // Hash in unsigned arithmetic: shifting a large or negative int overflows
    const unsigned int uLine = static_cast<unsigned int>(line);
    unsigned int probe = static_cast<unsigned int>(functionNr) ^ (uLine << 16) ^ (uLine >> 16);

    for (;;) {
        const int key = static_cast<int>(probe);
        std::map<int, size_t>::iterator kmItr = keyMap.find(key);
        if (kmItr == keyMap.end()) {
            // Free slot: the pair has not been seen before
            keyMap[key] = data.size();
            data.push_back(CallData(functionNr, line));
            return data.back();
        }
        CallData& candidate = data[kmItr->second];
        if (candidate.functionNr == functionNr && candidate.line == line) {
            return candidate;
        }
        // Slot belongs to a different pair with the same hash - try the next key
        ++probe;
    }
}
+
+struct FunctionData
+{
+ FunctionData(const std::string& _filename, int _line, int _cost) :
+ filename(_filename),
+ line(_line),
+ invocationCount(1),
+ summedSelfCost(_cost),
+ summedInclusiveCost(_cost)
+ {}
+
+ std::string filename;
+ int line;
+ int invocationCount;
+ int summedSelfCost;
+ int summedInclusiveCost;
+ std::vector<CallData> calledFromInformation;
+ std::vector<CallData> subCallInformation;
+
+ CallData& getCalledFromData(int _functionNr, int _line)
+ {
+ return insertGetOrderedMap(_functionNr, _line, calledFromMap, calledFromInformation);
+ }
+
+ CallData& getSubCallData(int _functionNr, int _line)
+ {
+ return insertGetOrderedMap(_functionNr, _line, subCallMap, subCallInformation);
+ }
+
+private:
+ std::map<int, size_t> calledFromMap;
+ std::map<int, size_t> subCallMap;
+};
+
+class Webgrind_Preprocessor
+{
+public:
+
+ /**
+ * Extract information from inFile and store in preprocessed form in outFile
+ *
+ * @param inFile Callgrind file to read
+ * @param outFile File to write preprocessed data to
+ * @param proxyFunctions Functions to skip, treated as proxies
+ */
+ void parse(const char* inFile, const char* outFile, std::vector<std::string>& proxyFunctions)
+ {
+ std::ifstream in(inFile);
+ std::ofstream out(outFile, std::ios::out | std::ios::binary | std::ios::trunc);
+
+ std::map< int, std::queue<ProxyData> > proxyQueue;
+ int nextFuncNr = 0;
+ std::map<std::string, int> functionNames;
+ std::vector<FunctionData> functions;
+ std::vector<std::string> headers;
+
+ std::string line;
+ std::string buffer;
+ int lnr;
+ int cost;
+ int index;
+
+ // Read information into memory
+ while (std::getline(in, line)) {
+ if (line.compare(0, 3, "fl=") == 0) {
+ // Found invocation of function. Read function name
+ std::string function;
+ std::getline(in, function);
+ function.erase(0, 3);
+ getCompressedName(function, false);
+ // Special case for ENTRY_POINT - it contains summary header
+ if (function == ENTRY_POINT) {
+ std::getline(in, buffer);
+ std::getline(in, buffer);
+ headers.push_back(buffer);
+ std::getline(in, buffer);
+ }
+ // Cost line
+ in >> lnr >> cost;
+ std::getline(in, buffer);
+
+ std::map<std::string, int>::const_iterator fnItr = functionNames.find(function);
+ if (fnItr == functionNames.end()) {
+ index = nextFuncNr++;
+ functionNames[function] = index;
+ if (std::binary_search(proxyFunctions.begin(), proxyFunctions.end(), function)) {
+ proxyQueue[index];
+ }
+ line.erase(0, 3);
+ getCompressedName(line, true);
+ functions.push_back(FunctionData(line, lnr, cost));
+ } else {
+ index = fnItr->second;
+ FunctionData& funcData = functions[index];
+ funcData.invocationCount++;
+ funcData.summedSelfCost += cost;
+ funcData.summedInclusiveCost += cost;
+ }
+ } else if (line.compare(0, 4, "cfn=") == 0) {
+ // Found call to function. ($function/$index should contain function call originates from)
+ line.erase(0, 4);
+ getCompressedName(line, false); // calledFunctionName
+ // Skip call line
+ std::getline(in, buffer);
+ // Cost line
+ in >> lnr >> cost;
+ std::getline(in, buffer);
+
+ int calledIndex = functionNames[line];
+
+ // Current function is a proxy -> skip
+ std::map< int, std::queue<ProxyData> >::iterator pqItr = proxyQueue.find(index);
+ if (pqItr != proxyQueue.end()) {
+ pqItr->second.push(ProxyData(calledIndex, lnr, cost));
+ continue;
+ }
+
+ // Called a proxy
+ pqItr = proxyQueue.find(calledIndex);
+ if (pqItr != proxyQueue.end()) {
+ ProxyData& data = pqItr->second.front();
+ calledIndex = data.calledIndex;
+ lnr = data.lnr;
+ cost = data.cost;
+ pqItr->second.pop();
+ }
+
+ functions[index].summedInclusiveCost += cost;
+
+ CallData& calledFromData = functions[calledIndex].getCalledFromData(index, lnr);
+
+ calledFromData.callCount++;
+ calledFromData.summedCallCost += cost;
+
+ CallData& subCallData = functions[index].getSubCallData(calledIndex, lnr);
+
+ subCallData.callCount++;
+ subCallData.summedCallCost += cost;
+
+ } else if (line.find(": ") != std::string::npos) {
+ // Found header
+ headers.push_back(line);
+ }
+ }
+ in.close();
+
+ std::vector<std::string> reFunctionNames(functionNames.size());
+ for (std::map<std::string, int>::const_iterator fnItr = functionNames.begin();
+ fnItr != functionNames.end(); ++fnItr) {
+ reFunctionNames[fnItr->second] = fnItr->first;
+ }
+
+ // Write output
+ std::vector<uint32_t> writeBuff;
+ writeBuff.push_back(FILE_FORMAT_VERSION);
+ writeBuff.push_back(0);
+ writeBuff.push_back(functions.size());
+ writeBuffer(out, writeBuff);
+ // Make room for function addresses
+ out.seekp(NR_SIZE * functions.size(), std::ios::cur);
+ std::vector<uint32_t> functionAddresses;
+ for (size_t index = 0; index < functions.size(); ++index) {
+ functionAddresses.push_back(out.tellp());
+ FunctionData& function = functions[index];
+ writeBuff.push_back(function.line);
+ writeBuff.push_back(function.summedSelfCost);
+ writeBuff.push_back(function.summedInclusiveCost);
+ writeBuff.push_back(function.invocationCount);
+ writeBuff.push_back(function.calledFromInformation.size());
+ writeBuff.push_back(function.subCallInformation.size());
+ writeBuffer(out, writeBuff);
+ // Write called from information
+ for (std::vector<CallData>::const_iterator cfiItr = function.calledFromInformation.begin();
+ cfiItr != function.calledFromInformation.end(); ++cfiItr) {
+ const CallData& call = *cfiItr;
+ writeBuff.push_back(call.functionNr);
+ writeBuff.push_back(call.line);
+ writeBuff.push_back(call.callCount);
+ writeBuff.push_back(call.summedCallCost);
+ writeBuffer(out, writeBuff);
+ }
+ // Write sub call information
+ for (std::vector<CallData>::const_iterator sciItr = function.subCallInformation.begin();
+ sciItr != function.subCallInformation.end(); ++sciItr) {
+ const CallData& call = *sciItr;
+ writeBuff.push_back(call.functionNr);
+ writeBuff.push_back(call.line);
+ writeBuff.push_back(call.callCount);
+ writeBuff.push_back(call.summedCallCost);
+ writeBuffer(out, writeBuff);
+ }
+
+ out << function.filename << '\n' << reFunctionNames[index] << '\n';
+ }
+ size_t headersPos = out.tellp();
+ // Write headers
+ for (std::vector<std::string>::const_iterator hItr = headers.begin();
+ hItr != headers.end(); ++hItr) {
+ out << *hItr << '\n';
+ }
+
+ // Write addresses
+ out.seekp(NR_SIZE, std::ios::beg);
+ writeBuff.push_back(headersPos);
+ writeBuffer(out, writeBuff);
+ // Skip function count
+ out.seekp(NR_SIZE, std::ios::cur);
+ // Write function addresses
+ writeBuffer(out, functionAddresses);
+
+ out.close();
+ }
+
+private:
+
+ void getCompressedName(std::string& name, bool isFile)
+ {
+ if (name[0] != '(' || !std::isdigit(name[1])) {
+ return;
+ }
+ int functionIndex = std::atoi(name.c_str() + 1);
+ size_t idx = name.find(')');
+ if (idx + 2 < name.length()) {
+ name.erase(0, idx + 2);
+ compressedNames[isFile][functionIndex] = name;
+ } else {
+ std::map<int, std::string>::iterator nmIt = compressedNames[isFile].find(functionIndex);
+ if (nmIt != compressedNames[isFile].end()) {
+ name = nmIt->second; // should always exist for valid files
+ }
+ }
+ }
+
+ void writeBuffer(std::ostream& out, std::vector<uint32_t>& buffer)
+ {
+ for (std::vector<uint32_t>::iterator bItr = buffer.begin(); bItr != buffer.end(); ++bItr) {
+ *bItr = toLittleEndian32(*bItr);
+ }
+ out.write(reinterpret_cast<const char*>(&buffer.front()), sizeof(uint32_t) * buffer.size());
+ buffer.clear();
+ }
+
+ uint32_t toLittleEndian32(uint32_t value)
+ {
+ value = htonl(value);
+ uint32_t result = 0;
+ result |= (value & 0x000000FF) << 24;
+ result |= (value & 0x0000FF00) << 8;
+ result |= (value & 0x00FF0000) >> 8;
+ result |= (value & 0xFF000000) >> 24;
+ return result;
+ }
+
+ std::map<int, std::string> compressedNames [2];
+};
+
+int main(int argc, char* argv[])
+{
+ if (argc < 3) {
+ return 1;
+ }
+ std::vector<std::string> proxyFunctions;
+ for (int argIdx = 3; argIdx < argc; ++ argIdx) {
+ proxyFunctions.push_back(argv[argIdx]);
+ }
+ std::sort(proxyFunctions.begin(), proxyFunctions.end());
+ Webgrind_Preprocessor processor;
+ processor.parse(argv[1], argv[2], proxyFunctions);
+ return 0;
+}