From 2c993f905f46629fef67fafaab05c28dd85a713b Mon Sep 17 00:00:00 2001
From: Peter Schrammel
Date: Tue, 5 Jul 2016 23:59:17 +0100
Subject: [PATCH] output-repair tool to fix truncated XML and JSON log files

---
 src/Makefile                        |   6 +-
 src/output-repair/Makefile          |  25 ++++++
 src/output-repair/json_repair.cpp   | 107 ++++++++++++++++++++++++++
 src/output-repair/json_repair.h     |  16 ++++
 src/output-repair/output_repair.cpp | 140 ++++++++++++++++++++++++++++++++++
 src/output-repair/xml_repair.cpp    | 114 +++++++++++++++++++++++++++
 src/output-repair/xml_repair.h      |  16 ++++
 7 files changed, 422 insertions(+), 2 deletions(-)
 create mode 100644 src/output-repair/Makefile
 create mode 100644 src/output-repair/json_repair.cpp
 create mode 100644 src/output-repair/json_repair.h
 create mode 100644 src/output-repair/output_repair.cpp
 create mode 100644 src/output-repair/xml_repair.cpp
 create mode 100644 src/output-repair/xml_repair.h

diff --git a/src/Makefile b/src/Makefile
index 7b9e3a1d446..29cf67894d4 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -2,9 +2,9 @@ DIRS = ansi-c big-int cbmc cpp goto-cc goto-instrument goto-programs \
        goto-symex langapi pointer-analysis solvers util linking xmllang \
        assembler analyses java_bytecode aa-path-symex path-symex musketeer \
        json cegis goto-analyzer jsil symex goto-diff aa-symex clobber \
-       memory-models
+       memory-models output-repair
 
-all: cbmc.dir goto-cc.dir goto-instrument.dir symex.dir goto-analyzer.dir goto-diff.dir
+all: cbmc.dir goto-cc.dir goto-instrument.dir symex.dir goto-analyzer.dir goto-diff.dir output-repair.dir
 
 ###############################################################################
 
@@ -46,6 +46,8 @@ symex.dir: languages goto-programs.dir pointer-analysis.dir \
 
 aa-symex.dir: symex.dir aa-path-symex.dir
 
+output-repair.dir:
+
 # building for a particular directory
 
 $(patsubst %, %.dir, $(DIRS)):
diff --git a/src/output-repair/Makefile b/src/output-repair/Makefile
new file mode 100644
index 00000000000..bb3da4f0682
--- /dev/null
+++ b/src/output-repair/Makefile
@@ -0,0 +1,25 @@
+SRC = output_repair.cpp xml_repair.cpp json_repair.cpp ../util/unicode.cpp
+
+OBJ +=
+
+INCLUDES= -I ..
+
+LIBS =
+
+CLEANFILES = output-repair$(EXEEXT)
+
+include ../config.inc
+include ../common
+
+all: output-repair$(EXEEXT)
+
+###############################################################################
+
+output-repair$(EXEEXT): $(OBJ)
+	$(LINKBIN)
+
+.PHONY: output-repair-mac-signed
+
+output-repair-mac-signed: output-repair$(EXEEXT)
+	codesign -v -s $(OSX_IDENTITY) output-repair$(EXEEXT)
+
diff --git a/src/output-repair/json_repair.cpp b/src/output-repair/json_repair.cpp
new file mode 100644
index 00000000000..c7b275d8ee5
--- /dev/null
+++ b/src/output-repair/json_repair.cpp
@@ -0,0 +1,107 @@
+/*******************************************************************\
+
+Module: JSON repair tool
+
+Author: Peter Schrammel
+
+\*******************************************************************/
+
+#include <cstddef>
+#include <fstream>
+#include <stack>
+#include <string>
+
+#include "json_repair.h"
+
+// cut non-closed branches at this level
+static const std::size_t BACKTRACK_LEVEL=2;
+
+/*******************************************************************\
+
+Function: json_repair
+
+  Inputs: infile - the (possibly truncated) JSON log to read
+          outfile - receives the repaired document
+
+ Outputs: nothing is returned; the result is written to outfile
+
+ Purpose: Copy every branch that is closed at a nesting depth of at
+          most BACKTRACK_LEVEL, drop an unfinished branch as well as
+          anything following the top-level element, and append the
+          closing brackets that are still missing.
+
+\*******************************************************************/
+
+void json_repair(std::ifstream &infile, std::ofstream &outfile)
+{
+  std::stack<bool> elements; // open elements (object=true, array=false)
+  bool instring=false; // within a string
+  bool escaped=false; // the current character is escaped by a backslash
+  std::string backtrackbuffer; // input not yet known to be complete
+  char c; // current character
+
+  // moves the buffered input to the output
+  const auto flush=[&backtrackbuffer, &outfile]()
+  {
+    outfile << backtrackbuffer;
+    backtrackbuffer.clear();
+  };
+
+  while(infile >> std::noskipws >> c)
+  {
+    backtrackbuffer.push_back(c);
+
+    if(instring)
+    {
+      // a backslash escapes exactly one character: the quote in \" is
+      // part of the string, the quote following \\ terminates it
+      if(escaped)
+        escaped=false;
+      else if(c=='\\')
+        escaped=true;
+      else if(c=='"')
+        instring=false;
+      continue;
+    }
+
+    switch(c)
+    {
+    case '"':
+      instring=true;
+      break;
+
+    case '[':
+    case '{':
+      elements.push(c=='{');
+      // an opener below the backtrack level is never cut; writing it
+      // now guarantees that the closer appended at the end has a match
+      if(elements.size()<BACKTRACK_LEVEL)
+        flush();
+      break;
+
+    case ']':
+    case '}':
+      if(elements.empty()) // closer without an opener: stop here
+        return;
+      // a branch at this level is complete, and so is the buffer
+      if(elements.size()<=BACKTRACK_LEVEL)
+        flush();
+      elements.pop();
+      if(elements.empty()) // cut trailing garbage
+        return;
+      break;
+
+    default:
+      break;
+    }
+  }
+
+  // the buffer holds an unfinished branch and is discarded, including
+  // its opener; now close the elements that enclose the cut branch
+  while(!elements.empty())
+  {
+    if(elements.size()<BACKTRACK_LEVEL)
+      outfile << '\n' << (elements.top() ? '}' : ']') << '\n';
+    elements.pop();
+  }
+}
diff --git a/src/output-repair/json_repair.h b/src/output-repair/json_repair.h
new file mode 100644
index 00000000000..0a8fc14baba
--- /dev/null
+++ b/src/output-repair/json_repair.h
@@ -0,0 +1,16 @@
+/*******************************************************************\
+
+Module: JSON repair tool
+
+Author: Peter Schrammel
+
+\*******************************************************************/
+
+#ifndef CPROVER_OUTPUT_REPAIR_JSON_REPAIR_H
+#define CPROVER_OUTPUT_REPAIR_JSON_REPAIR_H
+
+#include <fstream>
+
+void json_repair(std::ifstream &infile, std::ofstream &outfile);
+
+#endif // CPROVER_OUTPUT_REPAIR_JSON_REPAIR_H
diff --git a/src/output-repair/output_repair.cpp b/src/output-repair/output_repair.cpp
new file mode 100644
index 00000000000..0ae976530f3
--- /dev/null
+++ b/src/output-repair/output_repair.cpp
@@ -0,0 +1,140 @@
+/*******************************************************************\
+
+Module: Output repair tool
+
+Author: Peter Schrammel
+
+\*******************************************************************/
+
+#include <fstream>
+#include <iostream>
+#include <string>
+
+#include <util/unicode.h>
+
+#include "json_repair.h"
+#include "xml_repair.h"
+
+/*******************************************************************\
+
+Function: usage
+
+  Inputs: program - name of the executable to show in the synopsis
+
+ Outputs: -1, the exit code used for an invalid invocation
+
+ Purpose: Print the command line synopsis to standard error.
+
+\*******************************************************************/
+
+static int usage(const char *program)
+{
+  std::cerr << "Usage: " << program << " "
+            << "(--json | --xml) <infile> <outfile>\n\n";
+  return -1;
+}
+
+/*******************************************************************\
+
+Function: repair
+
+  Inputs: argv - the program name followed by exactly three
+            arguments: the format flag (--json or --xml) either
+            first or last, and the names of the input and the
+            output file, in this order
+
+ Outputs: 0 on success, -1 if the format flag is missing, a file
+          cannot be opened, or the output cannot be written
+
+ Purpose: Repair the truncated log in the input file and store the
+          result in the output file.
+
+\*******************************************************************/
+
+int repair(const char **argv)
+{
+  const std::string first(argv[1]);
+  const std::string last(argv[3]);
+  const bool json=(first=="--json") || (last=="--json");
+  const bool xml=(first=="--xml") || (last=="--xml");
+  const bool is_first=(first=="--json") || (first=="--xml");
+
+  // check the format before touching any file: a wrong invocation
+  // must not truncate the output file, and an assertion would be
+  // compiled out with NDEBUG
+  if(!json && !xml)
+  {
+    std::cerr << "Expected --json or --xml as first or last argument"
+              << "\n\n";
+    return -1;
+  }
+
+  // the flag is either first or last; the other two arguments name
+  // the input and the output file, in this order
+  const std::string infilename(argv[is_first ? 2 : 1]);
+  const std::string outfilename(argv[is_first ? 3 : 2]);
+
+  std::ifstream infile(infilename);
+  if(!infile.is_open())
+  {
+    std::cerr << "Cannot open file '" << infilename << "'" << "\n\n";
+    return -1;
+  }
+
+  std::ofstream outfile(outfilename);
+  if(!outfile.is_open())
+  {
+    std::cerr << "Cannot open file '" << outfilename << "'" << "\n\n";
+    return -1;
+  }
+
+  if(json)
+    json_repair(infile, outfile);
+  else
+    xml_repair(infile, outfile);
+
+  infile.close();
+  outfile.close();
+
+  // close() flushes; a failure here means the output is incomplete
+  if(outfile.fail())
+  {
+    std::cerr << "Cannot write file '" << outfilename << "'" << "\n\n";
+    return -1;
+  }
+
+  return 0;
+}
+
+/*******************************************************************\
+
+Function: main
+
+  Inputs: argc, argv - the command line; exactly three arguments
+            are expected after the program name
+
+ Outputs: 0 on success, -1 otherwise
+
+ Purpose: Entry point; checks the number of arguments and hands
+          over to repair.
+
+\*******************************************************************/
+
+#ifdef _MSC_VER
+int wmain(int argc, const wchar_t **argv_wide)
+{
+  if(argc!=4)
+    return usage("output-repair.exe");
+
+  const char **argv=narrow_argv(argc, argv_wide);
+  return repair(argv);
+}
+#else
+int main(int argc, const char **argv)
+{
+  if(argc!=4)
+    return usage("output-repair");
+
+  return repair(argv);
+}
+#endif
diff --git a/src/output-repair/xml_repair.cpp b/src/output-repair/xml_repair.cpp
new file mode 100644
index 00000000000..b3cee855855
--- /dev/null
+++ b/src/output-repair/xml_repair.cpp
@@ -0,0 +1,114 @@
+/*******************************************************************\
+
+Module: XML repair tool
+
+Author: Peter Schrammel
+
+\*******************************************************************/
+
+#include <cstddef>
+#include <fstream>
+#include <stack>
+#include <string>
+
+#include "xml_repair.h"
+
+// cut non-closed branches at this level
+static const std::size_t BACKTRACK_LEVEL=1;
+
+/*******************************************************************\
+
+Function: xml_repair
+
+  Inputs: infile - the (possibly truncated) XML log to read
+          outfile - receives the repaired document
+
+ Outputs: nothing is returned; the result is written to outfile
+
+ Purpose: Copy the input up to the last tag that ends at a nesting
+          depth of at most BACKTRACK_LEVEL, drop the unfinished
+          branch behind it and append the closing tags that are
+          still missing.
+
+\*******************************************************************/
+
+void xml_repair(std::ifstream &infile, std::ofstream &outfile)
+{
+  std::stack<std::string> tags; // names of the open elements
+  bool intag=false; // within a tag
+  bool intagname=false; // within a tag name
+  bool closingtag=false; // it's a closing tag
+  std::string tag; // the tag name
+  std::string backtrackbuffer; // input not yet known to be complete
+  char c, lastc=' '; // current and last character
+
+  while(infile >> std::noskipws >> c)
+  {
+    backtrackbuffer.push_back(c);
+    switch(c)
+    {
+    case '<':
+      intag=intagname=true;
+      break;
+
+    case ' ':
+    case '\t':
+    case '\r':
+    case '\n': // any white space ends the tag name
+      if(intag)
+        intagname=false;
+      break;
+
+    case '/':
+      if(intag)
+      {
+        if(lastc=='<')
+          closingtag=true;
+        else
+          intagname=false;
+      }
+      break;
+
+    case '>':
+      if(!intag) // a literal '>' in character data is not a tag end
+        break;
+
+      if(closingtag)
+      {
+        // closing tag; one without a matching opener is ignored
+        if(lastc!='/' && !tags.empty())
+          tags.pop();
+      }
+      else if(lastc!='/' && lastc!='?' && // not <a/>, not <?xml?>
+              (tag.empty() || tag[0]!='!')) // not <!-- --> or <!DOCTYPE>
+      {
+        tags.push(tag); // end of opening tag
+      }
+      tag.clear();
+
+      if(tags.size()<=BACKTRACK_LEVEL)
+      {
+        // at this level everything in the buffer is part of the output
+        outfile << backtrackbuffer;
+        backtrackbuffer.clear();
+      }
+      intag=intagname=closingtag=false;
+      break;
+
+    default:
+      if(intagname)
+        tag.push_back(c);
+      break;
+    }
+    lastc=c;
+  }
+
+  // everything above BACKTRACK_LEVEL is discarded;
+  // now, add missing closing tags below it:
+  while(!tags.empty())
+  {
+    if(tags.size()<=BACKTRACK_LEVEL)
+      outfile << '\n' << "</" << tags.top() << ">" << '\n';
+    tags.pop();
+  }
+}
diff --git a/src/output-repair/xml_repair.h b/src/output-repair/xml_repair.h
new file mode 100644
index 00000000000..0a8fc14baba
--- /dev/null
+++ b/src/output-repair/xml_repair.h
@@ -0,0 +1,16 @@
+/*******************************************************************\
+
+Module: XML repair tool
+
+Author: Peter Schrammel
+
+\*******************************************************************/
+
+#ifndef CPROVER_OUTPUT_REPAIR_XML_REPAIR_H
+#define CPROVER_OUTPUT_REPAIR_XML_REPAIR_H
+
+#include <fstream>
+
+void xml_repair(std::ifstream &infile, std::ofstream &outfile);
+
+#endif // CPROVER_OUTPUT_REPAIR_XML_REPAIR_H