#include "precompiled.h"

#include <vector>
#include <set>
#include <map>
#include <stack>
#include <string>
#include <algorithm>

#include "ps/CLogger.h"
#include "ps/Filesystem.h"
#include "Xeromyces.h"

#define LOG_CATEGORY "xml"

#include "XML.h"

int CXeromyces::XercesLoaded = 0; // for once-only initialisation


// Convenient storage for the internal tree

// A single attribute of an element: ASCII name plus UTF-16 value.
typedef struct {
	std::string name;
	utf16string value;
} XMLAttribute;

// A single element of the parsed document. The element owns the
// XMLElement/XMLAttribute objects referenced by 'childs' and 'attrs';
// they are released by XeroHandler::OutputElement or
// XeroHandler::DeallocateElement.
typedef struct XMLElement {
	std::string name;
	int linenum;
	utf16string text;
	std::vector<XMLElement*> childs;
	std::vector<XMLAttribute*> attrs;
} XMLElement;

// SAX2 handler that builds an in-memory XMLElement tree while the document
// is parsed, and afterwards serialises that tree into 'writeBuffer'
// (see CreateXMB).
class XeroHandler : public DefaultHandler
{
public:
	XeroHandler() : m_locator(NULL), Root(NULL), CurrentElement(NULL) {}

	// Frees the tree if it has not already been consumed by CreateXMB
	~XeroHandler()
	{
		if (Root)
			DeallocateElement(Root);
	}

	// SAX2 event handlers:
	virtual void startDocument();
	virtual void endDocument();
	virtual void startElement(const XMLCh* const uri, const XMLCh* const localname, const XMLCh* const qname, const Attributes& attrs);
	virtual void endElement(const XMLCh* const uri, const XMLCh* const localname, const XMLCh* const qname);
	virtual void characters(const XMLCh* const chars, const unsigned int length);

	const Locator* m_locator;

	virtual void setDocumentLocator(const Locator* const locator)
	{
		m_locator = locator;
	}

	// Non-SAX2 stuff, used for storing the
	// parsed data and constructing the XMB:

	void CreateXMB();
	WriteBuffer writeBuffer;

private:
	std::set<std::string> ElementNames;
	std::set<std::string> AttributeNames;
	XMLElement* Root;
	XMLElement* CurrentElement;
	std::stack<XMLElement*> ElementStack;

	std::map<std::string, int> ElementID;
	std::map<std::string, int> AttributeID;

	void OutputElement(XMLElement* el);

	// Recursively frees memory
	void DeallocateElement(XMLElement* el);
};


CXeromyces::CXeromyces()
{
}

CXeromyces::~CXeromyces()
{
}

// Shuts Xerces down again if Load() initialised it earlier.
void CXeromyces::Terminate()
{
	if (XercesLoaded)
	{
		XMLPlatformUtils::Terminate();
		XercesLoaded = 0;
	}
}


// Find out write location of the XMB file corresponding to xmlFilename
//
// vfs           - filesystem used to resolve the real path of the XML file
// xmlFilename   - VFS path of the source XML file
// xmbFilename   - VFS path the XMB would have next to the XML file
// xmbActualPath - (out) receives cache/mods/<mod name>/xmb/<xmbFilename>
void CXeromyces::GetXMBPath(const PIVFS& vfs, const VfsPath& xmlFilename, const VfsPath& xmbFilename, VfsPath& xmbActualPath)
{
	// rationale:
	// - it is necessary to write out XMB files into a subdirectory
	//   corresponding to the mod from which the XML file is taken.
	//   this avoids confusion when multiple mods are active -
	//   their XMB files' VFS filename would otherwise be indistinguishable.
	// - we group files in the cache/ mount point first by mod, and only
	//   then XMB. this is so that all output files for a given mod can
	//   easily be deleted. the operation of deleting all old/unused
	//   XMB files requires a program anyway (to find out which are no
	//   longer needed), so it's not a problem that XMB files reside in
	//   a subdirectory (which would make manually deleting all harder).

	// get real path of XML file (e.g. mods/official/entities/...)
	Path P_XMBRealPath;
	vfs->GetRealPath(xmlFilename, P_XMBRealPath);

	// extract mod name from that: the path component that directly
	// follows the leading "mods/".
	// (done with std::string operations rather than sscanf("%[^/]") so that
	// an over-long component cannot overflow a fixed-size buffer, and so
	// that a non-matching path yields an empty name instead of
	// uninitialised data.)
	const std::string realPath = P_XMBRealPath.string();
	static const char modsPrefix[] = "mods/";
	const size_t prefixLen = sizeof(modsPrefix) - 1;

	std::string modName;
	if (realPath.compare(0, prefixLen, modsPrefix) == 0)
	{
		const size_t nameEnd = realPath.find('/', prefixLen);
		if (nameEnd == std::string::npos)
			modName = realPath.substr(prefixLen);
		else
			modName = realPath.substr(prefixLen, nameEnd - prefixLen);
	}
	debug_assert(!modName.empty());

	// build full name: cache, then mod name, XMB subdir, original XMB path
	xmbActualPath = VfsPath("cache/mods") / modName.c_str() / "xmb" / xmbFilename;
}


// Loads the given XML file, going through a cached XMB conversion.
//
// If an XMB matching the XML's mtime/size already exists in the cache and
// can be read, it is used directly. Otherwise the XML is parsed with
// Xerces, converted to XMB, written to the cache and then used.
//
// Returns PSRETURN_OK on success, PSRETURN_Xeromyces_XMLOpenFailed if the
// XML file cannot be found/stat'ed/opened, and
// PSRETURN_Xeromyces_XMLParseError if the error handler saw errors.
PSRETURN CXeromyces::Load(const VfsPath& filename)
{
	// Make sure the .xml actually exists
	if (! FileExists(filename))
	{
		LOG(CLogger::Error, LOG_CATEGORY, "CXeromyces: Failed to find XML file %s", filename.string().c_str());
		return PSRETURN_Xeromyces_XMLOpenFailed;
	}

	// Get some data about the .xml file
	FileInfo fileInfo;
	if (g_VFS->GetFileInfo(filename, &fileInfo) < 0)
	{
		LOG(CLogger::Error, LOG_CATEGORY, "CXeromyces: Failed to stat XML file %s", filename.string().c_str());
		return PSRETURN_Xeromyces_XMLOpenFailed;
	}


	/*
	XMBs are stored with a unique name, where the name is generated from
	characteristics of the XML file. If a file already exists with the
	generated name, it is assumed that that file is a valid conversion of
	the XML, and so it's loaded. Otherwise, the XMB is created with that
	filename.

	This means it's never necessary to overwrite existing XMB files; since
	the XMBs are often in archives, it's not easy to rewrite those files,
	and it's not possible to switch to using a loose file because the VFS
	has already decided that file is inside an archive. So each XMB is given
	a unique name, and old ones are somehow purged.
	*/

	// Generate the filename for the xmb:
	//     <xml filename>_<mtime><size>B.xmb
	//     (the suffix is applied to the XML filename via change_extension)
	// with mtime/size as 8-digit hex, where mtime's lowest bit is
	// zeroed because zip files only have 2 second resolution.
	const int suffixLength = 22;
	char suffix[suffixLength+1];
	int ret = sprintf(suffix, "_%08x%08xB.xmb", (int)(fileInfo.MTime() & ~1), (int)fileInfo.Size());
	debug_assert(ret == suffixLength);
	VfsPath xmbFilename = change_extension(filename, suffix);

	VfsPath xmbPath;
	GetXMBPath(g_VFS, filename, xmbFilename, xmbPath);

	// If the file exists, use it
	if (FileExists(xmbPath))
	{
		if (ReadXMBFile(xmbPath))
			return PSRETURN_OK;
		// (no longer return PSRETURN_Xeromyces_XMLOpenFailed here because
		// failure legitimately happens due to partially-written XMB files.)
	}


	// XMB isn't up to date with the XML, so rebuild it:

	// Load Xerces if necessary
	if (! XercesLoaded)
	{
		XMLPlatformUtils::Initialize();
		XercesLoaded = 1;
	}

	// Open the .xml file
	CVFSInputSource source;
	if (source.OpenFile(filename) < 0)
	{
		LOG(CLogger::Error, LOG_CATEGORY, "CXeromyces: Failed to open XML file %s", filename.string().c_str());
		return PSRETURN_Xeromyces_XMLOpenFailed;
	}

	// Set up the Xerces parser
	SAX2XMLReader* Parser = XMLReaderFactory::createXMLReader();

	// Enable validation
	Parser->setFeature(XMLUni::fgSAX2CoreValidation, true);
	Parser->setFeature(XMLUni::fgXercesDynamic, true);

	XeroHandler handler;
	Parser->setContentHandler(&handler);

	CXercesErrorHandler errorHandler;
	Parser->setErrorHandler(&errorHandler);

	CVFSEntityResolver entityResolver(filename.string().c_str());
	Parser->setEntityResolver(&entityResolver);

	// Build a tree inside handler
	Parser->parse(source);

	// (It's horribly inefficient doing SAX2->tree then tree->XMB,
	// but the XML->XMB conversion should be done very rarely
	// anyway. If it's ever needed, the XMB writing can be done
	// directly from inside the SAX2 event handlers, although that's
	// a little more complex)

	delete Parser;

	if (errorHandler.GetSawErrors())
	{
		LOG(CLogger::Error, LOG_CATEGORY, "CXeromyces: Errors in XML file '%s'", filename.string().c_str());
		return PSRETURN_Xeromyces_XMLParseError;
		// The internal tree of the XeroHandler will be cleaned up automatically
	}

	// Convert the data structures into the XMB format
	handler.CreateXMB();

	// Save the file to disk, so it can be loaded quickly next time
	WriteBuffer& writeBuffer = handler.writeBuffer;
	g_VFS->CreateFile(xmbPath, writeBuffer.Data(), writeBuffer.Size());

	XMBBuffer = writeBuffer.Data(); // add a reference

	// Set up the XMBFile
	const bool ok = Initialise((const char*)XMBBuffer.get());
	debug_assert(ok);

	return PSRETURN_OK;
}

// Loads an existing XMB file into XMBBuffer and initialises the XMBFile
// from it. Returns false if the file cannot be loaded or Initialise()
// rejects its contents.
bool CXeromyces::ReadXMBFile(const VfsPath& filename)
{
	size_t size;
	if(g_VFS->LoadFile(filename, XMBBuffer, size) < 0)
		return false;
	debug_assert(size >= 42);	// else: invalid XMB file size. (42 bytes is the smallest possible XMB. (Well, maybe not quite, but it's a nice number.))

	// Set up the XMBFile
	if(!Initialise((const char*)XMBBuffer.get()))
		return false;

	return true;
}



// Creates the artificial root node that will hold the document's single
// top-level element, and makes it the current parent.
void XeroHandler::startDocument()
{
	Root = new XMLElement;
	ElementStack.push(Root);
}

void XeroHandler::endDocument()
{
}

/*
// Silently clobbers non-ASCII characters
std::string lowercase_ascii(const XMLCh *a)
{
	std::string b;
	size_t len=XMLString::stringLen(a);
	b.resize(len);
	for (size_t i = 0; i < len; ++i)
		b[i] = (char)towlower(a[i]);
	return b;
}
*/

/**
 * Return an ASCII version of the given 16-bit string, ignoring
 * any non-ASCII characters.
 *
 * @param const XMLCh * a Input string.
 * @return std::string 8-bit ASCII version of a.
 **/
std::string toAscii( const XMLCh* a )
{
	std::string b;
	size_t len=XMLString::stringLen(a);
	b.reserve(len);
	for (size_t i = 0; i < len; ++i)
	{
		if(a[i] < 0x80)
			b += (char) a[i];
	}
	return b;
}

// Records a new element (name, line number and all attributes), attaches
// it to the element currently on top of the stack and makes it the parent
// of whatever follows until the matching endElement.
void XeroHandler::startElement(const XMLCh* const UNUSED(uri), const XMLCh* const localname, const XMLCh* const UNUSED(qname), const Attributes& attrs)
{
	std::string elementName = toAscii(localname);
	ElementNames.insert(elementName);

	// Create a new element
	XMLElement* e = new XMLElement;
	e->name = elementName;
	e->linenum = m_locator->getLineNumber();

	// Store all the attributes in the new element
	for (unsigned int i = 0; i < attrs.getLength(); ++i)
	{
		std::string attrName = toAscii(attrs.getLocalName(i));
		AttributeNames.insert(attrName);
		XMLAttribute* a = new XMLAttribute;
		a->name = attrName;
		const XMLCh *tmp = attrs.getValue(i);
		a->value = utf16string(tmp, tmp+XMLString::stringLen(tmp));
		e->attrs.push_back(a);
	}

	// Add the element to its parent
	ElementStack.top()->childs.push_back(e);

	// Set as parent of following elements
	ElementStack.push(e);
}

// Closes the current element: its parent becomes the current parent again.
void XeroHandler::endElement(const XMLCh* const UNUSED(uri), const XMLCh* const UNUSED(localname), const XMLCh* const UNUSED(qname))
{
	ElementStack.pop();
}

// Appends a run of character data to the text of the current element.
//
// Only the range [chars, chars+length) is used: the SAX interface passes
// an explicit length and does not promise that the buffer is
// NUL-terminated, so the data must not be scanned for a terminator.
void XeroHandler::characters(const XMLCh* const chars, const unsigned int length)
{
	ElementStack.top()->text += utf16string(chars, chars+length);
}


// Serialises the parsed tree into writeBuffer in XMB format:
// magic string, table of element names, table of attribute names, then
// the (recursive) data of the single top-level element.
// The tree is freed in the process and Root is reset to NULL.
void XeroHandler::CreateXMB()
{
	// Header
	// (a placeholder magic is written first; it is replaced at the very
	// end, once the buffer contents are complete)
	writeBuffer.Append(UnfinishedHeaderMagicStr, 4);

	std::set<std::string>::iterator it;
	int i;

	// Element names
	i = 0;
	int ElementCount = (int)ElementNames.size();
	writeBuffer.Append(&ElementCount, 4);
	for (it = ElementNames.begin(); it != ElementNames.end(); ++it)
	{
		// length includes the terminating NUL
		int TextLen = (int)it->length()+1;
		writeBuffer.Append(&TextLen, 4);
		writeBuffer.Append((void*)it->c_str(), TextLen);
		ElementID[*it] = i++;
	}

	// Attribute names
	i = 0;
	int AttributeCount = (int)AttributeNames.size();
	writeBuffer.Append(&AttributeCount, 4);
	for (it = AttributeNames.begin(); it != AttributeNames.end(); ++it)
	{
		// length includes the terminating NUL
		int TextLen = (int)it->length()+1;
		writeBuffer.Append(&TextLen, 4);
		writeBuffer.Append((void*)it->c_str(), TextLen);
		AttributeID[*it] = i++;
	}

	// All the XML contents must be surrounded by a single element
	debug_assert(Root->childs.size() == 1);

	// (OutputElement frees the element it is given, so only the
	// artificial root itself is left to delete afterwards)
	OutputElement(Root->childs[0]);

	delete Root;
	Root = NULL;

	// file is now valid, so insert correct magic string
	writeBuffer.Overwrite(HeaderMagicStr, 4, 0);
}

// Writes a whole element (recursively if it has children) into the buffer,
// and also frees all the memory that has been allocated for that element.
void XeroHandler::OutputElement(XMLElement* el) { // Filled in later with the length of the element int Pos_Length = (int)writeBuffer.Size(); writeBuffer.Append("????", 4); int NameID = ElementID[el->name]; writeBuffer.Append(&NameID, 4); int AttrCount = (int)el->attrs.size(); writeBuffer.Append(&AttrCount, 4); int ChildCount = (int)el->childs.size(); writeBuffer.Append(&ChildCount, 4); // Filled in later with the offset to the list of child elements int Pos_ChildrenOffset = (int)writeBuffer.Size(); writeBuffer.Append("????", 4); // Trim excess whitespace in the entity's text, while counting // the number of newlines trimmed (so that JS error reporting // can give the correct line number) std::string whitespaceA = " \t\r\n"; utf16string whitespace (whitespaceA.begin(), whitespaceA.end()); // Find the start of the non-whitespace section size_t first = el->text.find_first_not_of(whitespace); if (first == el->text.npos) // Entirely whitespace - easy to handle el->text = utf16string(); else { // Count the number of \n being cut off, // and add them to the line number utf16string trimmed (el->text.begin(), el->text.begin()+first); el->linenum += (int)std::count(trimmed.begin(), trimmed.end(), (utf16_t)'\n'); // Find the end of the non-whitespace section, // and trim off everything else size_t last = el->text.find_last_not_of(whitespace); el->text = el->text.substr(first, 1+last-first); } // Output text, prefixed by length in bytes if (el->text.length() == 0) { // No text; don't write much writeBuffer.Append("\0\0\0\0", 4); } else { // Write length and line number and null-terminated text int NodeLen = 4 + 2*((int)el->text.length()+1); writeBuffer.Append(&NodeLen, 4); writeBuffer.Append(&el->linenum, 4); writeBuffer.Append((void*)el->text.c_str(), NodeLen-4); } // Output attributes int i; for (i = 0; i < AttrCount; ++i) { int AttrName = AttributeID[el->attrs[i]->name]; writeBuffer.Append(&AttrName, 4); int AttrLen = 2*((int)el->attrs[i]->value.length()+1); 
writeBuffer.Append(&AttrLen, 4); writeBuffer.Append((void*)el->attrs[i]->value.c_str(), AttrLen); // Free each attribute as soon as it's been dealt with delete el->attrs[i]; } // Go back and fill in the child-element offset int ChildrenOffset = (int)writeBuffer.Size() - (Pos_ChildrenOffset+4); writeBuffer.Overwrite(&ChildrenOffset, 4, Pos_ChildrenOffset); // Output all child nodes for (i = 0; i < ChildCount; ++i) OutputElement(el->childs[i]); // Go back and fill in the length int Length = (int)writeBuffer.Size() - Pos_Length; writeBuffer.Overwrite(&Length, 4, Pos_Length); // Tidy up the parser's mess delete el; } void XeroHandler::DeallocateElement(XMLElement* el) { size_t i; for (i = 0; i < el->attrs.size(); ++i) delete el->attrs[i]; for (i = 0; i < el->childs.size(); ++i) DeallocateElement(el->childs[i]); delete el; }