http://xml.apache.org/http://www.apache.org/http://www.w3.org/

Home

Readme
Installation
Build

API Docs
Samples
Programming
Migration
FAQs

Releases
Feedback

PDF Document

Download
Dev Snapshots
CVS Repository
Mail Archive

API Docs for SAX and DOM
 

Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

DOMParser.hpp

Go to the documentation of this file.
00001 /*
00002  * The Apache Software License, Version 1.1
00003  * 
00004  * Copyright (c) 1999-2000 The Apache Software Foundation.  All rights
00005  * reserved.
00006  * 
00007  * Redistribution and use in source and binary forms, with or without
00008  * modification, are permitted provided that the following conditions
00009  * are met:
00010  * 
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer. 
00013  * 
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in
00016  *    the documentation and/or other materials provided with the
00017  *    distribution.
00018  * 
00019  * 3. The end-user documentation included with the redistribution,
00020  *    if any, must include the following acknowledgment:  
00021  *       "This product includes software developed by the
00022  *        Apache Software Foundation (http://www.apache.org/)."
00023  *    Alternately, this acknowledgment may appear in the software itself,
00024  *    if and wherever such third-party acknowledgments normally appear.
00025  * 
00026  * 4. The names "Xerces" and "Apache Software Foundation" must
00027  *    not be used to endorse or promote products derived from this
00028  *    software without prior written permission. For written 
00029  *    permission, please contact apache\@apache.org.
00030  * 
00031  * 5. Products derived from this software may not be called "Apache",
00032  *    nor may "Apache" appear in their name, without prior written
00033  *    permission of the Apache Software Foundation.
00034  * 
00035  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
00036  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00037  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00038  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
00039  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00040  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00041  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
00042  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00043  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
00044  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00045  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00046  * SUCH DAMAGE.
00047  * ====================================================================
00048  * 
00049  * This software consists of voluntary contributions made by many
00050  * individuals on behalf of the Apache Software Foundation, and was
00051  * originally based on software copyright (c) 1999, International
00052  * Business Machines, Inc., http://www.ibm.com .  For more information
00053  * on the Apache Software Foundation, please see
00054  * <http://www.apache.org/>.
00055  */
00056 
00057 /*
00058  * $Id: DOMParser.hpp,v 1.11 2000/05/02 19:22:38 aruna1 Exp $
00059  *
00060  */
00061 
00062 #if !defined(DOMPARSER_HPP)
00063 #define DOMPARSER_HPP
00064 
00065 
00066 #include <dom/DOM_Document.hpp>
00067 #include <framework/XMLDocumentHandler.hpp>
00068 #include <framework/XMLErrorReporter.hpp>
00069 #include <framework/XMLEntityHandler.hpp>
00070 #include <util/ValueStackOf.hpp>
00071 
00072 #include <validators/DTD/DocTypeHandler.hpp>
00073 #include <dom/DOM_DocumentType.hpp>
00074 #include <validators/DTD/DTDElementDecl.hpp>
00075 #include <validators/DTD/DTDValidator.hpp>
00076 #include <util/NameIdPool.hpp>
00077 
00078 class EntityResolver;
00079 class ErrorHandler;
00080 class XMLPScanToken;
00081 class XMLScanner;
00082 class XMLValidator;
00083 
00084 
00095 class  DOMParser :
00096     // Builds a DOM_Document from parser callbacks by implementing the four handler interfaces listed below.
00097     public XMLDocumentHandler
00098     , public XMLErrorReporter
00099     , public XMLEntityHandler
00100     , public DocTypeHandler
00101 {
00102 public :
00103     // -----------------------------------------------------------------------
00104     //  Class types
00105     // -----------------------------------------------------------------------
00106     enum ValSchemes
00107     {
00108         Val_Never
00109         , Val_Always
00110         , Val_Auto
00111     };
00112 
00113 
00114     // -----------------------------------------------------------------------
00115     //  Constructors and Destructor
00116     // -----------------------------------------------------------------------
00117 
00119     //@{
00129     DOMParser(XMLValidator* const valToAdopt = 0);
00130 
00134     ~DOMParser();
00135 
00136     //@}
00137 
00143     void reset();
00144 
00145 
00146     // -----------------------------------------------------------------------
00147     //  Getter methods
00148     // -----------------------------------------------------------------------
00149  
00151     //@{
00152 
00162     DOM_Document getDocument();
00163 
00171     ErrorHandler* getErrorHandler();
00172 
00180     const ErrorHandler* getErrorHandler() const;
00181 
00189     EntityResolver* getEntityResolver();
00190 
00198     const EntityResolver* getEntityResolver() const;
00199 
00207     const XMLScanner& getScanner() const;
00208 
00216     const XMLValidator& getValidator() const;
00217 
00224     ValSchemes getValidationScheme() const;
00225 
00236     bool getDoNamespaces() const;
00237 
00250     bool getExitOnFirstFatalError() const;
00251 
00262     bool getExpandEntityReferences() const;
00263 
00274     bool getIncludeIgnorableWhitespace() const;
00275 
00285     bool getToCreateXMLDeclTypeNode() const;
00286 
00287 
00288     //@}
00289 
00290 
00291     // -----------------------------------------------------------------------
00292     //  Setter methods
00293     // -----------------------------------------------------------------------
00294 
00296     //@{
00297 
00311     void setErrorHandler(ErrorHandler* const handler);
00312 
00328     void setEntityResolver(EntityResolver* const handler);
00329 
00348     void setDoNamespaces(const bool newState);
00349 
00366     void setExitOnFirstFatalError(const bool newState);
00367 
00381     void setExpandEntityReferences(const bool expand);
00382 
00401     void setIncludeIgnorableWhitespace(const bool include);
00402 
00414     void setValidationScheme(const ValSchemes newScheme);
00415 
00426     void setToCreateXMLDeclTypeNode(const bool create);
00427     
00428     //@}
00429 
00430 
00431     // -----------------------------------------------------------------------
00432     //  Parsing methods
00433     // -----------------------------------------------------------------------
00434 
00436     //@{
00437 
00451     void parse(const InputSource& source, const bool reuseValidator = false);
00452 
00467     void parse(const XMLCh* const systemId, const bool reuseValidator = false);
00468 
00480     void parse(const char* const systemId, const bool reuseValidator = false);
00481 
00511     bool parseFirst
00512     (
00513         const   XMLCh* const    systemId
00514         ,       XMLPScanToken&  toFill
00515         , const bool            reuseValidator = false
00516     );
00517 
00548     bool parseFirst
00549     (
00550         const   char* const     systemId
00551         ,       XMLPScanToken&  toFill
00552         , const bool            reuseValidator = false
00553     );
00554 
00585     bool parseFirst
00586     (
00587         const   InputSource&    source
00588         ,       XMLPScanToken&  toFill
00589         , const bool            reuseValidator = false
00590     );
00591 
00614     bool parseNext(XMLPScanToken& token);
00615 
00641     void parseReset(XMLPScanToken& token);
00642 
00643     //@}
00644 
00645 
00646 
00647     // -----------------------------------------------------------------------
00648     //  Implementation of the XMLErrorReporter interface.
00649     // -----------------------------------------------------------------------
00650 
00652     //@{
00653 
00678     virtual void error
00679     (
00680         const   unsigned int                errCode
00681         , const XMLCh* const                msgDomain
00682         , const XMLErrorReporter::ErrTypes  errType
00683         , const XMLCh* const                errorText
00684         , const XMLCh* const                systemId
00685         , const XMLCh* const                publicId
00686         , const unsigned int                lineNum
00687         , const unsigned int                colNum
00688     );
00689 
00698     virtual void resetErrors();
00699     //@}
00700 
00701 
00702     // -----------------------------------------------------------------------
00703     //  Implementation of the XMLEntityHandler interface.
00704     // -----------------------------------------------------------------------
00705 
00707     //@{
00708 
00721     virtual void endInputSource(const InputSource& inputSource);
00722 
00738     virtual bool expandSystemId
00739     (
00740         const   XMLCh* const    systemId
00741         ,       XMLBuffer&      toFill
00742     );
00743 
00752     virtual void resetEntities();
00753 
00769     virtual InputSource* resolveEntity
00770     (
00771         const   XMLCh* const    publicId
00772         , const XMLCh* const    systemId
00773     );
00774 
00787     virtual void startInputSource(const InputSource& inputSource);
00788 
00789     //@}
00790 
00791 
00792 
00793     // -----------------------------------------------------------------------
00794     //  Implementation of the XMLDocumentHandler interface.
00795     // -----------------------------------------------------------------------
00796 
00798     //@{
00799 
00812     virtual void docCharacters
00813     (
00814         const   XMLCh* const    chars
00815         , const unsigned int    length
00816         , const bool            cdataSection
00817     );
00818 
00827     virtual void docComment
00828     (
00829         const   XMLCh* const    comment
00830     );
00831 
00844     virtual void docPI
00845     (
00846         const   XMLCh* const    target
00847         , const XMLCh* const    data
00848     );
00849 
00854     virtual void endDocument();
00855 
00869     virtual void endElement
00870     (
00871         const   XMLElementDecl& elemDecl
00872         , const unsigned int    urlId
00873         , const bool            isRoot
00874     );
00875 
00884     virtual void endEntityReference
00885     (
00886         const   XMLEntityDecl&  entDecl
00887     );
00888 
00907     virtual void ignorableWhitespace
00908     (
00909         const   XMLCh* const    chars
00910         , const unsigned int    length
00911         , const bool            cdataSection
00912     );
00913 
00920     virtual void resetDocument();
00921 
00926     virtual void startDocument();
00927 
00955     virtual void startElement
00956     (
00957         const   XMLElementDecl&         elemDecl
00958         , const unsigned int            urlId
00959         , const XMLCh* const            elemPrefix
00960         , const RefVectorOf<XMLAttr>&   attrList
00961         , const unsigned int            attrCount
00962         , const bool                    isEmpty
00963         , const bool                    isRoot
00964     );
00965 
00975     virtual void startEntityReference
00976     (
00977         const   XMLEntityDecl&  entDecl
00978     );
00979 
00998     virtual void XMLDecl
00999     (
01000         const   XMLCh* const    versionStr
01001         , const XMLCh* const    encodingStr
01002         , const XMLCh* const    standaloneStr
01003         , const XMLCh* const    actualEncStr
01004     );
01005     //@}
01006 
01007 
01009     //@{
01020     bool getDoValidation() const;
01021 
01035     void setDoValidation(const bool newState);
01036     // Implementation of the DocTypeHandler interface.
01037     virtual void attDef
01038     (
01039         const   DTDElementDecl&     elemDecl
01040         , const DTDAttDef&          attDef
01041         , const bool                ignoring
01042     );
01043 
01044     virtual void doctypeComment
01045     (
01046         const   XMLCh* const    comment
01047     );
01048 
01049     virtual void doctypeDecl
01050     (
01051         const   DTDElementDecl& elemDecl
01052         , const XMLCh* const    publicId
01053         , const XMLCh* const    systemId
01054         , const bool            hasIntSubset
01055     );
01056 
01057     virtual void doctypePI
01058     (
01059         const   XMLCh* const    target
01060         , const XMLCh* const    data
01061     );
01062 
01063     virtual void doctypeWhitespace
01064     (
01065         const   XMLCh* const    chars
01066         , const unsigned int    length
01067     );
01068 
01069     virtual void elementDecl
01070     (
01071         const   DTDElementDecl& decl
01072         , const bool            isIgnored
01073     );
01074 
01075     virtual void endAttList
01076     (
01077         const   DTDElementDecl& elemDecl
01078     );
01079 
01080     virtual void endIntSubset();
01081 
01082     virtual void endExtSubset();
01083 
01084     virtual void entityDecl
01085     (
01086         const   DTDEntityDecl&  entityDecl
01087         , const bool            isPEDecl
01088         , const bool            isIgnored
01089     );
01090 
01091     virtual void resetDocType();
01092 
01093     virtual void notationDecl
01094     (
01095         const   XMLNotationDecl&    notDecl
01096         , const bool                isIgnored
01097     );
01098 
01099     virtual void startAttList
01100     (
01101         const   DTDElementDecl& elemDecl
01102     );
01103 
01104     virtual void startIntSubset();
01105 
01106     virtual void startExtSubset();
01107 
01108     virtual void TextDecl
01109     (
01110         const   XMLCh* const    versionStr
01111         , const XMLCh* const    encodingStr
01112     );
01113 
01114     
01115     //@}
01116 
01117 
01118 protected :
01119     // -----------------------------------------------------------------------
01120     //  Protected getter methods
01121     // -----------------------------------------------------------------------
01122 
01124     //@{
01130     DOM_Node getCurrentNode();
01131 
01132     //@}
01133 
01134 
01135     // -----------------------------------------------------------------------
01136     //  Protected setter methods
01137     // -----------------------------------------------------------------------
01138 
01140     //@{
01141 
01149     void setCurrentNode(DOM_Node toSet);
01150 
01157     void setDocument(DOM_Document toSet);
01158     //@}
01159 
01160 
01161 private :
01162     // Local private helper that populates the doctype data.
01163     virtual void populateDocumentType();
01164 
01165     // -----------------------------------------------------------------------
01166     //  Private data members
01167     //
01168     //  fCurrentNode
01169     //  fCurrentParent
01170     //      Used to track the current node during nested element events. Since
01171     //      the tree must be built from a set of disjoint callbacks, we need
01172     //      these to keep up with where we currently are.
01173     //
01174     //  fDocument
01175     //      The root document object, filled with the document contents.
01176     //
01177     //  fEntityResolver
01178     //      The installed SAX entity resolver, if any. Null if none.
01179     //
01180     //  fErrorHandler
01181     //      The installed SAX error handler, if any. Null if none.
01182     //
01183     //  fExpandEntityReferences
01184     //      Indicates whether entity reference nodes should be expanded into
01185     //      their constituent text nodes or just created as a single (end
01186     //      result) text node.
01187     //
01188     //  fIncludeIgnorableWhitespace
01189     //      Indicates whether ignorable whitespace should be added to
01190     //      the DOM tree for validating parsers.
01191     //
01192     //  fNodeStack
01193     //      Used to track previous parent nodes during nested element events.
01194     //
01195     //  fParseInProgress
01196     //      Used to prevent multiple entrance to the parser while it is doing
01197     //      a parse.
01198     //
01199     //  fScanner
01200     //      The scanner used for this parser. This is created during the
01201     //      constructor.
01202     //
01203     //  fValidator
01204     //      The validator that is installed. If none is provided, we will
01205     //      create and install a DTD validator. We install this on the
01206     //      scanner we create, which it will use to do validation. We set
01207     //      ourselves on it as the error reporter for validity errors.
01208     //
01209     //  fWithinElement
01210     //      A flag to indicate that the parser is within at least one level
01211     //      of element processing.
01212     //
01213     //  fDocumentType
01214     //      Used to store and update the documentType variable information
01215     //      in fDocument
01216     //
01217     //  fOldDocTypeHandler
01218     //      Used to chain the old documentType node if the user has set it 
01219     //      from outside
01220     //
01221     //  fToCreateXMLDeclTypeNode
01222     //      A flag to create a DOM_XMLDecl node in the DOM tree if it exists.
01223     //      This is an extension to the Xerces implementation.
01224     //
01225     // -----------------------------------------------------------------------
01226     DOM_Node                fCurrentParent;
01227     DOM_Node                fCurrentNode;
01228     DOM_Document            fDocument;
01229     EntityResolver*         fEntityResolver;
01230     ErrorHandler*           fErrorHandler;
01231     bool                    fExpandEntityReferences;
01232     bool                    fIncludeIgnorableWhitespace;
01233     ValueStackOf<DOM_Node>* fNodeStack;
01234     bool                    fParseInProgress;
01235     XMLScanner*             fScanner;
01236     XMLValidator*           fValidator;
01237     bool                    fWithinElement;
01238     DocumentTypeImpl*       fDocumentType;
01239     DocTypeHandler*         fOldDocTypeHandler;
01240     bool                    fToCreateXMLDeclTypeNode;
01241 };
01242 
01243 
01244 
01245 // ---------------------------------------------------------------------------
01246 //  DOMParser: Handlers for the XMLEntityHandler interface
01247 // ---------------------------------------------------------------------------
01248 inline void DOMParser::endInputSource(const InputSource&)
01249 {
01250     // Deliberate no-op: the DOM entity resolver doesn't handle this, so the argument is ignored.
01251 }
01252 
01253 inline bool DOMParser::expandSystemId(const XMLCh* const, XMLBuffer&)
01254 {
01255     // The DOM entity resolver doesn't handle this; both arguments are ignored.
01256     return false;   // unconditional: the XMLBuffer argument is never written here
01257 }
01258 
01259 inline void DOMParser::resetEntities()
01260 {
01261     // Deliberate no-op: nothing to do on this one.
01262 }
01263 
01264 inline void DOMParser::startInputSource(const InputSource&)
01265 {
01266     // Deliberate no-op: the DOM entity resolver doesn't handle this, so the argument is ignored.
01267 }
01268 
01269 
01270 // ---------------------------------------------------------------------------
01271 //  DOMParser: Handlers for the XMLDocumentHandler interface
01272 // ---------------------------------------------------------------------------
01273 inline void DOMParser::endDocument()
01274 {
01275     // Deliberate no-op: not used in DOM at this time.
01276 }
01277 
01278 
01279 
01280 // ---------------------------------------------------------------------------
01281 //  DOMParser: Getter methods
01282 // ---------------------------------------------------------------------------
01283 inline DOM_Document DOMParser::getDocument()
01284 {
01285     return fDocument;   // returns the fDocument member by value
01286 }
01287 
01288 inline ErrorHandler* DOMParser::getErrorHandler()
01289 {
01290     return fErrorHandler;   // returns the stored handler pointer unchanged; no null check is done here
01291 }
01292 
01293 inline const ErrorHandler* DOMParser::getErrorHandler() const
01294 {
01295     return fErrorHandler;   // const overload: same member, exposed as pointer-to-const
01296 }
01297 
01298 inline EntityResolver* DOMParser::getEntityResolver()
01299 {
01300     return fEntityResolver;   // returns the stored resolver pointer unchanged; no null check is done here
01301 }
01302 
01303 inline const EntityResolver* DOMParser::getEntityResolver() const
01304 {
01305     return fEntityResolver;   // const overload: same member, exposed as pointer-to-const
01306 }
01307 
01308 inline bool DOMParser::getExpandEntityReferences() const
01309 {
01310     return fExpandEntityReferences;   // current value of the flag written by setExpandEntityReferences()
01311 }
01312 
01313 inline bool DOMParser::getIncludeIgnorableWhitespace() const
01314 {
01315     return fIncludeIgnorableWhitespace;   // current value of the flag written by setIncludeIgnorableWhitespace()
01316 }
01317 
01318 inline const XMLScanner& DOMParser::getScanner() const
01319 {
01320     return *fScanner;   // dereferences fScanner with no null check here; caller gets a const reference
01321 }
01322 
01323 inline bool DOMParser::getToCreateXMLDeclTypeNode() const
01324 {
01325     return fToCreateXMLDeclTypeNode;   // current value of the flag written by setToCreateXMLDeclTypeNode()
01326 }
01327 
01328 
01329 // ---------------------------------------------------------------------------
01330 //  DOMParser: Setter methods
01331 // ---------------------------------------------------------------------------
01332 inline void DOMParser::setExpandEntityReferences(const bool expand)
01333 {
01334     fExpandEntityReferences = expand;   // only stores the flag; no other member is touched here
01335 }
01336 
01337 inline void DOMParser::setIncludeIgnorableWhitespace(const bool include)
01338 {
01339     fIncludeIgnorableWhitespace = include;   // only stores the flag; no other member is touched here
01340 }
01341 
01342 inline void DOMParser::setToCreateXMLDeclTypeNode(const bool create)
01343 {
01344     fToCreateXMLDeclTypeNode = create;   // only stores the flag; no other member is touched here
01345 }
01346 
01347 
01348 // ---------------------------------------------------------------------------
01349 //  DOMParser: Protected getter methods
01350 // ---------------------------------------------------------------------------
01351 inline DOM_Node DOMParser::getCurrentNode()
01352 {
01353     return fCurrentNode;   // returns the fCurrentNode member by value
01354 }
01355 
01356 
01357 // ---------------------------------------------------------------------------
01358 //  DOMParser: Protected setter methods
01359 // ---------------------------------------------------------------------------
01360 inline void DOMParser::setCurrentNode(DOM_Node toSet)
01361 {
01362     fCurrentNode = toSet;   // overwrites fCurrentNode only; fCurrentParent is not updated here
01363 }
01364 
01365 inline void DOMParser::setDocument(DOM_Document toSet)
01366 {
01367     fDocument = toSet;   // overwrites fDocument only; no other member is touched here
01368 }
01369 
01370 #endif


Copyright © 2000 The Apache Software Foundation. All Rights Reserved.