http://xml.apache.org/ http://www.apache.org/ http://www.w3.org/

Home

Readme
Installation
Build

API Docs
Samples
Programming
Migration
FAQs

Releases
Feedback

PDF Document

Download
Dev Snapshots
CVS Repository
Mail Archive

API Docs for SAX and DOM
 

Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

SAXParser.hpp

Go to the documentation of this file.
00001 /*
00002  * The Apache Software License, Version 1.1
00003  * 
00004  * Copyright (c) 1999-2000 The Apache Software Foundation.  All rights
00005  * reserved.
00006  * 
00007  * Redistribution and use in source and binary forms, with or without
00008  * modification, are permitted provided that the following conditions
00009  * are met:
00010  * 
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer. 
00013  * 
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in
00016  *    the documentation and/or other materials provided with the
00017  *    distribution.
00018  * 
00019  * 3. The end-user documentation included with the redistribution,
00020  *    if any, must include the following acknowledgment:  
00021  *       "This product includes software developed by the
00022  *        Apache Software Foundation (http://www.apache.org/)."
00023  *    Alternately, this acknowledgment may appear in the software itself,
00024  *    if and wherever such third-party acknowledgments normally appear.
00025  * 
00026  * 4. The names "Xerces" and "Apache Software Foundation" must
00027  *    not be used to endorse or promote products derived from this
00028  *    software without prior written permission. For written 
00029  *    permission, please contact apache\@apache.org.
00030  * 
00031  * 5. Products derived from this software may not be called "Apache",
00032  *    nor may "Apache" appear in their name, without prior written
00033  *    permission of the Apache Software Foundation.
00034  * 
00035  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
00036  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00037  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00038  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
00039  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00040  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00041  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
00042  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00043  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
00044  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00045  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00046  * SUCH DAMAGE.
00047  * ====================================================================
00048  * 
00049  * This software consists of voluntary contributions made by many
00050  * individuals on behalf of the Apache Software Foundation, and was
00051  * originally based on software copyright (c) 1999, International
00052  * Business Machines, Inc., http://www.ibm.com .  For more information
00053  * on the Apache Software Foundation, please see
00054  * <http://www.apache.org/>.
00055  */
00056 
00057 /*
00058  * $Log: SAXParser.hpp,v $
00059  * Revision 1.8  2000/04/12 22:58:30  roddey
00060  * Added support for 'auto validate' mode.
00061  *
00062  * Revision 1.7  2000/03/03 01:29:34  roddey
00063  * Added a scanReset()/parseReset() method to the scanner and
00064  * parsers, to allow for reset after early exit from a progressive parse.
00065  * Added calls to new Terminate() call to all of the samples. Improved
00066  * documentation in SAX and DOM parsers.
00067  *
00068  * Revision 1.6  2000/02/17 03:54:27  rahulj
00069  * Added some new getters to query the parser state and
00070  * clarified the documentation.
00071  *
00072  * Revision 1.5  2000/02/16 03:42:58  rahulj
00073  * Finished documenting the SAX Driver implementation.
00074  *
00075  * Revision 1.4  2000/02/15 04:47:37  rahulj
00076  * Documenting the SAXParser framework. Not done yet.
00077  *
00078  * Revision 1.3  2000/02/06 07:47:56  rahulj
00079  * Year 2K copyright swat.
00080  *
00081  * Revision 1.2  1999/12/15 19:57:48  roddey
00082  * Got rid of redundant 'const' on boolean return value. Some compilers choke
00083  * on this and it's useless.
00084  *
00085  * Revision 1.1.1.1  1999/11/09 01:07:51  twl
00086  * Initial checkin
00087  *
00088  * Revision 1.6  1999/11/08 20:44:54  rahul
00089  * Swat for adding in Product name and CVS comment log variable.
00090  *
00091  */
00092 
00093 #if !defined(SAXPARSER_HPP)
00094 #define SAXPARSER_HPP
00095 
00096 #include <sax/Parser.hpp>
00097 #include <internal/VecAttrListImpl.hpp>
00098 #include <framework/XMLDocumentHandler.hpp>
00099 #include <framework/XMLElementDecl.hpp>
00100 #include <framework/XMLEntityHandler.hpp>
00101 #include <framework/XMLErrorReporter.hpp>
00102 #include <validators/DTD/DocTypeHandler.hpp>
00103 
00104 class DocumentHandler;
00105 class EntityResolver;
00106 class XMLPScanToken;
00107 class XMLScanner;
00108 class XMLValidator;
00109 
00110 
00120 
00121 class  SAXParser :
00122     // Derives from the SAX Parser interface and from the XMLDocumentHandler, XMLErrorReporter, XMLEntityHandler and DocTypeHandler callback interfaces.
00123     public Parser
00124     , public XMLDocumentHandler
00125     , public XMLErrorReporter
00126     , public XMLEntityHandler
00127     , public DocTypeHandler
00128 {
00129 public :
00130     // -----------------------------------------------------------------------
00131     //  Class types
00132     // -----------------------------------------------------------------------
00133     enum ValSchemes     // values accepted by setValidationScheme() and returned by getValidationScheme()
00134     {
00135         Val_Never
00136         , Val_Always
00137         , Val_Auto      // presumably the 'auto validate' mode named in the revision 1.8 log entry -- confirm
00138     };
00139 
00140 
00141     // -----------------------------------------------------------------------
00142     //  Constructors and Destructor
00143     // -----------------------------------------------------------------------
00144 
00146     //@{
00152     SAXParser(XMLValidator* const valToAdopt = 0);     // argument is optional; see the fValidator notes: a DTD validator is created when none is provided
00153 
00157     ~SAXParser();
00158     //@}
00159 
00160     //  Getter methods
00162     //@{
00169     DocumentHandler* getDocumentHandler();                 // inline (end of this header): returns fDocHandler
00170 
00177     const DocumentHandler* getDocumentHandler() const;     // const overload: returns fDocHandler as pointer-to-const
00178 
00185     EntityResolver* getEntityResolver();                   // inline (end of this header): returns fEntityResolver
00186 
00193     const EntityResolver* getEntityResolver() const;       // const overload: returns fEntityResolver as pointer-to-const
00194 
00201     ErrorHandler* getErrorHandler();                       // inline (end of this header): returns fErrorHandler
00202 
00209     const ErrorHandler* getErrorHandler() const;           // const overload: returns fErrorHandler as pointer-to-const
00210 
00217     const XMLScanner& getScanner() const;                  // inline (end of this header): returns *fScanner
00218 
00225     const XMLValidator& getValidator() const;              // definition is not part of this header listing
00226 
00233     ValSchemes getValidationScheme() const;                // definition is not part of this header listing
00234 
00244     bool getDoNamespaces() const;                          // definition is not part of this header listing
00245 
00255     bool getExitOnFirstFatalError() const;                 // definition is not part of this header listing
00256     //@}
00257 
00258 
00259     // -----------------------------------------------------------------------
00260     //  Setter methods
00261     // -----------------------------------------------------------------------
00262 
00264     //@{
00282     void setDoNamespaces(const bool newState);
00283 
00295     void setValidationScheme(const ValSchemes newScheme);
00296 
00312     void setExitOnFirstFatalError(const bool newState);
00313     //@}
00314 
00315 
00316     // -----------------------------------------------------------------------
00317     //  Advanced document handler list maintenance methods
00318     // -----------------------------------------------------------------------
00319 
00321     //@{
00335     void installAdvDocHandler(XMLDocumentHandler* const toInstall);     // handlers are tracked in fAdvDHList (see the private data notes)
00336 
00346     bool removeAdvDocHandler(XMLDocumentHandler* const toRemove);      // NOTE(review): bool presumably reports whether toRemove was found -- confirm
00347     //@}
00348 
00349 
00350     // -----------------------------------------------------------------------
00351     //  Implementation of the SAXParser interface
00352     // -----------------------------------------------------------------------
00353 
00355     //@{
00368     virtual void parse(const InputSource& source, const bool reuseValidator = false);
00369 
00382     virtual void parse(const XMLCh* const systemId, const bool reuseValidator = false);
00383 
00394     virtual void parse(const char* const systemId, const bool reuseValidator = false);
00395 
00406     virtual void setDocumentHandler(DocumentHandler* const handler);
00407 
00417     virtual void setDTDHandler(DTDHandler* const handler);
00418 
00429     virtual void setErrorHandler(ErrorHandler* const handler);
00430 
00442     virtual void setEntityResolver(EntityResolver* const resolver);
00443     //@}
00444 
00445 
00446     // -----------------------------------------------------------------------
00447     //  Progressive scan methods
00448     // -----------------------------------------------------------------------
00449 
00451     //@{
00452     // The three parseFirst() overloads differ only in how the input is named: XMLCh system id, char system id, or InputSource.
00484     bool parseFirst
00485     (
00486         const   XMLCh* const    systemId
00487         ,       XMLPScanToken&  toFill
00488         , const bool            reuseValidator = false
00489     );
00490 
00521     bool parseFirst
00522     (
00523         const   char* const     systemId
00524         ,       XMLPScanToken&  toFill
00525         , const bool            reuseValidator = false
00526     );
00527 
00558     bool parseFirst
00559     (
00560         const   InputSource&    source
00561         ,       XMLPScanToken&  toFill
00562         , const bool            reuseValidator = false
00563     );
00564 
00589     bool parseNext(XMLPScanToken& token);
00590 
00612     void parseReset(XMLPScanToken& token);     // per the revision 1.7 log entry: allows reset after early exit from a progressive parse
00613 
00614     //@}
00615 
00616 
00617 
00618     // -----------------------------------------------------------------------
00619     //  Implementation of the DocTypeHandler Interface
00620     // -----------------------------------------------------------------------
00621 
00623     //@{
00638     virtual void attDef
00639     (
00640         const   DTDElementDecl& elemDecl
00641         , const DTDAttDef&      attDef
00642         , const bool            ignoring
00643     );
00644 
00654     virtual void doctypeComment
00655     (
00656         const   XMLCh* const    comment
00657     );
00658 
00675     virtual void doctypeDecl
00676     (
00677         const   DTDElementDecl& elemDecl
00678         , const XMLCh* const    publicId
00679         , const XMLCh* const    systemId
00680         , const bool            hasIntSubset
00681     );
00682 
00696     virtual void doctypePI
00697     (
00698         const   XMLCh* const    target
00699         , const XMLCh* const    data
00700     );
00701 
00713     virtual void doctypeWhitespace
00714     (
00715         const   XMLCh* const    chars
00716         , const unsigned int    length
00717     );
00718 
00731     virtual void elementDecl
00732     (
00733         const   DTDElementDecl& decl
00734         , const bool            isIgnored
00735     );
00736 
00747     virtual void endAttList
00748     (
00749         const   DTDElementDecl& elemDecl
00750     );
00751 
00758     virtual void endIntSubset();
00759 
00766     virtual void endExtSubset();
00767 
00782     virtual void entityDecl
00783     (
00784         const   DTDEntityDecl&  entityDecl
00785         , const bool            isPEDecl
00786         , const bool            isIgnored
00787     );
00788 
00793     virtual void resetDocType();
00794 
00807     virtual void notationDecl
00808     (
00809         const   XMLNotationDecl&    notDecl
00810         , const bool                isIgnored
00811     );
00812 
00823     virtual void startAttList
00824     (
00825         const   DTDElementDecl& elemDecl
00826     );
00827 
00834     virtual void startIntSubset();
00835 
00842     virtual void startExtSubset();
00843 
00856     virtual void TextDecl
00857     (
00858         const   XMLCh* const    versionStr
00859         , const XMLCh* const    encodingStr
00860     );
00861     //@}
00862 
00863 
00864     // -----------------------------------------------------------------------
00865     //  Implementation of the XMLDocumentHandler interface
00866     // -----------------------------------------------------------------------
00867 
00869     //@{
00885     virtual void docCharacters
00886     (
00887         const   XMLCh* const    chars
00888         , const unsigned int    length
00889         , const bool            cdataSection
00890     );
00891 
00901     virtual void docComment
00902     (
00903         const   XMLCh* const    comment
00904     );
00905 
00925     virtual void docPI
00926     (
00927         const   XMLCh* const    target
00928         , const XMLCh* const    data
00929     );
00930 
00942     virtual void endDocument();
00943 
00960     virtual void endElement
00961     (
00962         const   XMLElementDecl& elemDecl
00963         , const unsigned int    urlId
00964         , const bool            isRoot
00965     );
00966 
00977     virtual void endEntityReference
00978     (
00979         const   XMLEntityDecl&  entDecl
00980     );
00981 
01001     virtual void ignorableWhitespace
01002     (
01003         const   XMLCh* const    chars
01004         , const unsigned int    length
01005         , const bool            cdataSection
01006     );
01007 
01012     virtual void resetDocument();
01013 
01024     virtual void startDocument();
01025 
01052     virtual void startElement
01053     (
01054         const   XMLElementDecl&         elemDecl
01055         , const unsigned int            urlId
01056         , const XMLCh* const            elemPrefix
01057         , const RefVectorOf<XMLAttr>&   attrList
01058         , const unsigned int            attrCount
01059         , const bool                    isEmpty
01060         , const bool                    isRoot
01061     );
01062 
01072     virtual void startEntityReference
01073     (
01074         const   XMLEntityDecl&  entDecl
01075     );
01076 
01094     virtual void XMLDecl
01095     (
01096         const   XMLCh* const    versionStr
01097         , const XMLCh* const    encodingStr
01098         , const XMLCh* const    standaloneStr
01099         , const XMLCh* const    actualEncodingStr
01100     );
01101     //@}
01102 
01103 
01104     // -----------------------------------------------------------------------
01105     //  Implementation of the XMLErrorReporter interface
01106     // -----------------------------------------------------------------------
01107 
01109     //@{
01133     virtual void error
01134     (
01135         const   unsigned int                errCode
01136         , const XMLCh* const                msgDomain
01137         , const XMLErrorReporter::ErrTypes  errType
01138         , const XMLCh* const                errorText
01139         , const XMLCh* const                systemId
01140         , const XMLCh* const                publicId
01141         , const unsigned int                lineNum
01142         , const unsigned int                colNum
01143     );
01144 
01153     virtual void resetErrors();
01154     //@}
01155 
01156 
01157     // -----------------------------------------------------------------------
01158     //  Implementation of the XMLEntityHandler interface
01159     // -----------------------------------------------------------------------
01160 
01162     //@{
01174     virtual void endInputSource(const InputSource& inputSource);
01175 
01190     virtual bool expandSystemId
01191     (
01192         const   XMLCh* const    systemId
01193         ,       XMLBuffer&      toFill
01194     );
01195 
01203     virtual void resetEntities();
01204 
01219     virtual InputSource* resolveEntity
01220     (
01221         const   XMLCh* const    publicId
01222         , const XMLCh* const    systemId
01223     );
01224 
01236     virtual void startInputSource(const InputSource& inputSource);
01237     //@}
01238 
01239     // NOTE(review): boolean validation accessors; presumably kept alongside get/setValidationScheme() for backward compatibility -- confirm.
01241     //@{
01252     bool getDoValidation() const;
01253 
01267     void setDoValidation(const bool newState);
01268     //@}
01269 
01270 
01271 private :
01272     // -----------------------------------------------------------------------
01273     //  Unimplemented constructors and operators
01274     // -----------------------------------------------------------------------
01275     SAXParser(const SAXParser&);           // copy constructor: private and, per the banner above, left unimplemented
01276     void operator=(const SAXParser&);      // copy assignment: private and, per the banner above, left unimplemented
01277 
01278 
01279     // -----------------------------------------------------------------------
01280     //  Private data members
01281     //
01282     //  fAttrList
01283     //      A temporary implementation of the basic SAX attribute list
01284     //      interface. We use this one over and over on each startElement
01285     //      event to allow SAX-like access to the element attributes.
01286     //
01287     //  fDocHandler
01288     //      The installed SAX doc handler, if any. Null if none.
01289     //
01290     //  fDTDHandler
01291     //      The installed SAX DTD handler, if any. Null if none.
01292     //
01293     //  fElemDepth
01294     //      This is used to track the element nesting depth, so that we can
01295     //      know when we are inside content. This is so we can ignore char
01296     //      data outside of content.
01297     //
01298     //  fEntityResolver
01299     //      The installed SAX entity handler, if any. Null if none.
01300     //
01301     //  fErrorHandler
01302     //      The installed SAX error handler, if any. Null if none.
01303     //
01304     //  fAdvDHCount
01305     //  fAdvDHList
01306     //  fAdvDHListSize
01307     //      This is an array of pointers to XMLDocumentHandlers, which is
01308     //      how we see installed advanced document handlers. There will
01309     //      usually not be very many at all, so a simple array is used
01310     //      instead of a collection, for performance. It will grow if needed,
01311     //      but that is unlikely.
01312     //
01313     //      The count is how many handlers are currently installed. The size
01314     //      is how big the array itself is (for expansion purposes.) When
01315     //      count == size, it is time to expand.
01316     //
01317     //  fParseInProgress
01318     //      This flag is set once a parse starts. It is used to prevent
01319     //      multiple entrance or reentrance of the parser.
01320     //
01321     //  fScanner
01322     //      The scanner being used by this parser. It is created internally
01323     //      during construction.
01324     //
01325     //  fValidator
01326     //      The validator that is installed. If none is provided, we will
01327     //      create and install a DTD validator. We install this on the
01328     //      scanner we create, which it will use to do validation. We set
01329     //      ourself on it as the error reporter for validity errors.
01330     // -----------------------------------------------------------------------
01331     VecAttrListImpl         fAttrList;
01332     DocumentHandler*        fDocHandler;
01333     DTDHandler*             fDTDHandler;
01334     unsigned int            fElemDepth;
01335     EntityResolver*         fEntityResolver;
01336     ErrorHandler*           fErrorHandler;
01337     unsigned int            fAdvDHCount;
01338     XMLDocumentHandler**    fAdvDHList;
01339     unsigned int            fAdvDHListSize;
01340     bool                    fParseInProgress;
01341     XMLScanner*             fScanner;
01342     XMLValidator*           fValidator;
01343 };
01344 
01345 
01346 // ---------------------------------------------------------------------------
01347 //  SAXParser: Getter methods
01348 // ---------------------------------------------------------------------------
01349 inline DocumentHandler* SAXParser::getDocumentHandler()     // non-const overload: caller receives a mutable handler pointer
01350 {
01351     return fDocHandler;     // the installed SAX doc handler; null if none is installed
01352 }
01353 
01354 inline const DocumentHandler* SAXParser::getDocumentHandler() const     // const overload: same pointer, exposed as pointer-to-const
01355 {
01356     return fDocHandler;     // the installed SAX doc handler; null if none is installed
01357 }
01358 
01359 inline EntityResolver* SAXParser::getEntityResolver()     // non-const overload: caller receives a mutable resolver pointer
01360 {
01361     return fEntityResolver;     // the installed SAX entity handler; null if none is installed
01362 }
01363 
01364 inline const EntityResolver* SAXParser::getEntityResolver() const     // const overload: same pointer, exposed as pointer-to-const
01365 {
01366     return fEntityResolver;     // the installed SAX entity handler; null if none is installed
01367 }
01368 
01369 inline ErrorHandler* SAXParser::getErrorHandler()     // non-const overload: caller receives a mutable handler pointer
01370 {
01371     return fErrorHandler;     // the installed SAX error handler; null if none is installed
01372 }
01373 
01374 inline const ErrorHandler* SAXParser::getErrorHandler() const     // const overload: same pointer, exposed as pointer-to-const
01375 {
01376     return fErrorHandler;     // the installed SAX error handler; null if none is installed
01377 }
01378 
01379 inline const XMLScanner& SAXParser::getScanner() const     // read-only access to the scanner used by this parser
01380 {
01381     return *fScanner;     // dereferenced without a null check; the member notes say the scanner is created during construction
01382 }
01383 
01384 #endif


Copyright © 2000 The Apache Software Foundation. All Rights Reserved.