http://xml.apache.org/ http://www.apache.org/ http://www.w3.org/

Home

Readme
Installation
Build

API Docs
Samples
Programming
Migration
FAQs

Releases
Feedback

PDF Document

Download
Dev Snapshots
CVS Repository
Mail Archive

API Docs for SAX and DOM
 

Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

XMLRecognizer.hpp

Go to the documentation of this file.
00001 /*
00002  * The Apache Software License, Version 1.1
00003  * 
00004  * Copyright (c) 1999-2000 The Apache Software Foundation.  All rights
00005  * reserved.
00006  * 
00007  * Redistribution and use in source and binary forms, with or without
00008  * modification, are permitted provided that the following conditions
00009  * are met:
00010  * 
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer. 
00013  * 
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in
00016  *    the documentation and/or other materials provided with the
00017  *    distribution.
00018  * 
00019  * 3. The end-user documentation included with the redistribution,
00020  *    if any, must include the following acknowledgment:  
00021  *       "This product includes software developed by the
00022  *        Apache Software Foundation (http://www.apache.org/)."
00023  *    Alternately, this acknowledgment may appear in the software itself,
00024  *    if and wherever such third-party acknowledgments normally appear.
00025  * 
00026  * 4. The names "Xerces" and "Apache Software Foundation" must
00027  *    not be used to endorse or promote products derived from this
00028  *    software without prior written permission. For written 
00029  *    permission, please contact apache\@apache.org.
00030  * 
00031  * 5. Products derived from this software may not be called "Apache",
00032  *    nor may "Apache" appear in their name, without prior written
00033  *    permission of the Apache Software Foundation.
00034  * 
00035  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
00036  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00037  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00038  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
00039  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00040  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00041  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
00042  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00043  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
00044  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00045  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00046  * SUCH DAMAGE.
00047  * ====================================================================
00048  * 
00049  * This software consists of voluntary contributions made by many
00050  * individuals on behalf of the Apache Software Foundation, and was
00051  * originally based on software copyright (c) 1999, International
00052  * Business Machines, Inc., http://www.ibm.com .  For more information
00053  * on the Apache Software Foundation, please see
00054  * <http://www.apache.org/>.
00055  */
00056 
00057 /*
00058  *  $Log: XMLRecognizer.hpp,v $
00059  *  Revision 1.6  2000/02/24 20:00:23  abagchi
00060  *  Swat for removing Log from API docs
00061  *
00062  *  Revision 1.5  2000/02/15 01:21:31  roddey
00063  *  Some initial documentation improvements. More to come...
00064  *
00065  *  Revision 1.4  2000/02/06 07:47:48  rahulj
00066  *  Year 2K copyright swat.
00067  *
00068  *  Revision 1.3  1999/12/18 00:19:03  roddey
00069  *  More changes to support the new, completely orthogonal, support for
00070  *  intrinsic encodings.
00071  *
00072  *  Revision 1.2  1999/11/23 01:49:27  rahulj
00073  *  Cannot use class qualifier in class defn. CC under HPUX is happy.
00074  *
00075  *  Revision 1.1.1.1  1999/11/09 01:08:37  twl
00076  *  Initial checkin
00077  *
00078  *  Revision 1.2  1999/11/08 20:44:40  rahul
00079  *  Swat for adding in Product name and CVS comment log variable.
00080  *
00081  */
00082 
00083 #if !defined(XMLRECOGNIZER_HPP)
00084 #define XMLRECOGNIZER_HPP
00085 
00094 class  XMLRecognizer
00095 {
00096 public :
00097     // -----------------------------------------------------------------------
00098     //  Class types
00099     //
00100     //  This enum represents the various encoding families that we have to
00101     //  deal with individually at the scanner level. It does not indicate
00102     //  the exact encoding, just the rough family that would let us scan
00103     //  the XML/TextDecl to find the encoding string.
00104     //
00105     //  The 'L' and 'B' suffixes stand for little or big endian. Def_UTF16 and
00106     //  Def_UCS4 are aliases, chosen by whether ENDIANMODE_BIG is defined,
00107     //  that map to the local UTF-16 and UCS-4 endian modes.
00108     //
00109     //  OtherEncoding means that it is some transcoder-based encoding, i.e. not
00110     //  one of the ones that we do internally. It is a special case and should
00111     //  never be used directly outside of the reader.
00112     //
00113     //  NOTE: Keep this in sync with the name map array in the Cpp file!!
00114     // -----------------------------------------------------------------------
00115     enum Encodings
00116     {
00117         EBCDIC          = 0
00118         , UCS_4B        = 1
00119         , UCS_4L        = 2
00120         , US_ASCII      = 3
00121         , UTF_8         = 4
00122         , UTF_16B       = 5
00123         , UTF_16L       = 6
00124 
00125         , Encodings_Count               // implicitly 7, i.e. UTF_16L + 1
00126         , Encodings_Min = EBCDIC
00127         , Encodings_Max = UTF_16L
00128 
00129         , OtherEncoding = 999           // deliberately outside Min..Max
00130 
00131         #if defined(ENDIANMODE_BIG)
00132         , Def_UTF16     = UTF_16B
00133         , Def_UCS4      = UCS_4B
00134         #else
00135         , Def_UTF16     = UTF_16L
00136         , Def_UCS4      = UCS_4L
00137         #endif
00138     };
00139 
00140 
00141     // -----------------------------------------------------------------------
00142     //  Public, const static data
00143     //
00144     //  Byte sequences for each of the encodings that we can auto-sense, and
00145     //  their lengths. Only declared here; the values are defined elsewhere.
00146     // -----------------------------------------------------------------------
00147     static const char           fgASCIIPre[];
00148     static const unsigned int   fgASCIIPreLen;
00149     static const XMLByte        fgEBCDICPre[];
00150     static const unsigned int   fgEBCDICPreLen;
00151     static const XMLByte        fgUTF16BPre[];
00152     static const XMLByte        fgUTF16LPre[];
00153     static const unsigned int   fgUTF16PreLen;   // one length for both UTF-16 arrays
00154     static const XMLByte        fgUCS4BPre[];
00155     static const XMLByte        fgUCS4LPre[];
00156     static const unsigned int   fgUCS4PreLen;    // one length for both UCS-4 arrays
00157 
00158 
00159     // -----------------------------------------------------------------------
00160     //  Encoding recognition methods (all static; bodies are not in this file)
00161     // -----------------------------------------------------------------------
00162     static Encodings basicEncodingProbe
00163     (
00164         const   XMLByte* const      rawBuffer
00165         , const unsigned int        rawByteCount
00166     );
00167 
00168     static Encodings encodingForName
00169     (
00170         const   XMLCh* const    theEncName
00171     );
00172 
00173     static const XMLCh* nameForEncoding(const Encodings theEncoding);
00174 
00175 
00176 private :
00177     // -----------------------------------------------------------------------
00178     //  Unimplemented constructors, operators, and destructor
00179     //
00180     //  This class is effectively being used as a namespace for some static
00181     //  methods. NOTE(review): no copy constructor is declared alongside these.
00182     // -----------------------------------------------------------------------
00183     XMLRecognizer();
00184     ~XMLRecognizer();
00185     void operator=(const XMLRecognizer&);
00186 };
00187 
00188 #endif


Copyright © 2000 The Apache Software Foundation. All Rights Reserved.