| """ |
| This module contains the core classes of version 2.0 of SAX for Python. |
| This file provides only default classes with absolutely minimum |
| functionality, from which drivers and applications can be subclassed. |
| |
| Many of these classes are empty and are included only as documentation |
| of the interfaces. |
| |
| $Id: saxlib.py,v 1.12 2002/05/10 14:49:21 akuchling Exp $ |
| """ |
| |
# Version string of the SAX for Python interfaces defined in this module.
version = "2.0beta"
| |
# A number of interfaces used to live in saxlib, but are now in
# various other modules for Python 2 compatibility. If nobody uses
# them here any longer, the references can be removed.
| |
| from handler import ErrorHandler, ContentHandler, DTDHandler, EntityResolver |
| from xmlreader import XMLReader, InputSource, Locator, IncrementalParser |
| from _exceptions import * |
| |
| from handler import \ |
| feature_namespaces,\ |
| feature_namespace_prefixes,\ |
| feature_string_interning,\ |
| feature_validation,\ |
| feature_external_ges,\ |
| feature_external_pes,\ |
| all_features,\ |
| property_lexical_handler,\ |
| property_declaration_handler,\ |
| property_dom_node,\ |
| property_xml_string,\ |
| all_properties |
| |
| #============================================================================ |
| # |
| # MAIN INTERFACES |
| # |
| #============================================================================ |
| |
| # ===== XMLFILTER ===== |
| |
class XMLFilter(XMLReader):
    """SAX2 parser filter interface.

    A filter is itself an XMLReader, but rather than reading a document
    (or some other non-SAX data source) directly, it receives its events
    from another XMLReader -- its parent, which may be a filter too.
    This allows a filter to alter the event stream before the events
    are handed on to the filter's own handlers.
    """

    def __init__(self, parent=None):
        """Create the filter.

        parent is the XMLReader this filter takes its events from; it
        may be given here or later through setParent().
        """
        XMLReader.__init__(self)
        self._parent = parent

    def setParent(self, parent):
        """Set the parent XMLReader of this filter.

        The argument is not supposed to be None; note that this is a
        documented requirement only, the value is stored unchecked.
        """
        self._parent = parent

    def getParent(self):
        """Return the parent XMLReader of this filter."""
        return self._parent
| |
| # ===== ATTRIBUTES ===== |
| |
class Attributes:
    """Interface for a list of XML attributes.

    Contains a list of XML attributes, accessible by name.

    All methods are abstract and raise NotImplementedError until a
    subclass provides them.  The two methods documented as aliases,
    __len__ and __getitem__, delegate to getLength and getValue, so a
    subclass that implements those primary methods gets len(attrs) and
    attrs[name] without further work.  Subclasses remain free to
    override the aliases as well.
    """

    def getLength(self):
        "Returns the number of attributes in the list."
        raise NotImplementedError("This method must be implemented!")

    def getType(self, name):
        "Returns the type of the attribute with the given name."
        raise NotImplementedError("This method must be implemented!")

    def getValue(self, name):
        "Returns the value of the attribute with the given name."
        raise NotImplementedError("This method must be implemented!")

    def getValueByQName(self, name):
        """Returns the value of the attribute with the given raw (or
        qualified) name."""
        raise NotImplementedError("This method must be implemented!")

    def getNameByQName(self, name):
        """Returns the namespace name of the attribute with the given
        raw (or qualified) name."""
        raise NotImplementedError("This method must be implemented!")

    def getNames(self):
        """Returns a list of the names of all attributes
        in the list."""
        raise NotImplementedError("This method must be implemented!")

    def getQNames(self):
        """Returns a list of the raw qualified names of all attributes
        in the list."""
        raise NotImplementedError("This method must be implemented!")

    def __len__(self):
        """Alias for getLength.

        Delegates to getLength(), so it raises NotImplementedError only
        while getLength itself is still unimplemented.
        """
        return self.getLength()

    def __getitem__(self, name):
        """Alias for getValue.

        Delegates to getValue(name), so it raises NotImplementedError
        only while getValue itself is still unimplemented.
        """
        return self.getValue(name)

    def keys(self):
        "Returns a list of the attribute names in the list."
        raise NotImplementedError("This method must be implemented!")

    def has_key(self, name):
        "True if the attribute is in the list, false otherwise."
        raise NotImplementedError("This method must be implemented!")

    def get(self, name, alternative=None):
        """Return the value associated with attribute name; if it is not
        available, then return the alternative."""
        raise NotImplementedError("This method must be implemented!")

    def copy(self):
        "Return a copy of the Attributes object."
        raise NotImplementedError("This method must be implemented!")

    def items(self):
        "Return a list of (attribute_name, value) pairs."
        raise NotImplementedError("This method must be implemented!")

    def values(self):
        "Return a list of all attribute values."
        raise NotImplementedError("This method must be implemented!")
| |
| |
| #============================================================================ |
| # |
| # HANDLER INTERFACES |
| # |
| #============================================================================ |
| |
| |
| # ===== DECLHANDLER ===== |
| |
class DeclHandler:
    """Optional SAX2 handler receiving DTD declaration events.

    Some DTD declarations are already delivered through the DTDHandler
    interface.  Every event delivered to this handler occurs between
    the startDTD and endDTD events of the LexicalHandler.

    A DeclHandler is registered with an XMLReader through the
    setProperty method, using the identifier
    http://xml.org/sax/handlers/DeclHandler.

    All methods here do nothing; subclasses override the ones they are
    interested in.
    """

    # NOTE(review): this module also imports property_declaration_handler
    # from handler; presumably that constant is the identifier to pass to
    # setProperty -- confirm against the handler module.

    def attributeDecl(self, elem_name, attr_name, type, value_def, value):
        """Receive an attribute type declaration.

        Only the first declaration is reported.

        elem_name is the element type name and attr_name the attribute
        type name.  type represents the attribute type: one of the
        strings "CDATA", "ID", "IDREF", "IDREFS", "NMTOKEN",
        "NMTOKENS", "ENTITY", "ENTITIES" or "NOTATION", or a list of
        names for enumerated definitions.  value_def is the default
        declaration ('#IMPLIED', '#REQUIRED', '#FIXED' or None).  value
        is a string holding the attribute's default value, or None if
        there is none.
        """

    def elementDecl(self, elem_name, content_model):
        """Receive an element type declaration.

        Only the first declaration is reported.

        content_model is either the string 'EMPTY', the string 'ANY',
        or a tuple (separator, tokens, modifier) describing the content
        model structure: separator is the separator used in the token
        list ('|' or ','), tokens is the list of tokens (element type
        names, or tuples representing parentheses) and modifier is the
        quantity modifier ('*', '?' or '+').
        """

    def internalEntityDecl(self, name, value):
        """Receive an internal entity declaration.

        Only the first declaration of an entity is reported.

        name is the entity name, starting with '%' for a parameter
        entity.  value is the replacement text of the entity.
        """

    def externalEntityDecl(self, name, public_id, system_id):
        """Receive a parsed external entity declaration.

        Unparsed entities are reported to the DTDHandler instead.  Only
        the first declaration for each entity is reported.

        name is the entity name, starting with '%' for a parameter
        entity.  public_id and system_id are the public and system
        identifiers of the entity; public_id is None if none was
        declared.
        """
| |
| |
| |
| # ===== LEXICALHANDLER ===== |
| |
class LexicalHandler:
    """Optional SAX2 handler receiving lexical events.

    Lexical information describes how an XML document was encoded, as
    opposed to what it contains (which is reported to the
    ContentHandler); examples are comments and the boundaries of CDATA
    marked sections.

    A LexicalHandler is registered with an XMLReader through the
    setProperty method, using the property identifier
    'http://xml.org/sax/handlers/LexicalHandler'.  There is no
    guarantee that the XMLReader supports or recognizes this property.

    All methods here do nothing; subclasses override the ones they are
    interested in.
    """

    # NOTE(review): this module also imports property_lexical_handler
    # from handler; presumably that constant is the identifier to pass to
    # setProperty -- confirm against the handler module.

    def comment(self, content):
        """Receive a comment found anywhere in the document, including
        the DTD and the area outside the document element.

        content is a string holding the contents of the comment.
        """

    def startDTD(self, name, public_id, system_id):
        """Receive the start of the DTD declarations, if the document
        has an associated DTD.

        A startEntity event is reported before declaration events from
        the external DTD subset are reported, which makes it possible
        to infer which subset a DTD declaration comes from.

        name is the name of the document element type, public_id the
        public identifier of the DTD (None if none was supplied) and
        system_id the system identifier of the external subset (None
        if none was supplied).
        """

    def endDTD(self):
        """Receive the end of the DTD declarations."""

    def startEntity(self, name):
        """Receive the beginning of an entity.

        The start and end of the document entity are not reported.  The
        start and end of the external DTD subset are reported under the
        pseudo-name '[dtd]'.

        Skipped entities are reported through the skippedEntity event
        of the ContentHandler rather than through this event.

        name is the entity name, starting with '%' for a parameter
        entity.
        """

    def endEntity(self, name):
        """Receive the end of an entity.

        name is the entity name; it follows the same conventions as
        for startEntity.
        """

    def startCDATA(self):
        """Receive the beginning of a CDATA marked section.

        The contents of the section itself are reported through the
        characters event.
        """

    def endCDATA(self):
        """Receive the end of a CDATA marked section."""
| |
| |
| #============================================================================ |
| # |
| # SAX 1.0 COMPATIBILITY CLASSES |
| # Note that these are all deprecated. |
| # |
| #============================================================================ |
| |
| # ===== ATTRIBUTELIST ===== |
| |
class AttributeList:
    """SAX 1.0 interface describing the attributes of one element.

    Only specified or defaulted attributes are reported.  The
    information held by an object of this kind is valid only for the
    duration of the DocumentHandler.startElement callback, and the
    attributes are not necessarily presented in the order in which
    they were declared or specified.

    The methods below are documentation only: they have no body and
    therefore return None unless overridden.
    """

    def getLength(self):
        """Return how many attributes the list holds."""

    def getName(self, i):
        """Return the name of an attribute in the list."""

    def getType(self, i):
        """Return the type of an attribute in the list.

        The parameter may be an integer index or an attribute name.
        """

    def getValue(self, i):
        """Return the value of an attribute in the list.

        The parameter may be an integer index or an attribute name.
        """

    def __len__(self):
        """Alias for getLength."""

    def __getitem__(self, key):
        """Alias for getName when key is an integer, and for getValue
        when key is a string."""

    def keys(self):
        """Return a list of the attribute names."""

    def has_key(self, key):
        """Tell whether the attribute is in the list (true) or not
        (false)."""

    def get(self, key, alternative=None):
        """Return the value associated with the attribute name, or the
        alternative if the attribute is not available."""

    def copy(self):
        """Return a copy of the AttributeList."""

    def items(self):
        """Return a list of (attribute_name, value) pairs."""

    def values(self):
        """Return a list of all attribute values."""
| |
| |
| # ===== DOCUMENTHANDLER ===== |
| |
class DocumentHandler:
    """SAX 1.0 handler for general document events.

    This is the main client interface of SAX: its callbacks cover the
    most important document events, such as the start and end of
    elements.  An application creates an object implementing this
    interface and registers it with the Parser.  An application that
    does not want to implement the whole interface can derive from
    HandlerBase, which implements the default functionality.  The
    location of any document event can be found with the Locator
    object supplied through setDocumentLocator().

    Every callback here is a no-op.
    """

    def characters(self, ch, start, length):
        """Receive a character data event."""

    def endDocument(self):
        """Receive the event marking the end of a document."""

    def endElement(self, name):
        """Receive the event marking the end of an element."""

    def ignorableWhitespace(self, ch, start, length):
        """Receive an event for ignorable whitespace in element
        content."""

    def processingInstruction(self, target, data):
        """Receive a processing instruction event."""

    def setDocumentLocator(self, locator):
        """Receive an object that can locate the origin of SAX document
        events."""

    def startDocument(self):
        """Receive the event marking the beginning of a document."""

    def startElement(self, name, atts):
        """Receive the event marking the beginning of an element."""
| |
| |
| # ===== HANDLERBASE ===== |
| |
class HandlerBase(EntityResolver, DTDHandler, DocumentHandler, ErrorHandler):
    """Convenience base class for SAX 1.0 handlers.

    It combines the default behaviour of four SAX interfaces --
    EntityResolver, DTDHandler, DocumentHandler and ErrorHandler -- so
    that an application can extend this single class and override only
    the methods it needs instead of implementing each interface in
    full.  Using this class is optional; the interfaces may also be
    implemented directly.
    """
| |
| |
| # ===== PARSER ===== |
| |
class Parser:
    """Basic interface for SAX (Simple API for XML) parsers.

    Every SAX parser must implement this interface.  It lets users
    register handlers for the different kinds of events and start a
    parse from a URI, a character stream, or a byte stream.  SAX
    parsers should also provide a zero-argument constructor.
    """

    def __init__(self):
        """Install a default handler object for each kind of event."""
        self.doc_handler = DocumentHandler()
        self.dtd_handler = DTDHandler()
        self.ent_handler = EntityResolver()
        self.err_handler = ErrorHandler()

    def parse(self, systemId):
        """Parse an XML document given its system identifier.

        Does nothing in this base class.
        """

    def parseFile(self, fileobj):
        """Parse an XML document read from a file-like object.

        Does nothing in this base class.
        """

    def setDocumentHandler(self, handler):
        """Register the object that receives basic document-related
        events."""
        self.doc_handler = handler

    def setDTDHandler(self, handler):
        """Register the object that receives basic DTD-related events."""
        self.dtd_handler = handler

    def setEntityResolver(self, resolver):
        """Register the object that resolves external entities."""
        self.ent_handler = resolver

    def setErrorHandler(self, handler):
        """Register the object that receives error-message events."""
        self.err_handler = handler

    def setLocale(self, locale):
        """Let an application choose the locale for errors and warnings.

        SAX parsers are not required to localise errors and warnings,
        but a parser that cannot support the requested locale must
        throw a SAX exception.  Applications may ask for a locale
        change in the middle of a parse.

        This base class supports no locale and always raises
        SAXNotSupportedException.
        """
        raise SAXNotSupportedException("Locale support not implemented")