blob: 64603799d0ba1ed9f8e195043113dabd6066972b [file] [log] [blame]
"""
This module contains the core classes of version 2.0 of SAX for Python.
This file provides only default classes with absolutely minimum
functionality, from which drivers and applications can be subclassed.
Many of these classes are empty and are included only as documentation
of the interfaces.
$Id: saxlib.py,v 1.12 2002/05/10 14:49:21 akuchling Exp $
"""
# Version identifier of this module (SAX 2.0 for Python).
version = "2.0beta"
# A number of interfaces used to live in saxlib, but now live in
# various other modules for Python 2 compatibility. If nobody uses
# them from here any longer, these references can be removed.
from handler import ErrorHandler, ContentHandler, DTDHandler, EntityResolver
from xmlreader import XMLReader, InputSource, Locator, IncrementalParser
from _exceptions import *
from handler import \
feature_namespaces,\
feature_namespace_prefixes,\
feature_string_interning,\
feature_validation,\
feature_external_ges,\
feature_external_pes,\
all_features,\
property_lexical_handler,\
property_declaration_handler,\
property_dom_node,\
property_xml_string,\
all_properties
#============================================================================
#
# MAIN INTERFACES
#
#============================================================================
# ===== XMLFILTER =====
class XMLFilter(XMLReader):
    """Interface for a SAX2 parser filter.

    A filter is an XMLReader whose events come from another XMLReader
    (its parent, which may itself be a filter) instead of from a primary
    source such as a document or other non-SAX data source.  A filter
    can modify the stream of events before passing it on to its own
    handlers.
    """

    def __init__(self, parent=None):
        """Create the filter, optionally setting its parent right away."""
        XMLReader.__init__(self)
        self._parent = parent

    def getParent(self):
        """Return the parent of this filter (None if none was given)."""
        return self._parent

    def setParent(self, parent):
        """Set the parent XMLReader of this filter.

        The argument is expected not to be None; that is not checked
        here.
        """
        self._parent = parent
# ===== ATTRIBUTES =====
class Attributes:
    """Interface for a list of XML attributes.

    Contains a list of XML attributes, accessible by name.

    Every primary accessor raises NotImplementedError and has to be
    overridden by a concrete implementation.  The __len__ and
    __getitem__ aliases delegate to getLength and getValue, so an
    implementation only needs to override those two primary methods to
    get len(attrs) and attrs[name] as well.
    """

    def getLength(self):
        "Returns the number of attributes in the list."
        raise NotImplementedError("This method must be implemented!")

    def getType(self, name):
        "Returns the type of the attribute with the given name."
        raise NotImplementedError("This method must be implemented!")

    def getValue(self, name):
        "Returns the value of the attribute with the given name."
        raise NotImplementedError("This method must be implemented!")

    def getValueByQName(self, name):
        """Returns the value of the attribute with the given raw (or
        qualified) name."""
        raise NotImplementedError("This method must be implemented!")

    def getNameByQName(self, name):
        """Returns the namespace name of the attribute with the given
        raw (or qualified) name."""
        raise NotImplementedError("This method must be implemented!")

    def getNames(self):
        """Returns a list of the names of all attributes
        in the list."""
        raise NotImplementedError("This method must be implemented!")

    def getQNames(self):
        """Returns a list of the raw qualified names of all attributes
        in the list."""
        raise NotImplementedError("This method must be implemented!")

    def __len__(self):
        """Alias for getLength.

        Delegates to getLength, so it still raises NotImplementedError
        when getLength has not been overridden.
        """
        return self.getLength()

    def __getitem__(self, name):
        """Alias for getValue.

        Delegates to getValue, so it still raises NotImplementedError
        when getValue has not been overridden.
        """
        return self.getValue(name)

    def keys(self):
        "Returns a list of the attribute names in the list."
        raise NotImplementedError("This method must be implemented!")

    def has_key(self, name):
        "True if the attribute is in the list, false otherwise."
        raise NotImplementedError("This method must be implemented!")

    def get(self, name, alternative=None):
        """Return the value associated with attribute name; if it is not
        available, then return the alternative."""
        raise NotImplementedError("This method must be implemented!")

    def copy(self):
        "Return a copy of the Attributes object."
        raise NotImplementedError("This method must be implemented!")

    def items(self):
        "Return a list of (attribute_name, value) pairs."
        raise NotImplementedError("This method must be implemented!")

    def values(self):
        "Return a list of all attribute values."
        raise NotImplementedError("This method must be implemented!")
#============================================================================
#
# HANDLER INTERFACES
#
#============================================================================
# ===== DECLHANDLER =====
class DeclHandler:
    """Optional SAX2 handler for DTD declaration events.

    Some DTD declarations are already reported through the DTDHandler
    interface.  Every event delivered to this handler occurs between
    the startDTD and endDTD events of the LexicalHandler.

    A DeclHandler is installed on an XMLReader through the setProperty
    method, using the identifier
    http://xml.org/sax/handlers/DeclHandler.

    All methods of this class are empty defaults.
    """

    def attributeDecl(self, elem_name, attr_name, type, value_def, value):
        """Report an attribute type declaration.

        Only the first declaration will be reported.

        elem_name -- the element type name
        attr_name -- the attribute type name
        type      -- the attribute type: one of the strings "CDATA",
                     "ID", "IDREF", "IDREFS", "NMTOKEN", "NMTOKENS",
                     "ENTITY", "ENTITIES" or "NOTATION", or a list of
                     names (for enumerated definitions)
        value_def -- the default declaration: '#IMPLIED', '#REQUIRED',
                     '#FIXED' or None
        value     -- the attribute's default value as a string, or None
                     if there is none
        """

    def elementDecl(self, elem_name, content_model):
        """Report an element type declaration.

        Only the first declaration will be reported.

        content_model is either the string 'EMPTY', the string 'ANY',
        or a tuple (separator, tokens, modifier) describing the content
        model structure:

        separator -- the separator used in the token list ('|' or ',')
        tokens    -- the list of tokens (element type names, or tuples
                     representing parentheses)
        modifier  -- the quantity modifier ('*', '?' or '+')
        """

    def internalEntityDecl(self, name, value):
        """Report an internal entity declaration.

        Only the first declaration of an entity will be reported.

        name  -- the name of the entity; for a parameter entity the
                 name begins with '%'
        value -- the replacement text of the entity
        """

    def externalEntityDecl(self, name, public_id, system_id):
        """Report a parsed entity declaration.

        Unparsed entities are reported to the DTDHandler instead.  Only
        the first declaration for each entity will be reported.

        name      -- the name of the entity; for a parameter entity the
                     name begins with '%'
        public_id -- the public identifier of the entity, or None if
                     none was declared
        system_id -- the system identifier of the entity
        """
# ===== LEXICALHANDLER =====
class LexicalHandler:
    """Optional SAX2 handler for lexical events.

    This handler gives access to lexical information about an XML
    document: how the document was encoded rather than what it contains
    (the latter is reported to the ContentHandler).  Examples are
    comments and the boundaries of CDATA marked sections.

    A LexicalHandler is installed on an XMLReader through the
    setProperty method, using the property identifier
    'http://xml.org/sax/handlers/LexicalHandler'.  There is no guarantee
    that the XMLReader will support or recognize this property.

    All methods of this class are empty defaults.
    """

    def comment(self, content):
        """Report a comment anywhere in the document.

        This includes comments in the DTD and outside the document
        element.  content is a string holding the contents of the
        comment.
        """

    def startDTD(self, name, public_id, system_id):
        """Report the start of the DTD declarations, if the document
        has an associated DTD.

        A startEntity event is reported before declaration events from
        the external DTD subset are reported; this can be used to infer
        from which subset DTD declarations derive.

        name      -- the name of the document element type
        public_id -- the public identifier of the DTD, or None if none
                     was supplied
        system_id -- the system identifier of the external subset, or
                     None if none was supplied
        """

    def endDTD(self):
        """Signal the end of DTD declarations."""

    def startEntity(self, name):
        """Report the beginning of an entity.

        The start and end of the document entity is not reported.  The
        start and end of the external DTD subset is reported with the
        pseudo-name '[dtd]'.

        Skipped entities are reported through the skippedEntity event
        of the ContentHandler rather than through this event.

        name is the name of the entity; for a parameter entity the name
        begins with '%'.
        """

    def endEntity(self, name):
        """Report the end of an entity.

        name is the name of the entity and follows the same conventions
        as for startEntity.
        """

    def startCDATA(self):
        """Report the beginning of a CDATA marked section.

        The contents of the CDATA marked section are reported through
        the characters event.
        """

    def endCDATA(self):
        """Report the end of a CDATA marked section."""
#============================================================================
#
# SAX 1.0 COMPATIBILITY CLASSES
# Note that these are all deprecated.
#
#============================================================================
# ===== ATTRIBUTELIST =====
class AttributeList:
    """Interface for an attribute list. This interface provides
    information about a list of attributes for an element (only
    specified or defaulted attributes will be reported). Note that the
    information returned by this object will be valid only during the
    scope of the DocumentHandler.startElement callback, and the
    attributes will not necessarily be provided in the order declared
    or specified.

    The accessor methods of this base class are empty and return None.
    The __len__ and __getitem__ aliases delegate to getLength, getName
    and getValue, so a subclass that overrides those accessors gets
    len(attrs) and attrs[key] without overriding the aliases as well.
    """

    def getLength(self):
        "Return the number of attributes in list."

    def getName(self, i):
        "Return the name of an attribute in the list."

    def getType(self, i):
        """Return the type of an attribute in the list. (Parameter can be
        either integer index or attribute name.)"""

    def getValue(self, i):
        """Return the value of an attribute in the list. (Parameter can be
        either integer index or attribute name.)"""

    def __len__(self):
        """Alias for getLength.

        Returns whatever getLength returns; on this base class that is
        None, which len() rejects.
        """
        return self.getLength()

    def __getitem__(self, key):
        """Alias for getName (if key is an integer) and getValue (if string).

        Any key that is not an integer is passed to getValue.
        """
        if isinstance(key, int):
            return self.getName(key)
        return self.getValue(key)

    def keys(self):
        "Returns a list of the attribute names."

    def has_key(self, key):
        "True if the attribute is in the list, false otherwise."

    def get(self, key, alternative=None):
        """Return the value associated with attribute name; if it is not
        available, then return the alternative."""

    def copy(self):
        "Return a copy of the AttributeList."

    def items(self):
        "Return a list of (attribute_name,value) pairs."

    def values(self):
        "Return a list of all attribute values."
# ===== DOCUMENTHANDLER =====
class DocumentHandler:
    """Handle general document events.

    This is the main client interface for SAX: it holds the callbacks
    for the most important document events, such as the start and end
    of elements.  Create an object that implements this interface and
    register it with the Parser.  If you do not want to implement the
    entire interface, derive a class from HandlerBase, which implements
    the default functionality.  The location of any document event can
    be found through the Locator interface supplied by
    setDocumentLocator().

    All methods of this class are empty defaults.
    """

    def characters(self, ch, start, length):
        """Handle a character data event."""

    def endDocument(self):
        """Handle an event for the end of a document."""

    def endElement(self, name):
        """Handle an event for the end of an element."""

    def ignorableWhitespace(self, ch, start, length):
        """Handle an event for ignorable whitespace in element content."""

    def processingInstruction(self, target, data):
        """Handle a processing instruction event."""

    def setDocumentLocator(self, locator):
        """Receive an object for locating the origin of SAX document
        events."""

    def startDocument(self):
        """Handle an event for the beginning of a document."""

    def startElement(self, name, atts):
        """Handle an event for the beginning of an element."""
# ===== HANDLERBASE =====
class HandlerBase(EntityResolver, DTDHandler, DocumentHandler, ErrorHandler):
    """Default base class for handlers.

    This class supplies the default behaviour of four SAX interfaces:
    EntityResolver, DTDHandler, DocumentHandler and ErrorHandler.
    Instead of implementing those interfaces in full, you may simply
    extend this class and override the methods that you need.  Using
    this class is optional; you are free to implement the interfaces
    directly if you wish.
    """
# ===== PARSER =====
class Parser:
    """Basic interface for SAX (Simple API for XML) parsers.

    All SAX parsers must implement this basic interface: it lets users
    register handlers for different types of events and start a parse
    from a URI, a character stream, or a byte stream.  SAX parsers
    should also implement a zero-argument constructor.
    """

    def __init__(self):
        # Start out with the default handler of each kind, so every
        # handler attribute is set before any setter is called.
        self.err_handler = ErrorHandler()
        self.ent_handler = EntityResolver()
        self.dtd_handler = DTDHandler()
        self.doc_handler = DocumentHandler()

    # ----- parsing entry points (empty in this base class) -----

    def parse(self, systemId):
        """Parse an XML document from a system identifier."""

    def parseFile(self, fileobj):
        """Parse an XML document from a file-like object."""

    # ----- handler registration -----

    def setDocumentHandler(self, handler):
        """Register an object to receive basic document-related events."""
        self.doc_handler = handler

    def setDTDHandler(self, handler):
        """Register an object to receive basic DTD-related events."""
        self.dtd_handler = handler

    def setEntityResolver(self, resolver):
        """Register an object to resolve external entities."""
        self.ent_handler = resolver

    def setErrorHandler(self, handler):
        """Register an object to receive error-message events."""
        self.err_handler = handler

    # ----- localisation -----

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to provide localisation for errors
        and warnings; if they cannot support the requested locale,
        however, they must throw a SAX exception.  Applications may
        request a locale change in the middle of a parse.

        This base class always raises SAXNotSupportedException.
        """
        raise SAXNotSupportedException("Locale support not implemented")