""" $RCSfile: PyanaProcessor.py,v $ This class encapsulates an XSLT Processor for use by ZopeXMLMethods. This is the Pyana version, including support for XSLT parameters. It does not yet include support for URN resolution. Author: Craeg Strong Release: 1.0 """ __cvstag__ = '$Name: $'[6:-2] __date__ = '$Date: 2003/03/30 03:45:47 $'[6:-2] __version__ = '$Revision: 1.12 $'[10:-2] # Pyana import Pyana # Zope from Acquisition import aq_get # python import os.path, sys, urllib # local peer classes from IXSLTProcessor import IXSLTProcessor ################################################################ # Defaults ################################################################ namespacesPropertyName = 'URNnamespaces' parametersPropertyName = 'XSLparameters' ################################################################ # PyanaProcessor class ################################################################ class PyanaProcessor: """ This class encapsulates an XSLT Processor for use by ZopeXMLMethods. This is the Pyana version, including support for XSLT parameters. It does not yet include support for URN resolution. """ __implements__ = IXSLTProcessor name = 'Pyana 0.6' def __init__(self): "Initialize a new instance of PyanaProcessor" self.debugLevel = 0 # This global variable determines which ProblemListener is used by # Pyana to report errors and messages during XSLT transformation Pyana.defaultProblemListenerFactory = ProblemListener # This global variable determines which ErrorHandler is used by # Pyana to report errors during XML parsing Pyana.defaultErrorHandlerFactory = ErrorHandler ################################################################ # Methods implementing the IXSLTProcessor interface ################################################################ def setDebugLevel(self, level): """ Set debug level from 0 to 3. 0 = silent 3 = extra verbose Debug messages go to Zope server log. """ self.debugLevel = level def transform(self, xmlContents, xmlURL, xsltContents, xsltURL, transformObject = None, REQUEST = None): """ Transforms the passed in XML into the required output (usually HTML) using the passed in XSLT. Both the XML and XSLT strings should be well-formed. Returns the output as a string. transformObject and REQUEST params may be used to acquire Zope content such as XSLT parameters and URN namespaces, if required. Catches any exceptions thrown by transformGuts and sends the error output to stderr, returns empty string to the caller. The idea is that web site users will at worst see an empty page. """ topLevelParams = None if transformObject is not None: topLevelParams = self.getXSLParameters(transformObject) if self.debugLevel > 1: print "params:", topLevelParams if self.debugLevel > 1: print "xsltContents:" print xsltContents print "xmlContents:" print xmlContents # Pyana does not work with unicode; rather it requires an encoding. # Unfortunately, ParsedXML gives us unicode. We coerce it to UTF-8 # The only place we get this today AFAIK is ParsedXML # @@ FIXME we need a better way... CKS 3/2/2003 if type(xmlContents) is type(u''): xmlContents = xmlContents.encode('utf8') if type(xsltContents) is type(u''): xsltContents = xsltContents.encode('utf8') try: result = self.transformGuts(xmlContents, xmlURL, xsltContents, xsltURL, transformObject, topLevelParams, REQUEST) except Exception, e: sys.stderr.write(str(e) + '\n') return "" return result def addParam(self, paramMap, name, value): """ This is a convenience function for adding parameters in the correct format to the parameter map to be used for the 'params' parameter in transformGuts. """ paramMap[ name ] = "'%s'" % (value) return paramMap def transformGuts(self, xmlContents, xmlURL, xsltContents, xsltURL, transformObject = None, params = None, REQUEST = None): """ Actually performs the transformation. Throws an Exception if there are any errors. """ namespaceMap = {} if transformObject is not None: namespaceMap = self.retrieveNamespaces(transformObject) if self.debugLevel > 1: print "namespaces:", namespaceMap if params is None: params = {} t = Pyana.Transformer() t.useValidation = 0 t.setStylesheetParams(params) t.setEntityResolver(URIResolver(namespaceMap, REQUEST)) result = t.transform2String(source = xmlContents, style = xsltContents) return result ################################################################ # Utility methods ################################################################ def retrieveNamespaces(self, transformObject): """ retrieves Namespaces defined for URI Resolution """ NIDs = aq_get(transformObject,namespacesPropertyName,None) result = {} if NIDs is not None: for n in NIDs: value = aq_get(transformObject,n,None) # I use callable() to determine if it is not a scalar. # If not, it must be a Zope object (I think) - WGM if callable(value): result[n] = value else: result[n] = str(value) return result def getXSLParameters(self, transformObject): """ Return XSL Transformer parameters as a dictionary of strings in the form 'name:value' as would be passed to an XSLT engine like Saxon, 4suite, etc. The values are obtained by looking for a property in the current context called 'XSLparameters', which should be a list of strings. Each name on the list is looked up in the current context. If its value is a scalar, then the pair 'name:value' is returned. If the value is an object, then the pair 'name:url' is returned where url is the absolute URL of the object. The key (name) is actually a tuple of two strings, the first of which is an optional namespace (we don't use this today). """ parms = aq_get(transformObject,parametersPropertyName,None) result = {} if parms is not None: for p in parms: value = aq_get(transformObject,p,None) # I use callable() to determine if it is not a scalar. # If not, it must be a Zope object (I think) - WGM if callable(value): self.addParam(result, p, value.absolute_url()) else: self.addParam(result, p, str(value)) return result ################################################################ # Pyana API Hooks ################################################################ class ProblemListener: """ This class replaces the default class used by Pyana to report errors and XSLT messages that occur during the XSLT transformation process. """ def problem( self, where, classification, sourceNode, styleNode, msg, uri, line, offset): """ The default error handler for Pyana prints out messages to stderr. Throw an exception instead. """ import sys message = '' typeMap = { Pyana.eERROR: 'Error: ', Pyana.eWARNING: 'Warning: ', Pyana.eMESSAGE: 'Message: ' } message = typeMap[classification] + msg if uri or line != -1 or offset != -1: if line != -1 or offset != -1: message = message + \ ' [systemID: "%s" (line: %s, column: %s)]' % \ (uri, line, offset) else: message = message + ' [systemID: "%s"]'% (uri,) raise Exception(message) class ErrorHandler: """ This class replaces the default class used by Pyana to report errors that occur during the parsing of XML files. """ def common(self, name, e): message = '%s: %s' % (name, e.message) if e.systemID or e.lineNumber or e.columnNumber: message = message + \ '[systemID: "%s" (line: %s, column: %s)]' % \ (e.systemID, e.lineNumber, e.columnNumber) return message def warning(self, e): # From Pyana-0.6/Test/TestEntityResolver.py line 85... # # A request is made to open the NULL string # because Xerces is attempting to load the # XML file's schema. Whether that is a bug # or not is still up in the air. testXSLT # has the same problem. # # When this happens, a warning is generated. It looks like this: # # Warning: An exception occurred! Type:RuntimeException, Message:Warning: The primary document entity could not be opened. Id= # print self.common('Warning', e) pass def error(self, e): print self.common('Error', e) def fatalError(self, e): raise Exception(self.common('FatalError', e)) def resetErrors(self): pass class URIResolver: def __init__(self, namespaceMap, REQUEST): """ Remember the URN namespaces corresponding to Zope folders and the REQUEST context with which we want to load the resources """ self.namespaceMap = namespaceMap self.req = REQUEST def acquireObjectContents(self, base, contextURL, REQUEST): """ Obtain the contents of the Zope object indicated by the passed in context, starting from the passed in base object. """ #print "acquire contents for:",contextURL zObject = base #print "base", zObject.getId() # # why doesn't the below work? Is this a bug? # (see com/arielpartners/website/scripts/resolver.dtml) # zObject = base.restrictedTraverse(contextURL) # sigh. Do it the hard way. contextList = contextURL.split('/') for context in contextList: zObject = aq_get(zObject,context,None) if zObject is None: return None contents = zObject(zObject, REQUEST) return contents def resolveEntity(self, public, sysid): "Resolve the URN to a Zope object, or pass on the request" if self.isRecognizedURN(sysid): #print "resolveEntity, publicID '%s', sysid '%s'" % (public, sysid) uri = sysid[sysid.find('urn:'):] uriParts = uri.split(':') nid = uriParts[1] # namespace ID nss = uriParts[2] # namespace specific string base = self.namespaceMap.get(nid, None) if base is None: # revert to normal behavior return None elif type(base) == type(''): # We are mapping one URL to another a la XMLCatalog RewriteURI # # could use urllib join, but it replaces the last component if no trailing slash. e.g. # # urllib.join ("http://www.foo.com/bar", "mumble.xml") ==> http://www.foo.com/mumble.xml # os.path.join ("http://www.foo.com/bar", "mumble.xml") ==> http://www.foo.com/bar/mumble.xml resolvedURL = os.path.join(base, nss) return urllib.urlopen(resolvedURL).read() else: # its a Zope object, we must retrieve its contents st = self.acquireObjectContents(base, nss, self.req) if st is None: # failure, cannot grab object, revert to normal behavior return None else: # load the resource from the Zope object return StringInputStream(st) else: # revert to normal behavior return None def isRecognizedURN(self, uri): "Return true if this uri is of a format we recognize" if uri == '': return 0 # false if uri.find('urn:') == -1: return 0 # false uri = uri[uri.find('urn:'):] uriParts = uri.split(':') return uriParts[0] == 'urn' and len(uriParts) == 3 class StringInputStream: def __init__(self, string): self.string = string.encode('utf-8') def makeStream(self): # The returned stream can be any object with # a read(size) method. So open(...), urllib.open(...), # etc. will all generate valid streams. In this case, # we will use StringIO to present a string as a # stream. We can't use cStringIO, b/c it might be # UNICODE. from StringIO import StringIO return StringIO(self.string)