[JDEV] jabber.py problems

Jacek Konieczny jajcus at bnet.pl
Wed Feb 6 13:35:05 CST 2002


On Wed, Feb 06, 2002 at 06:14:17PM +0100, Igor Stroh wrote:
> > > it doesn't work this way, don't ask my why :) to switch to utf-8, edit
> > > your site.py and change the line that says "encoding = 'ascii'" to
> > > "encoding = 'UTF-8'"
> > It is not a good thing (one Python hacker told me this, with some
> > arguments, that convinced me).
> > jabber.py should be fixed, so it uses proper encoding. 
> 
> there's no way to do it other than to follow the instructions at
> http://www.python.org/cgi-bin/faqw.py?req=show&file=faq04.102.htp
> 
...
> 
> again, this is a known issue, if you think there's a better way to handle
> this problem, please send a patch 
Here is a patch, attached. It is not very good or pretty, but I wrote it
just to show you how I think it should look.

IMHO the jabber.py module should work on Unicode, and it should not depend
in any way on the system or locale encoding. Applications are responsible
for encoding conversion, and if they don't do it well, it is OK that they
crash. Sometimes it is the only way to convince an ASCII-speaking developer
to fix this :-)

The problem is that the expat Python module doesn't support Unicode
very well :-( That's why the patch is so ugly (but I am sure there are
better ways to do this anyway).

This patch makes the sample jabber client work for me, with
international characters.

It could happen that a conversion error ("ordinal not in range") may
occur. If it is raised in jabber.py, it means something more has to be
fixed in the module. If it is raised in the application --- this means the
application is broken. Making the module silently convert international
characters to "?" is bad. I left this behaviour for log and debug
messages --- these are the only places where it seems OK to me.

> or a solution proposal to jabber.py
> mailing list 
> or just post in here, i'll forward the message to the list...
Could you do this, please?

Greets,
       Jacek

The ugly patch follows...

diff -durN jabber.py-0.3-1.orig/examples/test_client.py jabber.py-0.3-1/examples/test_client.py
--- jabber.py-0.3-1.orig/examples/test_client.py	Thu Jan 17 13:05:40 2002
+++ jabber.py-0.3-1/examples/test_client.py	Wed Feb  6 20:13:48 2002
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/python
 
 # $Id: test_client.py,v 1.9 2002/01/17 12:05:40 mallum Exp $
 
@@ -9,6 +9,7 @@
 from select import select
 from string import split,strip,join
 import sys,os
+import locale
 
 sys.path.insert(1, os.path.join(sys.path[0], '..'))
 
@@ -24,6 +25,12 @@
 MyStatus = ''
 MyShow   = ''
 
+loc = locale.getdefaultlocale()
+if loc[1]:
+	LocalEncoding=loc[1]
+else:
+	LocalEncoding=getdefaultencoding()
+
 def usage():
     print "%s: a simple python jabber client " % sys.argv[0]
     print "usage:"
@@ -107,7 +114,7 @@
         if Who != '':
             msg = jabber.Message(Who, strip(txt))
             msg.setType('chat')
-            print "<%s> %s" % (JID, msg.getBody())
+            print "<%s> %s" % (JID.encode(LocalEncoding,"replace"), msg.getBody().encode(LocalEncoding,"replace"))
             con.send(msg)
         else:
             print colorize('Nobody selected','red')
@@ -117,8 +124,8 @@
     """Called when a message is recieved"""
     if msg.getBody(): ## Dont show blank messages ##
         print colorize(
-            '<' + str(msg.getFrom()) + '>', 'green'
-            ) + ' ' + msg.getBody()
+            '<' + str(msg.getFrom()).encode(LocalEncoding,"replace") + '>', 'green'
+            ) + ' ' + msg.getBody().encode(LocalEncoding,"replace") 
 
 def presenceCB(con, prs):
     """Called when a presence is recieved"""
@@ -149,11 +156,23 @@
         print colorize("we are now unsubscribed to %s"  % (who), 'blue')
 
     elif type == 'available':
+        sh=prs.getShow()
+	if sh:
+		sh=sh.encode(LocalEncoding,"replace")
+        st=prs.getStatus()
+	if st:
+		st=st.encode(LocalEncoding,"replace")
         print colorize("%s is available (%s / %s)" % \
-                       (who, prs.getShow(), prs.getStatus()),'blue')
+                       (who, sh, st),'blue')
     elif type == 'unavailable':
+	sh=prs.getShow()
+	if sh:
+		sh=sh.encode(LocalEncoding,"replace")
+	st=prs.getStatus()
+	if st:
+		st=st.encode(LocalEncoding,"replace")
         print colorize("%s is unavailable (%s / %s)" % \
-                       (who, prs.getShow(), prs.getStatus()),'blue')
+                       (who, sh, st),'blue')
 
 
 def iqCB(con,iq):
@@ -243,7 +262,7 @@
     inputs, outputs, errors = select([sys.stdin], [], [],1)
 
     if sys.stdin in inputs:
-        doCmd(con,sys.stdin.readline())
+        doCmd(con,unicode(sys.stdin.readline(),LocalEncoding))
     else:
         con.process(1)
     
diff -durN jabber.py-0.3-1.orig/jabber.py jabber.py-0.3-1/jabber.py
--- jabber.py-0.3-1.orig/jabber.py	Thu Jan 17 13:05:40 2002
+++ jabber.py-0.3-1/jabber.py	Wed Feb  6 20:18:05 2002
@@ -155,7 +155,7 @@
 
     def send(self, what):
         """Sends a jabber protocol element (Node) to the server"""
-        xmlstream.Client.write(self,str(what))
+        xmlstream.Client.write(self,what)
 
     def dispatch(self, root_node ):
         """Called internally when a 'protocol element' is recieved.
@@ -364,7 +364,7 @@
 
     def send(self, what):
         """Sends a jabber protocol element (Node) to the server"""
-        xmlstream.Client.write(self,str(what))
+        xmlstream.Client.write(self,what.unicode())
 
     def sendInitPresence(self):
         """Sends an empty presence protocol element to the
@@ -603,6 +603,9 @@
         """returns an xmlstreamnode representation of the protocol element"""
         return self._node
     
+    def unicode(self):
+        return self._node.unicode()
+
     def __str__(self):
         return self._node.__str__()
 
diff -durN jabber.py-0.3-1.orig/xmlstream.py jabber.py-0.3-1/xmlstream.py
--- jabber.py-0.3-1.orig/xmlstream.py	Thu Jan 17 13:05:40 2002
+++ jabber.py-0.3-1/xmlstream.py	Wed Feb  6 20:22:18 2002
@@ -44,11 +44,6 @@
 STDIO   = 0
 TCP_SSL = 2
 
-ENCODING = site.encoding  ## fallback encoding to avoid random
-                          ## random UnicodeError: ASCII decoding error:
-                          ##                      ordinal not in range(128)
-                          ## type errors - being looked into. 
-
 BLOCK_SIZE  = 1024     ## Number of bytes to get at at time via socket
                        ## transactions
 
@@ -159,7 +154,28 @@
         return newnode
 
     def __str__(self):
-        return self._xmlnode2str()
+        return self.unicode()
+
+    def unicode(self, parent=None):
+        """Returns an xml ( Unicode ) representation of the node
+         and it children"""
+        s = u"<" + self.name  
+        if self.namespace:
+            if parent and parent.namespace != self.namespace:
+                s = s + u" xmlns = '%s' " % self.namespace
+        for key in self.attrs.keys():
+            val = str(self.attrs[key])
+            s = s + u" %s='%s'" % ( key, XMLescape(val) )
+        s = s + u">"
+        cnt = 0 
+        if self.kids != None:
+            for a in self.kids:
+                if (len(self.data)-1) >= cnt: s = s + XMLescape(self.data[cnt])
+                s = s + a._xmlnode2str(parent=self)
+                cnt=cnt+1
+        if (len(self.data)-1) >= cnt: s = s + XMLescape(self.data[cnt])
+        s = s + u"</" + self.name + u">"
+        return s
 
     def _xmlnode2str(self, parent=None):
         """Returns an xml ( string ) representation of the node
@@ -208,6 +224,7 @@
        method of Node"""
     def __init__(self,data):
         self._parser = xml.parsers.expat.ParserCreate(namespace_separator=' ')
+        self._parser.returns_unicode = 1
         self._parser.StartElementHandler  = self.unknown_starttag
         self._parser.EndElementHandler    = self.unknown_endtag
         self._parser.CharacterDataHandler = self.handle_data
@@ -298,8 +315,10 @@
             self._logFH = None
         
     def DEBUG(self,txt):
+        if type(txt) is type(u""):
+	    txt=txt.encode(sys.getdefaultencoding(),"replace")
         if self._debug:
-            sys.stderr.write("DEBUG: %s\n" % txt)
+            sys.stderr.write("DEBUG: %s\n" % txt )
 
     def getSocket(self):
         return self._sock
@@ -368,45 +387,42 @@
         data_in = u''
         if self._connection == TCP:
             data_in = data_in + \
-              unicode(self._sock.recv(BLOCK_SIZE),'utf-8').encode(ENCODING,
-                                                            'replace')
+              unicode(self._sock.recv(BLOCK_SIZE),'utf-8')
             while data_in:
                 data = data + data_in
                 if len(data_in) != BLOCK_SIZE:
                     break
-                data_in = unicode(self._sock.recv(BLOCK_SIZE),'utf-8').encode(
-                    ENCODING, 'replace')
-
+                data_in = unicode(self._sock.recv(BLOCK_SIZE),'utf-8')
         if self._connection == TCP_SSL:
             data_in = data_in + \
-              unicode(self._sslObj.recv(BLOCK_SIZE),'utf-8').encode(ENCODING,'replace')
+              unicode(self._sslObj.recv(BLOCK_SIZE),'utf-8')
             while data_in:
                 data = data + data_in
                 if len(data_in) != BLOCK_SIZE:
                     break
-                data_in = unicode(self._sslObj.recv(BLOCK_SIZE),'utf-8').encode(ENCODING, 'replace')
+                data_in = unicode(self._sslObj.recv(BLOCK_SIZE),'utf-8')
 
         elif self._connection == STDIO:
             ## Hope this dont buffer !
-            data_in = data_in + unicode(sys.stdin.read(1024),'utf-8').encode(
-                    ENCODING, 'replace')
-            while data_in:
+            data_in = data_in + unicode(sys.stdin.read(1024),'utf-8')
+	    while data_in:
                 data = data + data_in
                 if len(data_in) != 1024:
                     break
-                data_in = unicode(sys.stdin.read(1024),'utf-8').encode(
-                    ENCODING, 'replace')
+                data_in = unicode(sys.stdin.read(1024),'utf-8')
         else:
             pass # should never get here
             
         self.DEBUG("got data %s" % data )
         self.log(data, 'RECV:')
-        self._parser.Parse(data)
+        self._parser.Parse(data.encode("utf-8"))
         return data
     
     def write(self,data_out=u''):
         """Writes raw outgoing data. blocks"""
         try:
+            if type(data_out) is type(u''):
+                data_out=data_out.encode("utf-8")
             if self._connection == TCP:
                 self._sock.send (data_out)
             elif self._connection == TCP_SSL:
@@ -418,6 +434,7 @@
             self.log(data_out, 'SENT:')
             self.DEBUG("sent %s" % data_out)
         except:
+            raise
             self.DEBUG("xmlstream write threw error")
             self.disconnected()
             
@@ -461,9 +478,13 @@
     def log(self, data, inout=''):
         """Logs data to the specified filehandle. Data is time stamped
         and prefixed with inout"""
+	if type(data) is type(u""):
+		data=data.encode(sys.getdefaultencoding(),"replace")
+	if type(inout) is type(u""):
+		inout=data.encode(sys.getdefaultencoding(),"replace")
         if self._logFH is not None:
             self._logFH.write("%s - %s - %s\n" %           
-            (time.asctime(time.localtime(time.time())), inout, data ) )
+            (time.asctime(time.localtime(time.time())), inout, data)) 
         
     def getIncomingID(self):
         """Returns the streams ID"""



More information about the JDev mailing list