[Twisted-Python] Way to fix memory leaks of external c module
MārisR
maris at chown.lv
Sat Nov 28 06:05:10 MST 2009
Hello!
Currently I'm trying to write a small XML-RPC server for HTML data processing. The processing is done by the HTML Tidy library, but the problem is that it has a massive memory leak.
As the processing is a blocking operation, I'm running it in a thread, but after some time and after processing huge HTML documents, the daemon eats all the memory.
I'm wondering if it's possible to load utidylib in a thread, do the processing, and afterwards kill the thread and release the memory? Or maybe use something like deferToProcess?
Thanks in advance!
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Module setup for the Tidy XML-RPC daemon: installs the epoll reactor,
# forces UTF-8 as the default string encoding, starts Twisted logging to
# stdout and switches on garbage-collector debugging.

# NOTE(review): code further down calls `tidy.parseString`, a name this
# import does not bind -- confirm the intended module name.
import utidylib
from twisted.internet import epollreactor
# install() runs before `reactor` is imported on the next line -- presumably
# so the epoll reactor becomes the global reactor; keep this ordering.
epollreactor.install()
from twisted.internet import protocol, defer, threads, reactor
from twisted.web import xmlrpc, server
from twisted.python import log, threadpool
import sys
# reload(sys) precedes sys.setdefaultencoding -- presumably to make that
# function reachable again; the call makes UTF-8 the process-wide default.
reload(sys)
sys.setdefaultencoding('utf-8')
# Route Twisted log output to stdout.
log.startLogging(sys.stdout)
import codecs
import gc
# Garbage-collector debugging: collection enabled, DEBUG_LEAK diagnostics on,
# generation-0 threshold set to 1.
# NOTE(review): per the gc module documentation DEBUG_LEAK includes
# DEBUG_SAVEALL, which stores collected objects in gc.garbage instead of
# freeing them -- confirm this is not itself contributing to memory growth.
gc.enable()
gc.set_debug(gc.DEBUG_LEAK)
gc.set_threshold(1)
class TidyProtocol(xmlrpc.XMLRPC):
    """XML-RPC resource that cleans up HTML with HTML Tidy.

    The blocking Tidy call is run through ``threads.deferToThread`` so the
    reactor thread is not held up while a document is processed.
    """

    # Tidy options applied to every document. Built once at class level
    # instead of on every request; treated as read-only.
    TIDY_OPTIONS = {
        'drop-proprietary-attributes': '1',
        'output-xhtml': '1',
        'wrap': '0',
        'bare': '0',
        'clean': '1',
        'doctype': 'omit',
        'show-body-only': '1',
        'word-2000': '0',
        'escape-cdata': '0',
        'hide-comments': '1',
        'force-output': '1',
        'alt-text': '',
        'show-errors': '0',
        'show-warnings': '0',
        'tidy-mark': '0',
        'char-encoding': 'utf8',
    }

    def xmlrpc_tidify(self, data):
        """Tidy the markup found under ``data['html']``.

        :param data: mapping sent by the client; must contain the key
            ``'html'``.
        :return: a Deferred that fires with the value produced by
            ``tidyParse`` after it has passed through ``returnToClient``.
        """
        deferred = threads.deferToThread(self.tidyParse, data)
        deferred.addCallback(self.returnToClient)
        return deferred

    def tidyParse(self, data):
        """Run HTML Tidy over ``data['html']``; called in a worker thread.

        :param data: mapping with an ``'html'`` entry (text or ``None``).
        :return: the tidied markup as a string, or ``None`` when
            ``data['html']`` is ``None``.
        :raises KeyError: if ``data`` has no ``'html'`` entry.
        """
        # The module top level imports ``utidylib`` but never binds the name
        # ``tidy`` that is called below, so the original raised NameError on
        # every request. uTidylib exposes its API as the ``tidy`` module.
        import tidy

        html = data['html']
        if html is None:
            return None

        # Encode explicitly rather than depending on the process-wide
        # default encoding.
        raw = html.encode('utf-8')
        print("Tidy start")
        # Convert to a plain string here, inside the worker thread: the
        # result is then a value XML-RPC can send, and no reference to the
        # Tidy document object outlives this call.
        return str(tidy.parseString(raw, **self.TIDY_OPTIONS))

    def returnToClient(self, data):
        """Callback for the Deferred: force a GC pass, then pass *data* on.

        :param data: the value produced by ``tidyParse``.
        :return: *data*, unchanged.
        """
        gc.collect()
        print("Tidy end, retunring result")
        return data
if __name__ == '__main__':
    # Script entry point: wrap the XML-RPC resource in an HTTP site, listen
    # on TCP port 1100 and hand control to the reactor loop.
    site = server.Site(TidyProtocol())
    reactor.listenTCP(1100, site)
    reactor.run()
More information about the Twisted-Python
mailing list