# Fetch the etbe.coker.com.au feed, tidy the HTML embedded in every
# <content:encoded> element, and print the resulting XML to stdout.
#
# Tidying means: strip a trailing " [N]" marker from the text of each link
# and drop the last <ul> of each entry (presumably the numbered list of link
# targets that the markers refer to -- confirm against the live feed).
#
# NOTE: this is a Python 2 script (BeautifulSoup 3, urllib.urlopen, unicode).
import re
import urllib

from lxml import etree
from BeautifulSoup import BeautifulSoup

FEED_URL = 'http://etbe.coker.com.au/feed/'

# Matches a " [N]" marker at the very end of a text node.
FOOTNOTE_MARKER = re.compile(r' \[\d+\]$')

# Close the HTTP handle once lxml has consumed it instead of leaking it.
feed = urllib.urlopen(FEED_URL)
try:
    tree = etree.parse(feed)
finally:
    feed.close()

for item in tree.xpath('//*[name()="content:encoded"]'):
    soup = BeautifulSoup(item.text)

    for link in soup.findAll('a'):
        # Iterate over a snapshot: replaceWith() edits link.contents in place.
        for child in list(link.contents):
            # Kids and their damn hypertext
            # Only text nodes can carry a marker; nested tags (e.g. an <img>
            # inside the link) are not strings and would make re.sub raise
            # TypeError, so leave them untouched.
            if not isinstance(child, basestring):
                continue
            child.replaceWith(FOOTNOTE_MARKER.sub('', child))

    # Remove the last <ul> of the entry, if the entry has one at all.
    lists = soup.findAll('ul')
    if lists:
        lists[-1].extract()

    item.text = unicode(soup)

print(etree.tostring(tree))