geRSSicht
changeset 83:8ee7f00c0819 heise-feed tip
Fixed a stupid ATOM-feed issue that Telepolis introduced
Now one can parse the Telepolis feed again
| author | Tobias Mueller (meatbox) <muelli@cryptobitch.de> |
|---|---|
| date | Fri, 15 Apr 2011 15:54:36 +0200 |
| parents | 824525444374 |
| children | |
| files | src/heisefeed.py src/telepolisfeed.py |
| diffstat | 2 files changed, 8 insertions(+), 1 deletions(-) [+] |
line diff
```diff
--- a/src/heisefeed.py	Sat Jan 02 16:44:02 2010 +0100
+++ b/src/heisefeed.py	Fri Apr 15 15:54:36 2011 +0200
@@ -237,6 +237,7 @@
     def fetch(self, url="http://heise-online.mobi/?seite=%d", index=0):
         url = url % index
         buf = urllib2.urlopen(url).read().decode('utf-8')
+        self.log.debug('Fetched %s', buf)
         return buf
 
     def fetch_and_parse(self):
```
```diff
--- a/src/telepolisfeed.py	Sat Jan 02 16:44:02 2010 +0100
+++ b/src/telepolisfeed.py	Fri Apr 15 15:54:36 2011 +0200
@@ -94,7 +94,12 @@
         return text
 
     def feed(self, xml):
-        xmlo = xmlobject.XMLFile( raw = xml.encode('utf-8'))
+        token = '</xml>\n'
+        if xml.endswith(token): # Hotfix for a stupid XML issue caused by a wrongly formatted ATOM feed
+            xml = xml[:-len(token)]
+        raw = xml.encode('utf-8')
+        self.log.debug('Trying to feed %s', xml)
+        xmlo = xmlobject.XMLFile( raw = raw)
 
         SUFFIX = "/1.html"
         LENGTH = 5
@@ -128,6 +133,7 @@
 
     def to_atom(self):
         html = self.fetch()
+        self.log.debug('fetched html: %s', html)
         p = TelepolisParser()
         atom = p.feed(html)
         return atom
```
