pyjama_xml.py

00001 #!/usr/bin/env python
00002 # -*- coding: utf-8 -*-
00003 
00004 # ----------------------------------------------------------------------------
00005 # pyjama_xml - xml parser for the new jamendo database dumps
00006 # Copyright (c) 2008 Daniel Nögel
00007 #
00008 # This program is free software: you can redistribute it and/or modify
00009 # it under the terms of the GNU General Public License as published by
00010 # the Free Software Foundation, either version 3 of the License, or
00011 # (at your option) any later version.
00012 #
00013 # This program is distributed in the hope that it will be useful,
00014 # but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016 # GNU General Public License for more details.
00017 # You should have received a copy of the GNU General Public License
00018 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
00019 # ----------------------------------------------------------------------------
00020 
00021 ########################################################
00022 #                                                      #
00023 # this script is based on Dale Jefferson's 'pyjamendo' #
00024 # thanks for that, it really helped me with my jamendo #
00025 # player                                               #
00026 #                                                      #
00027 ########################################################
00028 
00029 
00030 from xml.sax import make_parser
00031 from xml.sax.handler import ContentHandler
00032 
00033 
00034 
def parse_xml(xml, parent):
    """Parse an XML database dump and hand the collected records to *parent*.

    A ``Magnatune_handler`` is attached to a SAX parser and fed the file.
    The handler itself writes batches to *parent* while parsing; whatever is
    still buffered in the handler once parsing has finished is inserted here.

    Parameters:
        xml    -- path of the XML dump to read.
        parent -- object providing ``insert_artists``, ``insert_albums``,
                  ``insert_tracks`` and ``insert_tags``; it is also passed on
                  to the handler.

    Returns: None. A summary line with the final totals is printed.
    """
    parser = make_parser()
    handler = Magnatune_handler(parent)
    parser.setContentHandler(handler)

    # Binary mode lets the parser decode the document according to its own
    # XML declaration.  try/finally (rather than ``with``) guarantees the
    # file is closed even on a parse error and works on old interpreters.
    source = open(xml, "rb")
    try:
        parser.parse(source)
    finally:
        source.close()

    # Insert the records left over since the handler's last batch write.
    parent.insert_artists(handler.artists)
    parent.insert_albums(handler.albums)
    parent.insert_tracks(handler.tracks)
    parent.insert_tags(handler.tags)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("%i Artists, %i Albums and %i Tracks and %i Tags computed." % (
        handler.total_artists, handler.total_albums,
        handler.total_tracks, handler.total_tags))
00046 
00047 
class Magnatune_handler(ContentHandler):
    """SAX content handler collecting artists, albums, tracks and tags.

    The handler reacts to ``artist``, ``album``, ``track`` and ``tag``
    elements and to the leaf elements listed in the ``*_FIELDS`` tuples
    below; every other element is ignored.  When a record element closes, a
    dict of its fields is appended to ``artists``, ``albums``, ``tracks`` or
    ``tags``.  Albums, tracks and tags additionally carry the ids of the
    artist / album / track that were current at that moment.

    State is kept in plain attributes:
        in_artist, in_album, in_track, in_track_tag
            True while inside the corresponding record element.
        in_<record>_<field>   (e.g. ``in_album_name``)
            True while inside that leaf element.
        <record>_<field>      (e.g. ``album_name``)
            Text collected for that leaf element.
    Tag fields use the prefix ``track_tag_``.

    *parent* must provide ``insert_artists``, ``insert_albums``,
    ``insert_tracks``, ``insert_tags`` and a ``pyjama`` attribute; these are
    only touched when a progress report or a batch write is due.
    """

    # Leaf elements recognised inside each kind of record.
    ARTIST_FIELDS = ("id", "name", "url", "image", "mbgid", "country",
                     "state", "city", "latitude", "longitude")
    ALBUM_FIELDS = ("id", "name", "url", "releasedate", "filename",
                    "id3genre", "mbgid", "license_artwork")
    TRACK_FIELDS = ("id", "name", "duration", "numalbum", "filename",
                    "mbgid", "id3genre", "license")
    TAG_FIELDS = ("idstr", "weight")

    # A progress report is issued once more than this many artists were
    # finished since the last report ...
    PROGRESS_INTERVAL = 10
    # ... and the buffered records are written out once more than this many
    # artists were finished since the last write.
    FLUSH_INTERVAL = 500

    # (flag attribute, value attribute) pairs consulted by characters().
    # Within each chain only the first pair whose flag is set receives text.
    _TEXT_CHAINS = (
        tuple(("in_artist_" + f, "artist_" + f) for f in ARTIST_FIELDS),
        tuple(("in_album_" + f, "album_" + f) for f in ALBUM_FIELDS),
        tuple(("in_track_" + f, "track_" + f) for f in TRACK_FIELDS)
        + (("in_track_tag_weight", "track_tag_weight"),
           ("in_track_tag_idstr", "track_tag_idstr")),
    )

    def __init__(self, parent):
        """Create an empty handler reporting to *parent*."""
        # ContentHandler is an old-style class on Python 2, so no super().
        ContentHandler.__init__(self)
        self.parent = parent

        # Records collected since the last batch write.
        self.artists = []
        self.albums = []
        self.tracks = []
        self.tags = []

        # Position flags: records first, then every leaf element.
        self.in_artist = False
        self.in_album = False
        self.in_track = False
        self.in_track_tag = False
        for chain in self._TEXT_CHAINS:
            for flag, _value in chain:
                setattr(self, flag, False)

        # Running totals; last_artists1 / last_artists2 remember the artist
        # total at the last progress report / batch write respectively.
        self.total_artists = 0
        self.last_artists1 = 0
        self.last_artists2 = 0
        self.total_albums = 0
        self.total_tracks = 0
        self.total_tags = 0

        # Tracks seen in the current album / albums seen in the current artist.
        self.trackcount = 0
        self.albumcount = 0
        # Not read anywhere in this class; kept for backward compatibility.
        self.lastid = 0

        self.set_artist_vars()
        self.set_album_vars()
        self.set_track_vars()

        # Only ever assigned in this class; kept for backward compatibility.
        self.track_tags = None

    ###################################################################
    #
    # set_*_vars reset all text fields of artists / albums / tracks to ''
    # RETURNS: n/A
    #
    def set_artist_vars(self):
        """Reset every ``artist_*`` text field to the empty string."""
        self._clear("artist_", self.ARTIST_FIELDS)

    def set_album_vars(self):
        """Reset every ``album_*`` text field to the empty string."""
        self._clear("album_", self.ALBUM_FIELDS)

    def set_track_vars(self):
        """Reset every ``track_*`` and ``track_tag_*`` text field to ''."""
        self._clear("track_", self.TRACK_FIELDS)
        self._clear("track_tag_", self.TAG_FIELDS)

    # ----------------------------------------------------------------
    # private helpers
    # ----------------------------------------------------------------
    def _clear(self, prefix, fields):
        """Set ``<prefix><field>`` to '' for every field in *fields*."""
        for field in fields:
            setattr(self, prefix + field, '')

    def _open_field(self, prefix, field):
        """Mark leaf element *field* as open and start with empty text."""
        setattr(self, "in_" + prefix + field, True)
        # characters() appends, so the value has to start out empty.
        setattr(self, prefix + field, '')

    def _close_field(self, prefix, field):
        """Mark leaf element *field* as closed."""
        setattr(self, "in_" + prefix + field, False)

    def _snapshot(self, prefix, fields):
        """Return a dict mapping each field name to its collected text."""
        return dict((field, getattr(self, prefix + field)) for field in fields)

    ###################################################################
    #
    # called when a new element is opened
    # RETURNS: n/A
    #
    def startElement(self, name, attrs):
        """Update the position flags for the opening element *name*.

        *attrs* is not used.  Text fields of a record are cleared when the
        record opens so that values of the previous record cannot leak into
        one that lacks the element.
        """
        if name == "track":
            self.in_track = True
            self.trackcount += 1
            self.set_track_vars()
        elif self.in_track:
            # Inside a track only track and tag elements are considered.
            if name in self.TRACK_FIELDS:
                self._open_field("track_", name)
            elif name == "tag":
                self.in_track_tag = True
                self.track_tags = []
                self._clear("track_tag_", self.TAG_FIELDS)
            elif name in self.TAG_FIELDS:
                self._open_field("track_tag_", name)
        elif self.in_album:
            if name in self.ALBUM_FIELDS:
                self._open_field("album_", name)
        elif name == "artist":
            self.in_artist = True
        elif name == "album":
            self.in_album = True
            self.albumcount += 1
            self.set_album_vars()
        elif self.in_artist and name in self.ARTIST_FIELDS:
            self._open_field("artist_", name)

    ###################################################################
    #
    # called for the chars between an opening and closing element
    # RETURNS: n/A
    #
    def characters(self, ch):
        """Append the text chunk *ch* to the leaf field that is open.

        A parser may deliver the text of one element in several chunks, so
        the chunk is appended instead of replacing what was collected so far.
        Text outside of a recognised leaf element is ignored.
        """
        for chain in self._TEXT_CHAINS:
            for flag, value in chain:
                if getattr(self, flag):
                    setattr(self, value, getattr(self, value) + ch)
                    break

    ###################################################################
    #
    # called whenever an element is closed
    # RETURNS: n/A
    #
    def endElement(self, name):
        """Close the element *name*; store the record if it was one."""
        if name == "track":
            self._end_track()
        elif self.in_track:
            if name in self.TAG_FIELDS:
                self._close_field("track_tag_", name)
            elif name == "tag":
                self._end_tag()
            elif name in self.TRACK_FIELDS:
                self._close_field("track_", name)
        elif name == "album":
            self._end_album()
        elif self.in_album:
            if name in self.ALBUM_FIELDS:
                self._close_field("album_", name)
        elif name == "artist":
            self._end_artist()
        elif self.in_artist and name in self.ARTIST_FIELDS:
            self._close_field("artist_", name)

    def _end_tag(self):
        """Store the finished tag together with the ids it belongs to."""
        self.in_track_tag = False
        self.total_tags += 1
        self.tags.append({
            "artist_id": self.artist_id,
            "album_id": self.album_id,
            "track_id": self.track_id,
            "idstr": self.track_tag_idstr,
            "weight": self.track_tag_weight,
        })
        self.track_tags = []

    def _end_track(self):
        """Store the finished track with its album and artist ids."""
        self.in_track = False
        track = self._snapshot("track_", self.TRACK_FIELDS)
        track['album_id'] = self.album_id
        track['artist_id'] = self.artist_id
        self.tracks.append(track)
        self.total_tracks += 1

    def _end_album(self):
        """Store the finished album and restart the per-album track count."""
        self.in_album = False
        album = self._snapshot("album_", self.ALBUM_FIELDS)
        album['trackcount'] = self.trackcount
        album['artist_id'] = self.artist_id
        self.albums.append(album)
        self.total_albums += 1
        self.trackcount = 0

    def _end_artist(self):
        """Store the finished artist, then report / write out if due."""
        self.in_artist = False
        artist = self._snapshot("artist_", self.ARTIST_FIELDS)
        artist['albumcount'] = self.albumcount
        self.artists.append(artist)
        self.total_artists += 1

        if self.total_artists > self.last_artists1 + self.PROGRESS_INTERVAL:
            self._report_progress()
            self.last_artists1 = self.total_artists
        if self.total_artists > self.last_artists2 + self.FLUSH_INTERVAL:
            self._flush()
            self.last_artists2 = self.total_artists

        self.albumcount = 0
        self.set_artist_vars()

    def _report_progress(self):
        """Raise a progress event on parent.pyjama, or print the totals."""
        if self.parent.pyjama:
            self.parent.pyjama.Events.raise_event(
                "dbtools_message", "xml", self.total_artists)
        else:
            # Single-argument print(...) works on Python 2 and Python 3.
            print("%i Artists, %i Albums and %i Tracks and %i Tags computed." % (
                self.total_artists, self.total_albums,
                self.total_tracks, self.total_tags))

    def _flush(self):
        """Hand the buffered records to the parent and empty the buffers."""
        if not self.parent.pyjama:
            print("... writing to database")
        self.parent.insert_artists(self.artists)
        self.parent.insert_albums(self.albums)
        self.parent.insert_tracks(self.tracks)
        self.parent.insert_tags(self.tags)
        # Rebind rather than clear in place: the parent may still hold the
        # lists it was just given.
        self.artists = []
        self.albums = []
        self.tracks = []
        self.tags = []
00422 

Generated on Thu Jun 4 19:08:24 2009 for Pyjama by  doxygen 1.5.8