00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 from xml.sax import make_parser
00031 from xml.sax.handler import ContentHandler
00032
00033
00034
def parse_xml(xml, parent):
    """Parse the XML file at path *xml* and store its records via *parent*.

    A ``Magnatune_handler`` collects artists, albums, tracks and tags while
    the SAX parser runs.  Whatever the handler still holds in its buffers
    once parsing has finished is passed to *parent*, and a summary line with
    the handler's running totals is printed.

    xml    -- path of the XML file to read
    parent -- object providing ``insert_artists``, ``insert_albums``,
              ``insert_tracks`` and ``insert_tags``; it is also handed to
              the handler
    """
    parser = make_parser()
    handler = Magnatune_handler(parent)
    parser.setContentHandler(handler)

    # Open the file ourselves so it is reliably closed again, even when the
    # parser raises.  Binary mode leaves decoding to the XML parser instead
    # of the platform's default text encoding.
    source = open(xml, "rb")
    try:
        parser.parse(source)
    finally:
        source.close()

    # Store the records collected since the handler's last intermediate
    # write.
    parent.insert_artists(handler.artists)
    parent.insert_albums(handler.albums)
    parent.insert_tracks(handler.tracks)
    parent.insert_tags(handler.tags)
    print("%i Artists, %i Albums and %i Tracks and %i Tags computed." % (handler.total_artists, handler.total_albums, handler.total_tracks, handler.total_tags))
    return
00046
00047
class Magnatune_handler(ContentHandler):
    """SAX handler that turns artist/album/track/tag elements into dicts.

    The handler reacts to ``artist``, ``album``, ``track`` and ``tag``
    elements and to the leaf elements listed in the ``_*_FIELDS`` tables
    below.  Each finished element becomes a dict that is appended to
    ``artists``, ``albums``, ``tracks`` or ``tags``.  Albums, tracks and
    tags also record the ids of the records that are current when they are
    closed.

    After each artist the handler may report progress and may hand the
    buffered records to ``parent`` (see ``_finish_artist``); the buffers are
    then replaced by fresh empty lists.
    """

    # Leaf element names whose text is stored for each kind of record.
    _ARTIST_FIELDS = ("id", "name", "url", "image", "mbgid", "country",
                      "state", "city", "latitude", "longitude")
    _ALBUM_FIELDS = ("id", "name", "url", "releasedate", "filename",
                     "id3genre", "mbgid", "license_artwork")
    _TRACK_FIELDS = ("id", "name", "duration", "numalbum", "filename",
                     "mbgid", "id3genre", "license")
    _TAG_FIELDS = ("idstr", "weight")

    # Progress is reported once more than this many artists were added
    # since the last report ...
    _REPORT_EVERY = 10
    # ... and buffers are written out once more than this many artists were
    # added since the last write.
    _FLUSH_EVERY = 500

    def __init__(self, parent):
        """Create an empty handler.

        parent -- object providing ``insert_artists``, ``insert_albums``,
                  ``insert_tracks``, ``insert_tags`` and a ``pyjama``
                  attribute; it is only accessed once enough artists have
                  been read to trigger a progress report or a write.
        """
        ContentHandler.__init__(self)
        self.parent = parent

        # Buffers of finished records (lists of dicts).
        self.artists = []
        self.albums = []
        self.tracks = []
        self.tags = []

        # Which kind of element the parser is currently inside.
        self.in_artist = False
        self.in_album = False
        self.in_track = False
        self.in_track_tag = False

        # Name of the attribute receiving character data, or None while no
        # known leaf element is open.
        self._target = None

        # Running totals; last_artists1 / last_artists2 remember the artist
        # total at the last progress report / last write.
        self.total_artists = 0
        self.last_artists1 = 0
        self.last_artists2 = 0
        self.total_albums = 0
        self.total_tracks = 0
        self.total_tags = 0

        # Tracks seen in the current album / albums seen for the current
        # artist.
        self.trackcount = 0
        self.albumcount = 0
        self.lastid = 0

        self.set_artist_vars()
        self.set_album_vars()
        self.set_track_vars()

        self.track_tags = None

    # ------------------------------------------------------------------
    # field storage
    # ------------------------------------------------------------------

    def _clear(self, prefix, fields):
        """Set ``self.<prefix><field>`` to '' for every name in *fields*."""
        for field in fields:
            setattr(self, prefix + field, '')

    def set_artist_vars(self):
        """Reset all ``artist_*`` values to the empty string."""
        self._clear("artist_", self._ARTIST_FIELDS)

    def set_album_vars(self):
        """Reset all ``album_*`` values to the empty string."""
        self._clear("album_", self._ALBUM_FIELDS)

    def set_track_vars(self):
        """Reset all ``track_*`` and ``track_tag_*`` values to ''."""
        self._clear("track_", self._TRACK_FIELDS)
        self._clear("track_tag_", self._TAG_FIELDS)

    def _begin_field(self, attr):
        """Start collecting text for attribute *attr* from scratch."""
        setattr(self, attr, '')
        self._target = attr

    def _end_field(self, attr):
        """Stop collecting text if *attr* is the field being collected."""
        if self._target == attr:
            self._target = None

    # ------------------------------------------------------------------
    # SAX callbacks
    # ------------------------------------------------------------------

    def startElement(self, name, attrs):
        """Track element nesting and open the matching field, if any.

        ``attrs`` is required by the SAX interface but not used.
        """
        if name == "track":
            self.in_track = True
            self.trackcount += 1
        elif self.in_track:
            # Inside a track only track and tag elements are looked at.
            if name == "tag":
                self.in_track_tag = True
                self.track_tags = []
            elif name in self._TAG_FIELDS:
                self._begin_field("track_tag_" + name)
            elif name in self._TRACK_FIELDS:
                self._begin_field("track_" + name)
        elif self.in_album:
            if name in self._ALBUM_FIELDS:
                self._begin_field("album_" + name)
        elif name == "artist":
            self.in_artist = True
        elif name == "album":
            self.in_album = True
            self.albumcount += 1
        elif self.in_artist and name in self._ARTIST_FIELDS:
            self._begin_field("artist_" + name)

    def characters(self, ch):
        """Append *ch* to the field that is currently open.

        The parser may deliver the text of one element in several calls, so
        the chunks are concatenated rather than overwriting each other.
        """
        if self._target is not None:
            setattr(self, self._target, getattr(self, self._target) + ch)

    def endElement(self, name):
        """Close the open field or finish the record that just ended."""
        if name == "track":
            self._finish_track()
        elif self.in_track:
            if name == "tag":
                self._finish_tag()
            elif name in self._TAG_FIELDS:
                self._end_field("track_tag_" + name)
            elif name in self._TRACK_FIELDS:
                self._end_field("track_" + name)
        elif name == "album":
            self._finish_album()
        elif self.in_album:
            if name in self._ALBUM_FIELDS:
                self._end_field("album_" + name)
        elif name == "artist":
            self._finish_artist()
        elif self.in_artist and name in self._ARTIST_FIELDS:
            self._end_field("artist_" + name)

    # ------------------------------------------------------------------
    # record completion
    # ------------------------------------------------------------------

    def _record(self, prefix, fields):
        """Return a dict mapping each field name to ``self.<prefix><field>``."""
        return dict((field, getattr(self, prefix + field)) for field in fields)

    def _finish_tag(self):
        """Buffer the tag that just ended, linked to the current ids."""
        self.in_track_tag = False
        self.total_tags += 1

        tag = {"artist_id": self.artist_id, "album_id": self.album_id, "track_id": self.track_id, "idstr": self.track_tag_idstr, "weight": self.track_tag_weight}
        self.tags.append(tag)

        self.track_tags = []
        # Do not let this tag's values leak into the next tag.
        self._clear("track_tag_", self._TAG_FIELDS)

    def _finish_track(self):
        """Buffer the track that just ended, linked to album and artist."""
        self.in_track = False
        track = self._record("track_", self._TRACK_FIELDS)
        track['album_id'] = self.album_id
        track['artist_id'] = self.artist_id
        self.tracks.append(track)
        self.total_tracks += 1
        # Do not let this track's values leak into the next track.
        self.set_track_vars()

    def _finish_album(self):
        """Buffer the album that just ended, linked to the artist."""
        self.in_album = False
        album = self._record("album_", self._ALBUM_FIELDS)
        album['trackcount'] = self.trackcount
        album['artist_id'] = self.artist_id
        self.albums.append(album)

        self.total_albums += 1
        self.trackcount = 0
        # Do not let this album's values leak into the next album.
        self.set_album_vars()

    def _finish_artist(self):
        """Buffer the artist that just ended; maybe report and write out."""
        self.in_artist = False
        artist = self._record("artist_", self._ARTIST_FIELDS)
        artist['albumcount'] = self.albumcount
        self.artists.append(artist)

        self.total_artists += 1
        if self.total_artists > self.last_artists1 + self._REPORT_EVERY:
            self._report_progress()
            self.last_artists1 = self.total_artists
        if self.total_artists > self.last_artists2 + self._FLUSH_EVERY:
            self._flush()
            self.last_artists2 = self.total_artists

        self.albumcount = 0
        self.set_artist_vars()

    def _report_progress(self):
        """Raise a progress event on ``parent.pyjama`` or print the totals."""
        if self.parent.pyjama:
            self.parent.pyjama.Events.raise_event("dbtools_message", "xml", self.total_artists)
        else:
            print("%i Artists, %i Albums and %i Tracks and %i Tags computed." % (self.total_artists, self.total_albums, self.total_tracks, self.total_tags))

    def _flush(self):
        """Hand all buffered records to ``parent`` and start new buffers."""
        if not self.parent.pyjama:
            print("... writing to database")
        self.parent.insert_artists(self.artists)
        self.parent.insert_albums(self.albums)
        self.parent.insert_tracks(self.tracks)
        self.parent.insert_tags(self.tags)
        # Rebind instead of clearing in place: the lists just handed over
        # are left untouched.
        self.artists = []
        self.albums = []
        self.tracks = []
        self.tags = []