#!/usr/bin/python # -*- coding: utf-8 -*- """ lm: list movies (or list media) Copyright (C) 2010 Jérôme Poisson (goffi@goffi.org) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . """ import re import sys import os,os.path import cPickle as pickle from difflib import SequenceMatcher from optparse import OptionParser #To be replace by argparse ASAP try: import imdb except: print "lm needs IMDbPY to work, please install it" sys.exit(2) NAME = 'lm (list movies)' VERSION = '0.1' ABOUT = NAME+" v"+VERSION+""" (c) Jérôme Poisson (aka Goffi) 2010 --- """+NAME+""" Copyright (C) 2010 Jérôme Poisson (aka Goffi) This program comes with ABSOLUTELY NO WARRANTY; This is free software, and you are welcome to redistribute it under certain conditions. --- This software is a command line tool for listing movies using IMDb metadata Get the latest version at http://www.goffi.org """ movie_ext = [u'.divx', u'.mov', u'.avi', u'.ogv', u'.rmvb', u'.mkv', u'.mpg', u'.wmv', u'.mp4'] forbidden_words = ['divx','dvdrip','xvid'] i = imdb.IMDb() COLOR_RED = "\033[00;31m" COLOR_GREEN = "\033[01;32m" COLOR_BLUE = "\033[01;34m" COLOR_END = '\033[0m' class FilterParsingError(Exception): pass class ListMedia(): def __init__(self): self.load_cache() def load_cache(self): """try: with open(os.path.expanduser('~/.lm_imdb_cache'),'r') as f: self.found_cache = pickle.load(f) except: self.found_cache = {}""" try: with open(os.path.expanduser('~/.lm_movies_cache'),'r') as f: self.movies = pickle.load(f) except: self.movies = {} #self.movies = {} def save_cache(self): #with open(os.path.expanduser('~/.lm_imdb_cache'),'w') as f: # pickle.dump(self.found_cache,f) with open(os.path.expanduser('~/.lm_movies_cache'),'w') as f: pickle.dump(self.movies,f) def __set_id(self, imdb_id, filename): """Set an individual id""" try: found = i.get_movie(self.options.set_id) except imdb.IMDbError: print "connexion error" sys.exit(3) if not found: print "No movie found with the id [%s]" % imdb_id sys.exit(2) if not self.movies.has_key(filename): self.movies[filename] = {'guessed_title':found.get('title') or filename} self.__fill_metadata(self.movies[filename], found) self.movies[filename]['unsure'] = False self.save_cache() def parse_arguments(self): _usage=""" %prog [options] [FILE1 FILE2 ...] %prog --set-id [IMDb id] MOVIE_FILE %prog --help for options list """ parser = OptionParser(usage=_usage,version=ABOUT) parser.add_option('-a','--alphabetical', action="store_true", default=False, help="sort by alphabetical order of title instead of rating") parser.add_option('-r','--reverse', action="store_true", default=False, help="show media in reverse order") parser.add_option('-f','--filter', action="store", type="string", help="filter (cf README)") parser.add_option('--set-id', action="store", type="string", help="manually give id to a movie (only one movie must be given in argument)") parser.add_option('-l','--long', action="store_true", help="Show long information on movie") parser.add_option('-L','--very-long', action="store_true", help="Show full information on movie") parser.add_option('-o','--outline', action="store_true", help="Show plot outline") parser.add_option('--confirm', action="store_true", help="Confirm unsure movies") parser.add_option('-s', '--show', action="store_true", help="Show IMDb webpage of each movie in default navigator (DON'T USE IF YOU'RE LISTING A LOT OF FILES)") (self.options, args) = parser.parse_args() if self.options.set_id: if len(args) != 1: print "Only one movie can be given with set-id option" sys.exit(2) self.__set_id(self.options.set_id,args[0]) sys.exit(0) if self.options.show: import webbrowser global webbrowser if not args: if self.options.confirm: print "You have to explicitly give movies when using --confirm" exit(2) args=['.'] return args def post_traitement(self,basenames): """Must be called at the end""" if self.options.confirm: for movie in basenames: if self.movies.has_key(movie): self.movies[movie]['unsure'] = False self.save_cache() def get_files(self,args): """Return files from args, files contained for directories""" result = [] for arg in args: if not arg: continue #we don't want empty arg real_path = os.path.expanduser(arg).decode('utf-8') if os.path.isdir(real_path): base_path = arg+'/' if arg[-1]!='/' else arg if base_path == "./": base_path = '' result.extend([base_path+basename for basename in os.listdir(real_path)]) elif os.path.isfile(real_path): result.append(arg.decode('utf-8')) return result def guess_titles(self,movie_files): """Try to guess title from movie filename, and fill movies 'guessed_title' @param movie_files: filenames to parse""" #some useful regex title_reg = re.compile('^[^[(]+') #we take everything before information in bracket or square bracket, as these info are usually not part of the title before_year_reg = re.compile(r'(.*[^0-9])[0-9]{4}[^0-9].*') #the year is most of time placed between the title and other information, we are intersted by what is before upper_reg = re.compile(r'(^.+?)[A-Z]{2}.*') #in some case, we have the title with lowercases, and other info (e.g. language) fully uppercase, this regex test this #We now try to clean the filename, to guess the real title from it, which we will need for our imdb request for filename in movie_files: if self.movies.has_key(filename) and self.movies[filename].has_key('guessed_title'): #if movie already in cache, we pass it continue file_tuple = os.path.splitext(filename) tmp_title = (re.findall(title_reg,file_tuple[0]) or [file_tuple[0]])[0].replace('.',' ').replace('_',' ') #first regex & '.' and '_' replaced by space tmp_title = re.sub(before_year_reg, r'\1', tmp_title) or tmp_title #2nd regex title = re.sub(upper_reg,r'\1', tmp_title) or tmp_title #3rd regex if len(title) < 3: #In some cases, the previous regex give a wrong title, we try to detect this by cancelling too short title title = tmp_title for forbidden in forbidden_words: #we now remove words which can stay in the title and are propably not part of it if forbidden in title.lower(): idx = title.lower().find(forbidden) title = title[:idx] #finished, we must have a title more sexy self.movies[filename] = {'guessed_title':title} def __print_not_found(self, not_found): if not_found: print "Movies not found:" for filename in not_found: print filename print "---\n\n" def post_check(self, movie, current, found): """Check after filling, that the found movie has a title close to the filename @param movie: filename of the movie @param current: dict of metadata for current movie, 'unsure' value will be filled by this method @param found: found imdb.Movie.Movie""" _filename = movie.lower().replace('.',' ').replace('_',' ') _title = current['title'].lower().replace('!','').replace('?','') ratio = SequenceMatcher(None,_title,_filename[:len(_title)]).ratio() if ratio < 0.5: #Bad ratio, we do additionnal checks #print "\nbad ratio(%f): %s ==> %s" % (ratio, movie, current['title']) _best_title = _title if _title in _filename: #We can found the title in the filename, sounds good #print "title found in filename, OK :)" current['unsure'] = False else: _biggest_ratio = ratio #We now check with other titles found in IMDB ('akas' key) for other_title in [title.split(' - ')[0].replace('"','') for title in (found.get('akas') or [])] or '': current_ratio = SequenceMatcher(None,other_title,_filename[:len(other_title)]).ratio() if current_ratio > _biggest_ratio: _biggest_ratio = current_ratio _best_title = other_title #print "biggest ratio ==> %f (for [%s]) -- %s" % (_biggest_ratio, _best_title, 'OK' if _biggest_ratio>0.5 else 'refused') current['unsure'] = False if _biggest_ratio >= 0.5 else True else: current['unsure'] = False if current['unsure']: current['best_matching'] = _best_title def __fill_metadata(self, current, found): """Fill metadata for one movie @param current: metadata dict to fill @param found: found imdb.Movie.Movie""" current['id'] = found.movieID current['title'] = found.get('title') current['canonical_title'] = found.get('smart canonical title') or title current['rating'] = found.get('rating') current['year'] = found.get('year') current['genre'] = found.get('genre') or [] current['director'] = [director.get('name') for director in (found.get('director') or [])] current['short_summary'] = found.get('plot outline') current['summary'] = (found.get('plot') or [''])[0] current['cast'] = [actor.get('name') for actor in (found.get('cast') or [])] return current def get_metadata(self, files): """Get metadata for files not already in cache @param files: list of filename (just basename, with extension)""" _movie_files = filter(lambda file:os.path.splitext(file)[1].lower() in movie_ext,files) #We only wants movies self.guess_titles(_movie_files) _movies_to_get = filter(lambda movie:not self.movies[movie].has_key('title'), self.movies) #We want to parse movies not already parsed if not _movies_to_get: return idx = 1 total = len(_movies_to_get) not_found = [] last_len = 0 for movie in _movies_to_get: out_str = u"Getting metadata: [%(index)i/%(nb_movies)i] %(guessed_title)s\r" % {'index':idx,'nb_movies':total,'filename':movie,'guessed_title':self.movies[movie]['guessed_title']} if len(out_str) < last_len: sys.stdout.write(' '*last_len+'\r') last_len = len(out_str) sys.stdout.write(out_str.encode('utf-8')) sys.stdout.flush() idx+=1 try: results = i.search_movie(self.movies[movie]['guessed_title']) #results = [self.found_cache[movie]] if self.found_cache.has_key(movie) else [] if not results: not_found.append(movie) self.movies[movie]['title'] = None else: found = results[0] i.update(found) #self.found_cache[movie] = found current = self.__fill_metadata(self.movies[movie], found) self.post_check(movie, current, found) except imdb.IMDbError, e: print "Connexion error, current movie: [%s]" % movie self.__print_not_found(not_found) print "\n\n---\n\n" print e self.save_cache() sys.exit(2) if idx%15==0: #We save work in case of problem self.save_cache() self.save_cache() print "\n" self.__print_not_found(not_found) def user_filter(self, file_tuple): """Filter movies according to user given arguments""" filt = self.options.filter pos = 0 try: while filt: if filt[0]!='@': raise FilterParsingError end = filt.find(':') if end == -1: raise FilterParsingError filter_type = filt[1:end] if not filter_type in ['genre','director','actor']: raise FilterParsingError if filter_type=="actor": filter_type='cast' filt = filt[end+1:] end = filt.find('@') if end == -1: end = None keys = set([key.lower() for key in filt[:end].split(',')]) filt = filt[end:] if end else '' file_tuple = filter(lambda m:set([key.lower() for key in self.movies[m[1]][filter_type]]).intersection(keys),file_tuple) except FilterParsingError,IndexError: print "Invalid filter ! Please read README for syntax" exit(2) return file_tuple def pretty_print(self, file_tuple): """Print movie with metadata and colors according to arguments""" filename,basename = file_tuple current = self.movies[basename] values_dict = {'b':COLOR_BLUE, 'e':COLOR_END, 'header':COLOR_RED + '/!\\ ' + COLOR_END if current['unsure'] else '', 'title':COLOR_GREEN + current['title'] + COLOR_END, 'rating':unicode(current['rating']), 'year':current['year'], 'genre':"%s" % ', '.join(current['genre']), 'filename':filename, 'director':', '.join(current['director']) } if self.options.very_long: out_str = u"%(header)s%(title)s (%(b)srating%(e)s: %(rating)s)\n%(b)syear%(e)s: %(year)s %(b)sgenre%(e)s: %(genre)s\n%(b)sfile%(e)s: %(filename)s\n%(b)sdirector%(e)s: %(director)s\n" % values_dict cast_header = COLOR_BLUE+u"cast"+COLOR_END+": " len_cast_header = len(cast_header) - len(COLOR_BLUE) - len(COLOR_END) out_str+=cast_header first = True for actor in current['cast']: if first: first = False out_str += actor+'\n' else: out_str+=len_cast_header*u' '+actor+'\n' out_str += "\n" + COLOR_BLUE + "summary"+COLOR_END+": %s\n---\n" % current['summary'] elif self.options.long: out_str = u"%(header)s%(title)s (%(year)s,%(rating)s) [%(b)s%(genre)s%(e)s] from %(director)s: %(filename)s\n" % values_dict else: out_str = u"%(header)s%(title)s (%(filename)s)\n" % values_dict sys.stdout.write(out_str.encode('utf-8')) if self.options.outline and current['short_summary']: sys.stdout.write(unicode("\t"+current['short_summary']+'\n').encode('utf-8')) def show_list(self, files): """Show the list of files, using metadata according to arguments""" files_tuple = [(path, os.path.basename(path)) for path in files] _movie_files = filter(lambda file:os.path.splitext(file[1])[1].lower() in movie_ext,files_tuple) #We only wants movies _movie_files = filter(lambda m: self.movies[m[1]]['title'] != None, _movie_files) #We want only parsed movies if self.options.filter: _movie_files = self.user_filter(_movie_files) if not _movie_files: print "No movie found" exit(1) if self.options.alphabetical: _key=lambda m: self.movies[m[1]]['canonical_title'].lower() else: _key=lambda m: self.movies[m[1]]['rating'] _movie_files.sort(key=_key,reverse=self.options.reverse) for _file_tuple in _movie_files: self.pretty_print(_file_tuple) if self.options.show: webbrowser.open_new_tab(imdb.imdbURL_movie_main % self.movies[_file_tuple[1]]['id']) LM = ListMedia() args = LM.parse_arguments() files = LM.get_files(args) basenames = set(map(os.path.basename,files)) LM.get_metadata(basenames) LM.show_list(files) LM.post_traitement(basenames)