view lm @ 2:6f4b5e24cf08

README update
author Goffi <goffi@goffi.org>
date Wed, 21 Jul 2010 17:04:39 +0800
parents df1b98df8be9
children
line wrap: on
line source

#!/usr/bin/python
# -*- coding: utf-8 -*-

"""
lm: list movies (or list media)
Copyright (C) 2010  Jérôme Poisson (goffi@goffi.org)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""


import re
import sys
import os,os.path
import cPickle as pickle
from difflib import SequenceMatcher
from optparse import OptionParser #To be replaced by argparse ASAP

try:
    import imdb
except:
    print "lm needs IMDbPY to work, please install it"
    sys.exit(2)

#Program identity, used to build the text given to the option parser as version
NAME = 'lm (list movies)'
VERSION = '0.1'

ABOUT = """%(name)s v%(version)s (c) Jérôme Poisson (aka Goffi) 2010

---
%(name)s Copyright (C) 2010  Jérôme Poisson (aka Goffi)
This program comes with ABSOLUTELY NO WARRANTY;
This is free software, and you are welcome to redistribute it
under certain conditions.
---

This software is a command line tool for listing movies using IMDb metadata
Get the latest version at http://www.goffi.org
""" % {'name': NAME, 'version': VERSION}


#Lowercase extensions of the files which are considered as movies
movie_ext = [
    u'.divx',
    u'.mov',
    u'.avi',
    u'.ogv',
    u'.rmvb',
    u'.mkv',
    u'.mpg',
    u'.wmv',
    u'.mp4',
]
#Words which, when found in a guessed title, mark the end of the real title
forbidden_words = ['divx', 'dvdrip', 'xvid']

#Module level IMDbPY access object, used in this file to search movies, to get a movie from its id and to update found movies
i = imdb.IMDb()

#ANSI escape sequences used to colorize the terminal output
COLOR_RED = "\x1b[00;31m"
COLOR_GREEN = "\x1b[01;32m"
COLOR_BLUE = "\x1b[01;34m"
COLOR_END = "\x1b[0m" #back to the default rendering

class FilterParsingError(Exception):
    """Raised when the user given filter does not follow the expected syntax"""

class ListMedia():

    def __init__(self):
        self.load_cache()

    def load_cache(self):
        """try:
            with open(os.path.expanduser('~/.lm_imdb_cache'),'r') as f:
                self.found_cache = pickle.load(f)
        except:
            self.found_cache = {}"""
        try:
            with open(os.path.expanduser('~/.lm_movies_cache'),'r') as f:
                self.movies = pickle.load(f)
        except:
            self.movies = {}
        #self.movies = {}

    def save_cache(self):
        """Pickle self.movies into the ~/.lm_movies_cache file"""
        cache_path = os.path.expanduser('~/.lm_movies_cache')
        with open(cache_path,'w') as cache_file:
            pickle.dump(self.movies,cache_file)
    
    def __set_id(self, imdb_id, filename):
        """Set an individual id: associate a movie file with a given IMDb id

        The metadata of the movie are stored in the cache, the entry is marked
        as sure, and the cache is saved.
        The program exits with status 3 on IMDb access error, and with status 2
        if nothing is found for the id.
        @param imdb_id: IMDb id of the movie
        @param filename: key of the movie in the cache"""
        try:
            #the id given in argument is used, the method doesn't depend on the parsed options
            found = i.get_movie(imdb_id)
        except imdb.IMDbError:
            print "connexion error"
            sys.exit(3)
        if not found:
            print "No movie found with the id [%s]" % imdb_id
            sys.exit(2)
        if filename not in self.movies:
            self.movies[filename] = {'guessed_title':found.get('title') or filename}
        self.__fill_metadata(self.movies[filename], found)
        self.movies[filename]['unsure'] = False #the user explicitly chose this movie
        self.save_cache()

    def parse_arguments(self):
        _usage="""
        %prog [options] [FILE1 FILE2 ...]
        %prog --set-id [IMDb id] MOVIE_FILE

        %prog --help for options list
        """
        parser = OptionParser(usage=_usage,version=ABOUT)

        parser.add_option('-a','--alphabetical', action="store_true", default=False,
                          help="sort by alphabetical order of title instead of rating")
        parser.add_option('-r','--reverse', action="store_true", default=False,
                          help="show media in reverse order")
        parser.add_option('-f','--filter', action="store", type="string",
                          help="filter (cf README)")
        parser.add_option('--set-id', action="store", type="string",
                          help="manually give id to a movie (only one movie must be given in argument)")
        parser.add_option('-l','--long', action="store_true",
                          help="Show long information on movie")
        parser.add_option('-L','--very-long', action="store_true",
                          help="Show full information on movie")
        parser.add_option('-o','--outline', action="store_true",
                          help="Show plot outline")
        parser.add_option('--confirm', action="store_true",
                          help="Confirm unsure movies")
        parser.add_option('-s', '--show', action="store_true",
                          help="Show IMDb webpage of each movie in default navigator (DON'T USE IF YOU'RE LISTING A LOT OF FILES)")
        (self.options, args) = parser.parse_args()
        if self.options.set_id:
            if len(args) != 1:
                print "Only one movie can be given with set-id option"
                sys.exit(2)
            self.__set_id(self.options.set_id,args[0])
            sys.exit(0)
        if self.options.show:
            import webbrowser
            global webbrowser
        if not args:
            if self.options.confirm:
                print "You have to explicitly give movies when using --confirm"
                exit(2)
            args=['.']
        return args

    def post_traitement(self,basenames):
        """Must be called at the end"""
        if self.options.confirm:
            for movie in basenames:
                if self.movies.has_key(movie):
                    self.movies[movie]['unsure'] = False
            self.save_cache()

    def get_files(self,args):
        """Return files from args, files contained for directories"""
        result = []
        for arg in args:
            if not arg:
                continue #we don't want empty arg
            real_path = os.path.expanduser(arg).decode('utf-8')
            if os.path.isdir(real_path):
                base_path = arg+'/' if arg[-1]!='/' else arg
                if base_path == "./":
                    base_path = ''
                result.extend([base_path+basename for basename in os.listdir(real_path)])
            elif os.path.isfile(real_path):
                result.append(arg.decode('utf-8'))
        return result

    def guess_titles(self,movie_files):
        """Try to guess title from movie filename, and fill movies 'guessed_title'
        @param movie_files: filenames to parse"""
        #some useful regex
        title_reg = re.compile('^[^[(]+') #we take everything before information in bracket or square bracket, as these info are usually not part of the title
        before_year_reg = re.compile(r'(.*[^0-9])[0-9]{4}[^0-9].*') #the year is most of time placed between the title and other information, we are intersted by what is before
        upper_reg = re.compile(r'(^.+?)[A-Z]{2}.*') #in some case, we have the title with lowercases, and other info (e.g. language) fully uppercase, this regex test this
        #We now try to clean the filename, to guess the real title from it, which we will need for our imdb request

        for filename in movie_files:
            if self.movies.has_key(filename) and self.movies[filename].has_key('guessed_title'):
                #if movie already in cache, we pass it
                continue
            file_tuple = os.path.splitext(filename)
            tmp_title = (re.findall(title_reg,file_tuple[0]) or [file_tuple[0]])[0].replace('.',' ').replace('_',' ') #first regex & '.' and '_' replaced by space
            tmp_title = re.sub(before_year_reg, r'\1', tmp_title) or tmp_title #2nd regex
            title = re.sub(upper_reg,r'\1', tmp_title) or tmp_title #3rd regex
            
            if len(title) < 3: #In some cases, the previous regex give a wrong title, we try to detect this by cancelling too short title
                title = tmp_title
            
            for forbidden in forbidden_words: #we now remove words which can stay in the title and are propably not part of it
                if forbidden in title.lower():
                    idx = title.lower().find(forbidden)
                    title = title[:idx]

            #finished, we must have a title more sexy
            self.movies[filename] = {'guessed_title':title}

    def __print_not_found(self, not_found):
        if not_found:
            print "Movies not found:"
            for filename in not_found:
                print filename
            print "---\n\n"

    def post_check(self, movie, current, found):
        """Check after filling, that the found movie has a title close to the filename

        The title is compared with the beginning of the filename. If they are
        not close enough and the title is not in the filename, the alternative
        titles ('akas' key of the found movie) are tried too.
        @param movie: filename of the movie
        @param current: dict of metadata for current movie, 'unsure' value will be filled by this method,
                        and 'best_matching' too when the movie is unsure
        @param found: found imdb.Movie.Movie"""
        _filename = movie.lower().replace('.',' ').replace('_',' ')
        _title = current['title'].lower().replace('!','').replace('?','')
        ratio = SequenceMatcher(None,_title,_filename[:len(_title)]).ratio()

        if ratio >= 0.5 or _title in _filename:
            #close enough, or the title can be found as is in the filename
            current['unsure'] = False
            return

        #Bad ratio, we now check with other titles found in IMDB ('akas' key)
        _best_title = _title
        _biggest_ratio = ratio
        for aka in (found.get('akas') or []):
            other_title = aka.split(' - ')[0].replace('"','')
            #the filename has been lowercased, so the alternative title must be lowercased too for the comparison
            lowered = other_title.lower()
            current_ratio = SequenceMatcher(None,lowered,_filename[:len(lowered)]).ratio()
            if current_ratio > _biggest_ratio:
                _biggest_ratio = current_ratio
                _best_title = other_title

        current['unsure'] = _biggest_ratio < 0.5
        if current['unsure']:
            current['best_matching'] = _best_title
   
    def __fill_metadata(self, current, found):
        """Fill metadata for one movie
        @param current: metadata dict to fill
        @param found: found imdb.Movie.Movie"""
        current['id'] = found.movieID
        current['title'] = found.get('title')
        current['canonical_title'] = found.get('smart canonical title') or title
        current['rating'] = found.get('rating')
        current['year'] = found.get('year')
        current['genre'] = found.get('genre') or []
        current['director'] = [director.get('name') for director in (found.get('director') or [])]
        current['short_summary'] = found.get('plot outline')
        current['summary'] = (found.get('plot') or [''])[0]
        current['cast'] = [actor.get('name') for actor in (found.get('cast') or [])]
        return current

    def get_metadata(self, files):
        """Get metadata for files not already in cache
        @param files: list of filename (just basename, with extension)"""
    
        _movie_files = filter(lambda file:os.path.splitext(file)[1].lower() in movie_ext,files) #We only wants movies

        self.guess_titles(_movie_files)
        _movies_to_get = filter(lambda movie:not self.movies[movie].has_key('title'), self.movies) #We want to parse movies not already parsed
        if not _movies_to_get:
            return

        idx = 1
        total = len(_movies_to_get)
        not_found = []
        last_len = 0
        
        for movie in _movies_to_get:
            out_str = u"Getting metadata: [%(index)i/%(nb_movies)i] %(guessed_title)s\r" % {'index':idx,'nb_movies':total,'filename':movie,'guessed_title':self.movies[movie]['guessed_title']}
            if len(out_str) < last_len:
                sys.stdout.write(' '*last_len+'\r')
            last_len = len(out_str)
            sys.stdout.write(out_str.encode('utf-8'))
            sys.stdout.flush()
            idx+=1

            try:
                results = i.search_movie(self.movies[movie]['guessed_title'])
                #results = [self.found_cache[movie]] if self.found_cache.has_key(movie) else []
                if not results:
                    not_found.append(movie)
                    self.movies[movie]['title'] = None
                else:
                    found = results[0]
                    i.update(found)
                    #self.found_cache[movie] = found
                    current = self.__fill_metadata(self.movies[movie], found)
                    self.post_check(movie, current, found)
                        
            except imdb.IMDbError, e:
                print "Connexion error, current movie: [%s]" % movie
                self.__print_not_found(not_found)
                print "\n\n---\n\n"
                print e
                self.save_cache()
                sys.exit(2)

            if idx%15==0:
                #We save work in case of problem
                self.save_cache()


        self.save_cache()
        print "\n"
        self.__print_not_found(not_found)
   
    def user_filter(self, file_tuple):
        """Filter movies according to user given arguments"""
        filt = self.options.filter
        pos = 0
        try:
            while filt:
                if filt[0]!='@':
                    raise FilterParsingError
                end = filt.find(':')
                if end == -1:
                    raise FilterParsingError
                filter_type = filt[1:end]
                if not filter_type in ['genre','director','actor']:
                    raise FilterParsingError
                if filter_type=="actor":
                    filter_type='cast'
                filt = filt[end+1:]
                end = filt.find('@')
                if end == -1:
                    end = None
                keys = set([key.lower() for key in filt[:end].split(',')])
                filt = filt[end:] if end else ''
                file_tuple = filter(lambda m:set([key.lower() for key in self.movies[m[1]][filter_type]]).intersection(keys),file_tuple)
        except FilterParsingError,IndexError:
            print "Invalid filter ! Please read README for syntax"
            exit(2)

        return file_tuple

    def pretty_print(self, file_tuple):
        """Print movie with metadata and colors according to arguments

        The output is written UTF-8 encoded on stdout: full information with
        --very-long, one detailed line with --long, title and filename else.
        The plot outline is added with --outline, when there is one.
        @param file_tuple: (path, basename) tuple of the movie"""
        filename,basename = file_tuple
        current = self.movies[basename]

        #unsure movies are prefixed with a red warning
        if current['unsure']:
            warning = COLOR_RED + '/!\\ ' + COLOR_END
        else:
            warning = ''
        fields = dict(
            b=COLOR_BLUE,
            e=COLOR_END,
            header=warning,
            title=COLOR_GREEN + current['title'] + COLOR_END,
            rating=unicode(current['rating']),
            year=current['year'],
            genre=', '.join(current['genre']),
            filename=filename,
            director=', '.join(current['director']))

        if self.options.very_long:
            out_str = u"%(header)s%(title)s (%(b)srating%(e)s: %(rating)s)\n%(b)syear%(e)s: %(year)s %(b)sgenre%(e)s: %(genre)s\n%(b)sfile%(e)s: %(filename)s\n%(b)sdirector%(e)s: %(director)s\n" % fields
            cast_header = COLOR_BLUE+u"cast"+COLOR_END+": "
            #one actor per line, aligned after the visible part of the header (color codes are not displayed)
            padding = (len(cast_header) - len(COLOR_BLUE) - len(COLOR_END))*u' '
            out_str += cast_header + padding.join([actor+'\n' for actor in current['cast']])
            out_str += "\n" + COLOR_BLUE + "summary"+COLOR_END+": %s\n---\n" % current['summary']
        elif self.options.long:
            out_str = u"%(header)s%(title)s (%(year)s,%(rating)s) [%(b)s%(genre)s%(e)s] from %(director)s: %(filename)s\n" % fields
        else:
            out_str = u"%(header)s%(title)s (%(filename)s)\n" % fields
        sys.stdout.write(out_str.encode('utf-8'))

        outline = current['short_summary']
        if self.options.outline and outline:
            sys.stdout.write(unicode("\t"+outline+'\n').encode('utf-8'))

    def show_list(self, files):
        """Show the list of files, using metadata according to arguments"""
        files_tuple = [(path, os.path.basename(path)) for path in files]
        _movie_files = filter(lambda file:os.path.splitext(file[1])[1].lower() in movie_ext,files_tuple) #We only wants movies
        _movie_files = filter(lambda m: self.movies[m[1]]['title'] != None, _movie_files) #We want only parsed movies
        
        if self.options.filter:
            _movie_files = self.user_filter(_movie_files)
        if not _movie_files:
            print "No movie found"
            exit(1)
        
        if self.options.alphabetical:
            _key=lambda m: self.movies[m[1]]['canonical_title'].lower()
        else:
            _key=lambda m: self.movies[m[1]]['rating']
        _movie_files.sort(key=_key,reverse=self.options.reverse)
        
        for _file_tuple in _movie_files:
            self.pretty_print(_file_tuple)
            if self.options.show:
                webbrowser.open_new_tab(imdb.imdbURL_movie_main % self.movies[_file_tuple[1]]['id'])



#The program is only run when the file is executed as a script, not when it is imported
if __name__ == '__main__':
    LM = ListMedia()
    args = LM.parse_arguments()
    files = LM.get_files(args)
    #the cache is indexed by basenames
    basenames = set(map(os.path.basename,files))
    LM.get_metadata(basenames)
    LM.show_list(files)
    LM.post_traitement(basenames)