#!/usr/bin/python # Python interpreter path on this system. # #============================================================================ # # NAME # # updateweb.py # # DESCRIPTION # # Python script which maintain my web site. # # It deletes temporary files, rewrites old copyright lines and email address # lines in source files, on the master copy of my web site on my hard disk, # then synchronizes all changes to my web sites on my web servers. # # USAGE # # It's best to use the associated makefile. # But you can call this Python utility from the command line, # # $ python updateweb.py Clean up my main web site, synchronize my # main web site, and log all warnings and errors. # $ python updateweb.py -v Same, but log debug messages also. # $ python updateweb.py -v -a Same, but for my alternate web site. # $ python updateweb.py -c Clean up my local web site only. # # Logs are written to the files, # # log.txt Main web site update. # logAlternate.txt Alternate web site update. # logMaster.txt Local (master) web site cleanup. # # To debug, call # # python -m pdb updateweb.py -v # # then use debug commands b (break), p (print), cl, s, n, r, l, q # # AUTHOR # # Sean E. O'Connor 23 Aug 2007 Version 1.0 released. # Sean E. O'Connor 12 Aug 2008 Version 3.1 released. # # NOTES # # Python interpreter: http://www.python.org # Python tutorial and reference: htttp://docs.python.org/lib/lib.html # Python regular expression howto: http://www.amk.ca/python/howto/regex/ # # LEGAL # # updateweb.py Version 3.0 # # A Python utility program which maintains my web site. # # Copyright (C) 2008 by Sean Erik O'Connor. All Rights Reserved. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; version 2 # of the License. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # # The author's address is artifex@seanerikoconnor.freeservers.com. # #============================================================================ #---------------------------------------------------------------------------- # Load Python Packages #---------------------------------------------------------------------------- # OS stuff import sys import os import platform import exceptions import optparse # Regular expressions. import string import re # FTP stuff import ftplib # Dates and times import time import stat import datetime # Logging import logging #---------------------------------------------------------------------------- # Global variables #---------------------------------------------------------------------------- VERBOSE = False # Verbose mode. Prints out everything. ALTERNATEWEBSITE = False # Alternate website. CLEANONLY = False # Clean the master web site only. # 'Enum' types BREADTH_FIRST_SEARCH = "bfs" DEPTH_FIRST_SEARCH = "dfs" # Fields in the file information structure. FILE_NAME = 0 FILE_TYPE = 1 FILE_DATE_TIME = 2 FILE_SIZE = 3 # Parameters file which contains private account information. PARAMETERS_FILE = "/private/param.txt" # Line numbers in the PARAMETERS_FILE, starting from 0. # All other lines are comments, and are skipped. SERVER = 11 USER = 12 PASSWORD = 13 FTP_ROOT = 14 ALT_SERVER = 18 ALT_USER = 19 ALT_PASSWORD = 20 ALT_FTP_ROOT = 21 # Which private directories to skip over when updating the web page. DIR_TO_SKIP = "private|CVS|cvs|PrenticeNetBeans|PrimpolyMac" # Which private files to skip over when updating the web page. # PaintshopPro, Mathematica notebook, anything labeled private or rendered. FILE_TO_SKIP = "\.psp|private|rendered" # File extension for text files. TEXT_FILE_EXT = ".txt" # Suffixes for temporary files which should be deleted. TEMP_FILE_SUFFIXES = r""" # Use Python raw strings. \. # Match the suffix after the dot. # (?: is extension notation; match the regex inside the parentheses # Now match any of the following file extensions: (?: o | obj | lib | exe | # Object files from C, C++, etc compilers pyc | # Object file from Python compiler ilk | pch | pdb | sup | # Temp files from VC++ compiler idb | ncb | opt | plg | # Temp files from VC++ compiler sbr | bsc | map | bce | # Temp files from VC++ compiler res | aps | dep | db | # Temp files from VC++ compiler jbf | # Paintshop Pro class | jar | # Java compiler log | # WS_FTP fas | # CLISP compiler swp | swo | # Vim editor aux | # TeX auxilliary files. DS_Store | _\.DS_Store | # Mac OS X recycle bin _\.Trashes | # Mac OS X recycle bin gdb_history) # GDB history $ """ # and Vim temporary files contain a twiddle somewhere in the name. VIM_TEMP_FILE_EXT = "~" # Suffixes for temporary directories which should be deleted. TEMP_DIR_SUFFIX = r""" (?: Debug | Release | # C++ compiler \.Trashes | \.Trash) # Mac OS X recycle bin $ """ # File extension for internally created temporary file. TEMP_FILE_EXT = ".new" # Suffixes for source files. SOURCE_FILE_SUFFIXES = r""" (?: makefile$ # Any file called makefile is a source file. | \. # Match the file name suffix after the . # Now match any of these suffixes: (?: c | # C source C | # C source c,v | # C source, cvs type cpp | # C++ source h | # C and C++ source h,v | # C and C++ source, cvs type java | # Java source rc | # MFC resource file lif | # Life game file nb | # Mathematica notebook py | # Python source pl | # Perl source prl | # Perl source dsp | # VC++ project file dsw | # VC++ project file mak | # Makefile dev | # Bloodshed C++ project file bat | # DOS shell dat | # Misc data files bashrc | # cygwin startup files bash_profile | bash_logout | vimrc | # Vim startup files \#.* | # CVS admin files lsp) # LISP source $) """ # Suffixes for HTML hypertext and CSS style sheet files. HYPERTEXT_SUFFIX=r""" \. # Match the filename suffix after the . # Now match any of these suffixes: (?: html | htm | # HTML hypertext css) # CSS style sheet $ """ # Match my old hotmail address. # Prevent matching within in this file with the character class brackets. # ?P
places the matching string into the variable 'address' OLD_EMAIL_ADDRESS= r""" (?P
sean[e]rikoconnor@hotmail.com) """ # My new email address to use instead of the old one. NEW_EMAIL_ADDRESS="artifex@seanerikoconnor.freeservers.com" # Change old software version to new software version for Primpoly. OLD_SOFTWARE_VERSION= r""" Primpoly \s+ Version \s+ ([0-9]+) # The two part version number NNN.nnn \. ([0-9]+) """ NEW_SOFTWARE_VERSION="Primpoly Version 9.5" TWO_DIGIT_YEAR_FORMAT="%02d" # Match a copyright line: Extract the copyright which can be (C) or © and the . COPYRIGHT_LINE= r""" Copyright # Copyright. \D+ # Any non-digits. (?P \(C\) | ©) # Match and extract the copyright symbol. \D+ # Any non-digits. (?P[0-9]+) # Match and extract the old copyright year. - # to ([0-9]+) # New copyright year. """ # Match another type of copyright line. # Extract the copyright which can be (C) or © and the . COPYRIGHT_LINE2= r""" Copyright # Copyright. \D+ # Any non-digits. (?P \(C\) | ©) # Match and extract the copyright symbol. \D+ # Any non-digits. (?P[0-9]+) # Match and extract the old copyright year. """ # Match a last updated line. Extract the two digit . LAST_UPDATED_LINE=r""" last\s+ # Match the words "last updated" updated\s+ \d+ # Day number \s+ # Blanks [A-Za-z]+ # Month \s+ # Blanks (?P\d+) # Two digit year """ # Web server root directory. DEFAULT_ROOT_DIR = "/" # Some servers have a limit on the size of individual files which can be uploaded. FILE_SIZE_LIMIT = 200000 # The ftp listing does not always update the current year correctly. # If a file's ftp date is 6 months newer than the master local directory date, # assume it's such an error. # For example, file foo.txt has just been modified on the current date, 1 January 2008. # The corresponding file on the server shows the date of 30 December, but no year, # so we assume the current year 2008. This makes it 11 months newer, which is incorrect. # We can get away with this kludge safely, since it's never an error updating a file # from the master copy, it may just waste time. DAYS_NEWER_FOR_REMOTE_BEFORE_WE_SUSPECT_ITS_ACTUALLY_VERY_OLD = 150 # Upload only if we are newer by more than this amount in fractional days. # Allows for a little slop in time stamps. DAYS_NEWER_FOR_MASTER_BEFORE_UPLOAD = (1.0 / 24.0) * (1.0 / 60.0) * 15.0 # ftp list command lines should be at least this many chars. MIN_FTP_LINE_LENGTH = 7 # Parse an ftp listing, extracting # ftp listings are generally similar to UNIX ls -l listings. # # Some examples: # # (1) Freeservers ftp listing, # # 0 1 2 3 4 5 6 7 8 # drwxr-xr-x 3 1000 1000 4096 Nov 18 2006 Electronics # -rw-r--r-- 1 1000 1000 21984 Jun 4 03:46 StyleSheet.css # # (2) atspace ftp listing, # # drwxr-xr-x 3 seanerikoconnor vusers 49 Apr 7 2006 Electronics # -rw-r--r-- 1 seanerikoconnor vusers 21984 Jun 4 04:03 StyleSheet.css # FTP_LISTING= r""" [drwx-]+ # Unix type file mode. \s+ \d+ # Number of links. \s+ \w+ # Owner. \s+ \w+ # Group. \s+ (?P \d+) # File size in bytes. \s+ (?P \w+) # Month modified. \s+ (?P \d+) # Day modified. \s+ ( (?P \d+) # Hour modified. : (?P \d+) # Minute modified. | (?P \d+) # Year if hours and minutes are absent (happens when year is not the current year) ) \s+ (?P [A-Za-z0-9"'.-_,~()=+#]+) # Path and file name containing numbers, letters and funny characters $ """ #---------------------------------------------------------------------------- # Global functions #---------------------------------------------------------------------------- def getCurrentYear(): """Get the current year.""" return (int)(time.gmtime()[0]) def getCurrentTwoDigitYear(): """Get the last two digits of the current year.""" return (int)(time.gmtime()[0]) % 100 def getPlatform(): """Find out which type of computer platform we are running on. """ # Look at the computer name and try to figure out which of my platforms # I'm running on. if platform.node().endswith( 'Artifex.local' ) or platform.node().endswith( 'Macintosh' ): platformName = "MacBookPro" elif platform.node().endswith( 'Seans-G5-PPC-Mac.local' ) or \ platform.node().endswith( 'oconnors.apple.com' ): platformName = "PPC OSX" elif platform.node().endswith( '1.apple.com' ): platformName = "iMac OSX" elif platform.node() == "seanwinimac": platformName = "iMac Win" elif platform.node() == "Peripatus" or platform.node() == "peripatus": platformName = "PC Win" else: platformName = "MacBookPro" logging.error( "Can't determine the computer platform: guessing my MacBookPro" ) #logging.debug( "Computer platform node name: %s" % platform.node() ) return platformName def getMasterRootDir(): """Get the master root directory for this platform.""" # Each platform has a definite root directory. platformName = getPlatform() # PPC Mac OS X, /Users/seanoconnor/Desktop/Sean/WebSite if platformName == "MacBookPro" or platformName == "PPC OSX": rootDir = "/Users/seanoconnor/Desktop/Sean/WebSite" # Intel iMac OS X, /Volumes/BOOTCAMP/cygwin/home/Sean/Sean/WebSite elif platformName == "iMac OSX": rootDir = "/Volumes/BOOTCAMP/cygwin/home/Sean/WebSite" # Intel iMac Win, /cygdrive/c/cygwin/home/Sean/Sean/WebSite elif platformName == "iMac Win": rootDir = "C:/cygwin/home/Sean/Sean/WebSite" # PC Win, C:/Sean/WebSite elif platformName == "PC Win": rootDir = "C:/Sean/WebSite" #logging.debug( "Root directory = %s" % rootDir ) return rootDir def getSettings(): """Read program settings from a private offline parameter file.""" # Offline file which does not show up in the public web page. inFileName = getMasterRootDir() + PARAMETERS_FILE try: fin = open( inFileName, "r" ) except IOError, e: logging.error( "Cannot open the private settings file %s: %s" % (inFileName, e)) # Read each line of the file, aborting if there is a read error. try: settings = [] line = fin.readline() while line: settings.append( line.strip() ) line = fin.readline() fin.close() except: logging.error( "File I/O error reading private settings file %s: %s. Aborting..." % (inFileName, e)) sys.exit() return settings def isFileInfoType( fileInfo ): "Check if we have a file information structure or merely a simple file name." try: if isinstance( fileInfo, list ): return True elif isinstance( fileInfo, str ): return False else: logging.error( "isFileInfoType found a bad type. Aborting..." ) sys.exit() except TypeError, e: logging.error( "isFileInfoType found a bad type %s. Aborting..." % e ) sys.exit() #---------------------------------------------------------------------------- # Main function #---------------------------------------------------------------------------- def main(): """Main program. Clean up and update my web site.""" #line = "| Primpoly Version 9.5 - A Program for Computing Primitive Polynomials." #print "line = %s" % line #pat = re.compile( OLD_SOFTWARE_VERSION, re.VERBOSE | re.IGNORECASE ) #match = pat.search( line ) #print "match = %s" % match #if match: # newVersion = NEW_SOFTWARE_VERSION # sub = pat.sub( newVersion, line ) # print "new line = %s" % sub #sys.exit() # Print legal. print """ updateweb Version 3.0 - A Python utility program which maintains my web site. Copyright (C) 2008 by Sean Erik O'Connor. All Rights Reserved. It deletes temporary files, rewrites old copyright lines and email address lines in source files, then synchronizes all changes to my web sites. updateweb comes with ABSOLUTELY NO WARRANTY; for details see the GNU General Public License. This is free software, and you are welcome to redistribute it under certain conditions; see the GNU General Public License for details. """ # Get command line options. opt = Opt() # Start logging to file. Verbose turns on logging for # DEBUG, INFO, WARNING, ERROR, and CRITICAL levels. # Otherwise we log only WARNING, ERROR, and CRITICAL levels. if VERBOSE: loglevel = logging.DEBUG else: loglevel = logging.WARNING # Rebind the global value. global LOGFILENAME # Pick the log file name. if ALTERNATEWEBSITE: LOGFILENAME = "logAlternate.txt" elif CLEANONLY: LOGFILENAME = "logMaster.txt" else: LOGFILENAME = "log.txt" logging.basicConfig( level=loglevel, format='%(asctime)s %(levelname)-8s %(message)s', datefmt='%a, %d %b %Y %H:%M:%S', filename=LOGFILENAME, filemode='w' ) logging.debug( "*** Begin logging ******************************" ) # Scan the master web site, finding out all files and directories. logging.debug( "Scanning master (local on disk) web site" ) master = MasterWebSite() master.scan() # Clean up the directory by rewriting source code and hypertext and # removing temporary files. logging.debug( "Cleaning up master (local on disk) web site" ) changed = master.clean() # Rescan if any changes happened. if changed: logging.debug( "Detected changes due to to cleanup." ) master.quit() logging.debug( "Disposing of the old scan." ) del master ; master = MasterWebSite() logging.debug( "*** Rescanning ****************************" ) master.scan() else: logging.debug( "No changes detected. Keeping the original scan." ) # Master web site directories. m_dirs = master.directories # Master web site filenames only. m_files = [ fileInfo[ FILE_NAME ] for fileInfo in master.files ] logging.debug( "*** Master Directories **********************" ) for d in m_dirs: logging.debug( "\t %s (d)" % d ) logging.debug( "*** Master Files **********************" ) for f in m_files: logging.debug( "\t %s (f)" % f ) master.quit() # Clean up master web site only. Don't update remote web sites. if CLEANONLY: logging.debug( "Cleanup finished. Exiting..." ) sys.exit() # Scan the remote hosted web site. logging.debug( "Reading private settings." ) settings = getSettings() # Pick which web site to update. if ALTERNATEWEBSITE: logging.debug( "Connecting to alternate remote site." ) remote = RemoteWebSite( settings[ALT_SERVER], settings[ALT_USER], settings[ALT_PASSWORD], settings[ALT_FTP_ROOT] ) else: logging.debug( "Connecting to primary remote site." ) remote = RemoteWebSite( settings[SERVER], settings[USER], settings[PASSWORD], settings[FTP_ROOT] ) logging.debug( "Scanning remote site" ) remote.scan() remote.quit() r_dirs = remote.directories r_files = [ fileInfo[ FILE_NAME ] for fileInfo in remote.files ] logging.debug( "*** Remote Directories **********************" ) for d in r_dirs: logging.debug( "\t remote dir: %s" % d ) logging.debug( "*** Remote Files **********************" ) for f in r_files: logging.debug( "\t remote file: %s" % f ) # Synchronize the master and remote web sites. if ALTERNATEWEBSITE: logging.debug( "Connecting to alternate remote site for synchronization." ) u = UpdateWeb( settings[ALT_SERVER], settings[ALT_USER], settings[ALT_PASSWORD], settings[ALT_FTP_ROOT], master.directories, master.files, remote.directories, remote.files ) else: # Primary web site. logging.debug( "Connecting to primary remote site for synchronization." ) u = UpdateWeb( settings[SERVER], settings[USER], settings[PASSWORD], settings[FTP_ROOT], master.directories, master.files, remote.directories, remote.files ) logging.debug( "Synchronizing the remote site" ) u.update() u.quit() del u del remote del master ; return #---------------------------------------------------------------------------- # Classes and Objects #---------------------------------------------------------------------------- class Opt( object ): """Get the command line options.""" def __init__( self ): """Get command line options""" commandLineParser = optparse.OptionParser() # Log all changes, not just warnings and errors. commandLineParser.add_option( "-v", "--verbose", dest="verbose", help="Turn on verbose mode to log everything", action="store_true" ) commandLineParser.add_option( "-a", "--alternatewebsite", dest="website", help="Pick the alternate web site to update.", action="store_true" ) commandLineParser.add_option( "-c", "--cleanonly", dest="cleanonly", help="Do a cleanup on the master web site only.", action="store_true" ) (options, args) = commandLineParser.parse_args() if len(args) >= 1: commandLineParser.error( "ERROR: updateweb.py should not have any arguments: do python updateweb.py --help" ) # Rebind the global values. global VERBOSE global ALTERNATEWEBSITE global CLEANONLY if options.verbose: VERBOSE = True if options.website: ALTERNATEWEBSITE = True # Secondary web site. if options.cleanonly: CLEANONLY = True class WebSite( object ): """Walk a directory from the root. This is an abstract class used for both master (local to disk) and remote (ftp server) web sites.""" def __init__( self ): """Set up root directories""" # Queue keeps track of directories not yet processed. self.queue = [] # List of all directories traversed. self.directories = [] # List of files traversed, with file information. self.files = [] # Find out the root directory and go there. self.RootDir = self.getRootDir() self.gotoRootDir( self.rootDir ) def getRootDir( self ): """Subclass: Put code here to get the root directory""" return "" def gotoRootDir( self, root="" ): """Subclass: Put code here to go to the root directory""" pass def oneLevelDown( self, dir ): """Subclass: Fill in with a method which returns a list of the directories and files immediately beneath dir""" pass def walk( self, dir, type="bfs" ): """Walk a directory in either depth first or breadth first order. BFS is the default.""" # Get all subfiles and subdirectories off this node. subdirs, subfiles = self.oneLevelDown( dir ) # Add all the subfiles in order. for f in subfiles: name = self.stripRoot( f ) logging.debug( "Web walking the master: Adding file %s to list." % name ) # Some files are private so skip them from consideration. pat=re.compile( FILE_TO_SKIP ) if pat.search( name[FILE_NAME] ): logging.warning( "Webwalking the master: Skipping private file %s" % name[FILE_NAME] ) # Don't upload the log file due to file locking problems. elif name[FILE_NAME].find( LOGFILENAME ) >= 0: logging.debug( "Webwalking the master: Skipping log file %s" % name[FILE_NAME] ) # File size limit on some servers. else: self.files.append( name ) # Queue up the subdirectories. for d in subdirs: # Some directories are private so skip them from consideration. pat=re.compile( DIR_TO_SKIP ) if pat.search( d ): logging.warning( "Webwalking the master. Skipping private dir %s" % d ) else: logging.debug( "Webwalking the master. Pushing dir %s on the queue." % d ) self.queue.append( d ) # Search through the directories. while len( self.queue ) > 0: # For breadth first search, remove from beginning of queue. if type == BREADTH_FIRST_SEARCH: d = self.queue.pop(0) # For depth first search, remove from end of queue. elif type == DEPTH_FIRST_SEARCH: d = self.queue.pop() else: d = self.queue.pop(0) name = self.stripRoot( d ) logging.debug( "Webwalking the master. Adding relative directory %s to list, full path = %s." % (name, d) ) self.directories.append( name ) self.walk( d ) def stripRoot( self, fileInfo ): """Return a path, but strip off the root directory""" root = self.rootDir # Extract the file name. if isFileInfoType( fileInfo ): name = fileInfo[ FILE_NAME ] else: name = fileInfo # e.g. root = / and name = /Art/foo.txt yields stripped_path = Art/foo.txt # but root = /Sean and name = /Sean/Art/foo.txt yields stripped_path = Art/foo.txt lenroot = len( root ) if root == DEFAULT_ROOT_DIR: pass else: lenroot = lenroot + 1 stripped_path = name[ lenroot: ] if isFileInfoType( fileInfo ): # Update the file name only. return [stripped_path, fileInfo[ FILE_TYPE ], fileInfo[ FILE_DATE_TIME ], fileInfo[ FILE_SIZE ]] else: return stripped_path def appendRootDir( self, rootDir, name ): """Append the root directory to a path""" # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt if rootDir == DEFAULT_ROOT_DIR: return rootDir + name else: return rootDir + "/" + name def scan(self): """Scan the directory tree recursively from the root""" logging.debug( "Webwalking the master: Beginning recursive directory scan from root directory %s" % self.rootDir ) self.walk( self.rootDir ) def modtime( self, f ): """Subclass: Get file modification time""" pass def quit( self ): """Quit web site walking""" logging.debug( "Finished webwalking the master." ) pass def removeDirectory( self, dirName ): """Subclass: Remove a directory""" pass def removeFile( self, fileName ): """Subclass: Remove a file""" pass class MasterWebSite( WebSite ): """Walk the master web directory on local disk down from the root. Clean up temporary files and do other cleanup work.""" def __init__( self ): """Go to web page root and list all files and directories.""" self.rootDir = self.getRootDir() logging.debug( "MasterWebSite.__init__(): \tRoot directory: %s" % self.rootDir) WebSite.__init__( self ) def getRootDir( self ): """Get the name of the root directory""" return getMasterRootDir() def gotoRootDir( self, rootDir ): """Go to the root directory""" # Go to the root directory. logging.debug( "MasterWebSite.gotoRootDir(): \tchdir to root directory: %s" % rootDir) os.chdir( rootDir ) # Read it back. self.rootDir = os.getcwd() logging.debug( "MasterWebSite.gotoRootDir(): \tgetcwd root directory: %s" % self.rootDir ) def oneLevelDown( self, dir ): """List all files and subdirectories in the current directory, dir. For files, collect file info such as time, date and size.""" # Change to current directory. os.chdir( dir ) # List all subdirectories and files. dirList = os.listdir( dir ) dirs = [] files = [] if dirList: for line in dirList: logging.debug( "MasterWebSite.oneLevelDown(): \tlistdir( %s ) = %s" % (dir, line) ) # Add the full path prefix from the root. name = self.appendRootDir( dir, line ) logging.debug( "MasterWebSite.oneLevelDown(): \tmaster dir/file (full path): %s" % name) # Is it a directory or a file? if os.path.isdir( name ): dirs.append( name ) elif os.path.isfile( name ): # First assemble the file information of name, time/date and size into a list. fileInfo = [name, "file", self.getFileDateTime( name ), self.getFileSize( name ) ] files.append( fileInfo ) # Sort the names into order. if dirs: dirs.sort() if files: files.sort() return dirs, files def getFileDateTime( self, fileName ): """Get a local file time and date in UTC.""" fileEpochTime = os.path.getmtime( fileName ) fileTimeUTC = time.gmtime( fileEpochTime )[ 0 : 6 ] # year, month, day, hour, minute, seconds d = datetime.datetime( fileTimeUTC[0], fileTimeUTC[1], fileTimeUTC[2], fileTimeUTC[3], fileTimeUTC[4], fileTimeUTC[5]) return d def getFileSize( self, fileName ): """Get file size in bytes.""" return os.path.getsize( fileName ) def clean( self ): """Scan through all directories and files in the master on disk web site and process them.""" numChanges = 0 logging.debug( "Cleaning up the master web page." ) if self.directories == None or self.files == None: logging.error( "Web site has no directories or files. Aborting..." ) sys.exit() for d in self.directories: # Remove all temporary directories. if self.isTempDir( d ): # Add the full path prefix from the root. name = self.appendRootDir( self.getRootDir(), d ) try: logging.debug( "Removing temp dir %s recursively" % name ) os.removedirs( name ) numChanges += 1 except OSError, e: logging.error( "Cannot remove temp dir %s: %s" % (name, e)) for f in self.files: # Add the full path prefix from the root. name = self.appendRootDir( self.getRootDir(), f[ FILE_NAME ] ) # Remove all temporary files. if self.isTempFile( f ): try: logging.debug( "Removing temp file %s" % name ) os.remove( name ) numChanges += 1 except OSError, e: logging.error( "Cannot remove temp dir %s: %s" % (name, e)) # Update hypertext files. if self.isHypertextFile( f ): changed = self.rewriteHypertextFile( name ) if changed: numChanges += 1 logging.debug( "Rewrote hypertext file %s" % name ) # Update source code files. if self.isSourceFile( f ): changed = self.rewriteSourceFile( name ) if changed: numChanges += 1 logging.debug( "Rewrote source file %s" % name ) # After updating, copy to a text file. self.copyToTextFile( name ) logging.debug( "Created a copy of the source file %s.%s" % (name, TEXT_FILE_EXT)) # Flag that at least one file was changed. if numChanges > 0: return True return False def isTempFile( self, fileInfo ): """Identify a file name as a temporary file""" fileName = fileInfo[ FILE_NAME ] # Suffixes and names for temporary files be deleted. pat = re.compile( TEMP_FILE_SUFFIXES, re.VERBOSE ) match = pat.search( fileName ) # Vim editor temp files contain twiddles. if match or fileName.find( VIM_TEMP_FILE_EXT ) >= 0: return True return False def isTempDir( self, dirName ): """Identify a name as a temporary directory.""" p = re.compile( TEMP_DIR_SUFFIX, re.VERBOSE ) return p.search( dirName ) def isSourceFile( self, fileInfo ): """Identify a source file name.""" fileName = fileInfo[ FILE_NAME ] p = re.compile( SOURCE_FILE_SUFFIXES, re.VERBOSE) return p.search( fileName ) def isHypertextFile( self, fileInfo ): """ Check if the file name is a hypertext file.""" fileName = fileInfo[ FILE_NAME ] p = re.compile( HYPERTEXT_SUFFIX, re.VERBOSE) return p.search( fileName ) def copyToTextFile( self, fileName ): """Make a copy of a file with a .txt extension""" # Remove the old copy with the text file extension. copyFileName = fileName + TEXT_FILE_EXT try: os.remove( copyFileName ) except OSError, e: logging.error( "Cannot remove old text file copy %s: %s" % (copyFileName, e)) # Create the new copy, which is an exact duplicate. self.processLinesOfFile( fileName, copyFileName ) # Make the new copy have the same modification and access time and date as the original # since it is just an exact copy. # That way we won't upload copies with newer times constantly, just because they look as # though they've been recently modified. fileStat = os.stat( fileName ) ; os.utime( copyFileName, (fileStat[stat.ST_ATIME], fileStat[stat.ST_MTIME])) logging.debug( "Reset file time to original time for copy %s" % copyFileName ) ; def rewriteEmailAddressLine( self, line ): """Rewrite lines containing old email addresses.""" # Search for the old email address. pat = re.compile( OLD_EMAIL_ADDRESS, re.VERBOSE | re.IGNORECASE ) match = pat.search( line ) # Replace the old address with my new email address. if match: newAddress = NEW_EMAIL_ADDRESS sub = pat.sub( newAddress, line ) line = sub return line def rewriteVersionLine( self, line ): """Rewrite lines containing old version of software.""" # Search for the old version. pat = re.compile( OLD_SOFTWARE_VERSION, re.VERBOSE | re.IGNORECASE ) match = pat.search( line ) # Replace the old address with my new email address. if match: newVersion = NEW_SOFTWARE_VERSION sub = pat.sub( newVersion, line ) line = sub return line def rewriteCopyrightLine( self, line ): """Rewrite copyright lines if they are out of date.""" # Match the lines, # Copyright (C) nnnn-mmmm by Sean Erik O'Connor. # Copyright © nnnn-mmmm by Sean Erik O'Connor. # and pull out the old year and save it. pat = re.compile( COPYRIGHT_LINE, re.VERBOSE ) # Does it match anywhere? match = pat.search( line ) # Found a match. if match: oldYear = int( match.group( 'oldYear' )) # Replace the old year with the current year. We matched and extracted the # old copyright symbol into the variable 'symbol'. We now insert it back using # the replacement text syntax with \g. if oldYear < getCurrentYear(): newCopyright = 'Copyright \g \g-' + str( getCurrentYear() ) sub = pat.sub( newCopyright, line ) line = sub # Look for the other type of copyright line. else: # Copyright (C) nnnn by Sean Erik O'Connor. # Copyright © mmmm by Sean Erik O'Connor. pat = re.compile( COPYRIGHT_LINE2, re.VERBOSE ) match = pat.search( line ) # Found a match. if match: oldYear = int( match.group( 'oldYear' )) # Replace the old year with the current year. if oldYear < getCurrentYear(): newCopyright = 'Copyright \g ' + str( getCurrentYear() ) sub = pat.sub( newCopyright, line ) line = sub return line def rewriteLastUpdateLine( self, line ): """Rewrite the Last Updated line if the year is out of date.""" # Match the last updated line and pull out the year. # Last updated 12 Aug 07. p = re.compile( LAST_UPDATED_LINE, re.VERBOSE | re.IGNORECASE ) m = p.search( line ) if m: lastUpdateYear = int( m.group( 'year' )) # Convert to four digit years. if lastUpdateYear > 90: lastUpdateYear += 1900 else: lastUpdateYear += 2000 # If the year is old, rewrite to "01 Jan ". if lastUpdateYear < getCurrentYear(): twoDigitYear = TWO_DIGIT_YEAR_FORMAT % getCurrentTwoDigitYear() sub = p.sub( 'last updated 01 Jan ' + twoDigitYear, line ) line = sub return line def rewriteHypertextFile( self, fileName ): """Rewrite copyright lines, last updated lines, etc.""" changed = False # Create a new temporary file name for the rewritten file. tempFileName = fileName + TEMP_FILE_EXT # Apply changes to all lines of the file. if self.processLinesOfFile( fileName, tempFileName, \ [self.rewriteCopyrightLine, self.rewriteLastUpdateLine, \ self.rewriteEmailAddressLine, self.rewriteVersionLine] ): changed = True # Rename the temp file to the original file name. If no changes, just delete the temp file. self.cleanUpTempFile( tempFileName, fileName, changed ) return changed def rewriteSourceFile( self, fileName ): """Rewrite both copyright lines, etc.""" changed = False # Create a temporary file name for the rewritten file. tempFileName = fileName + TEMP_FILE_EXT # Apply changes to all lines of the file. if self.processLinesOfFile( fileName, tempFileName, \ [self.rewriteCopyrightLine, \ self.rewriteEmailAddressLine, self.rewriteVersionLine] ): changed = True # Rename the temp file to the original file name. If no changes, just delete the temp file. self.cleanUpTempFile( tempFileName, fileName, changed ) return changed def cleanUpTempFile( self, tempFileName, fileName, changed ): """Remove the original file, rename the temporary file name to the original name. If there are no changes, just remove the temporary file. """ if changed: # Remove the old file now that we have the rewritten file. try: os.remove( fileName ) logging.debug( "Changes were made. Remove original file %s" % fileName ) except OSError, e: logging.error( "Cannot remove old file %s: %s. Need to remove it manually." % (fileName, e)) # Rename the new file to the old file name. try: os.rename( tempFileName, fileName ) logging.debug( "Rename temp file %s to original file %s" % (tempFileName, fileName) ) except OSError, e: logging.error( "Cannot rename temporary file %s to old file name %s: %s. Need to do it manually" % (tempFileName, fileName, e)) else: # No changes? Remove the temporary file. try: os.remove( tempFileName ) logging.debug( "No changes were made. Remove temporary file %s" % tempFileName ) except OSError, e: logging.error( "Cannot remove temporary file %s: %s. Need to remove it manually." % (tempFileName, e)) return def processLinesOfFile( self, inFileName, outFileName, processLineFunctionList=None ): """Process each line of a file with the processLine() function, creating a new temporary file. """ # Assume no changes. changed = False try: fin = open( inFileName, "r" ) except IOError, e: logging.error( "processLinesOfFile(): \tCannot open file %s for reading: %s" % (inFileName, e)) try: fout = open( outFileName, "w" ) except IOError, e: logging.error( "processLinesOfFile(): \tCannot open file %s for writing: %s" % (outFileName, e)) # Read each line of the file, aborting if there is a read error. try: line = fin.readline() while line: original_line = line if processLineFunctionList == None: # For a simple copy, just duplicate the line unchanged. pass else: # Otherwise, apply changes successively to the line. for processLineFunction in processLineFunctionList: line = processLineFunction( line ) if original_line != line: logging.debug( "Rewrote the line >>>%s<<< to >>>%s<<<" % (original_line, line) ) changed = True fout.write( line ) line = fin.readline() fin.close() fout.close() except IOError, e: logging.error( "File I/O error during reading/writing file %s in processLinesOfFile: %s Aborting..." % (inFileName, e) ) sys.exit() if changed: logging.debug( "processLinesOfFile(): \tRewrote original file %s. Changes are in temporary copy %s" % (inFileName, outFileName)) # Return True if any lines were changed. return changed class RemoteWebSite( WebSite ): """Walk the remote web directory on a web server down from the root.""" def __init__( self, server, user, password, ftproot ): """Connect to FTP server and list all files and directories.""" # Root directory of FTP server. self.rootDir = ftproot logging.debug( "Requesting remote web site ftp root dir %s" % self.rootDir ) # Connect to FTP server and log in. try: #self.ftp.set_debuglevel( 2 ) print "Connecting to ftp server" self.ftp = ftplib.FTP( server ) self.ftp.login( user, password ) except Exception, detail: logging.error( "Remote web site cannot login to ftp server: %s Aborting..." % detail ) sys.exit() else: logging.debug( "Remote web site ftp login succeeded." ) logging.debug( "Remote web site ftp welcome message %s" % self.ftp.getwelcome() ) print "Scanning..." # Construct the superclass. WebSite.__init__( self ) def gotoRootDir( self, root ): """Go to the root directory""" try: # Go to the root directory. self.ftp.cwd( root ) logging.debug( "ftp root directory (requested) = %s" % self.rootDir ) # Read it back. self.rootDir = self.ftp.pwd() logging.debug( "ftp root directory (read back from server): %s" % self.rootDir ) except Exception, detail: logging.error( "gotoRootDir(): \tCannot ftp cwd or pwd root dir %s Aborting..." % (root, detail)) sys.exit() def getRootDir( self ): """Get the root directory name""" return self.rootDir def quit(self): """Quit web site walking""" logging.debug( "Quitting remote site." ) try: self.ftp.quit() except Exception, detail: logging.error( "Cannot ftp quit: %s" % detail) def oneLevelDown( self, dir ): """List files and directories in a subdirectory using ftp""" try: # ftp listing from current dir. logging.debug( "RemoteWebSite.oneLevelDown(): \tftp cwd: %s" % dir) self.ftp.cwd( dir ) dirList = [] self.ftp.retrlines( 'LIST', dirList.append ) except Exception, detail: logging.error( "oneLevelDown(): \tCannot ftp cwd or ftp LIST dir %s: %s Aborting..." % (dir, detail)) sys.exit() dirs = [] files = [] for line in dirList: logging.debug( "RemoteWebSite.oneLevelDown(): \tftp LIST: %s" % line) # Line should at least have the minimum FTP information. if len(line) >= MIN_FTP_LINE_LENGTH: fileInfo = self.getFTPFileInformation( line ) logging.debug( "RemoteWebSite.oneLevelDown(): \tftp parsed file info: %s" % fileInfo) # Prefix the full path prefix from the root to the directory name and add to the directory list. if fileInfo[ FILE_TYPE ] == "directory": dirname = self.appendRootDir( dir , fileInfo[ FILE_NAME ] ) logging.debug( "RemoteWebSite.oneLevelDown(): \tftp dir (full path): %s" % dirname) dirs.append( dirname ) # Add file information to the list of files. else: # Update the file name only: add the full path prefix from the root. fileInfo[ FILE_NAME ] = self.appendRootDir( dir, fileInfo[ FILE_NAME ] ) logging.debug( "RemoteWebSite.oneLevelDown(): \tftp file (full path): %s" % fileInfo) files.append( fileInfo ) else: logging.error( "RemoteWebSite.oneLevelDown(): \tFTP LIST line is too short: %s" % line ) dirs.sort() files.sort() return dirs, files def modtime( self, f ): """Get the modification time of a file""" try: response = self.ftp.sendcmd( 'MDTM ' + f ) # MDTM returns the last modified time of the file in the format # "213 YYYYMMDDhhmmss \r\n # MM is 01 to 12, DD is 01 to 31, hh is 00 to 23, mm is 00 to 59, ss is 0 to 59. # error-response is 550 for info not available, and 500 or 501 if command cannot # be parsed. if response[:3] == '213': time = response[4:] except ftplib.error_perm: time = 0 return time def getFTPFileInformation( self, line ): """Parse the ftp file listing and return file name, datetime and file size""" # Find out if we've a directory or a file. if line[0] == 'd': type = 'directory' else: type = 'file' pattern = re.compile( FTP_LISTING, re.VERBOSE ) # Sensible defaults. filesize = 0 month = 1 day = 1 year = time.gmtime()[0] # Assume the current year if not listed. hour = 0 minute = 0 seconds = 0 filename = "" # Map month names onto numerals. mon_to_num = { 'Jan':1, 'Feb':2, 'Mar':3, 'Apr':4, 'May':5, 'Jun':6, 'Jul':7, 'Aug':8, 'Sep':9, 'Oct':10, 'Nov':11, 'Dec':12 } # Extract time and date from the ftp listing. match = pattern.search( line ) if match: filesize = int( match.group( 'bytes' ) ) month = mon_to_num[ match.group( 'mon' ) ] day = int( match.group( 'day' ) ) # Pull out the year if we have it. Since we have no month or day, # fill them in as Jan 1 above. if match.group( 'year' ): year = int( match.group( 'year' ) ) # Otherwise just get the hour and minute. The year was set already # to the current year above. if match.group( 'hour' ) and match.group( 'min' ): hour = int( match.group( 'hour' ) ) minute = int( match.group( 'min' ) ) filename = match.group( 'filename' ) # Package up the time and date nicely. d = datetime.datetime( year, month, day, hour, minute, seconds ) return [filename, type, d, filesize] class UpdateWeb( object ): """Given previously scanned master and remote directories, update the remote web site.""" def __init__( self, server, user, password, ftproot, m_dirs, m_fileInfos, r_dirs, r_fileInfos ): """Connect to remote site. Accept previously scanned master and remote files and directories.""" # Connect to FTP server and log in. try: print "Connecting to ftp server" self.ftp = ftplib.FTP( server ) self.ftp.login( user, password ) except Exception, detail: logging.error( "Cannot login to ftp server: %s Aborting..." % detail ) sys.exit() else: logging.debug( "ftp login succeeded." ) #if ALTERNATEWEBSITE: self.ftp.set_debuglevel( 2 ) print "Updating..." logging.debug( "ftp server welcome message: %s" % self.ftp.getwelcome() ) # Master root directory. self.masterRootDir = getMasterRootDir() logging.debug( "Master (local to disk) root directory: %s" % self.masterRootDir) # Root directory of FTP server. self.ftpRootDir = ftproot logging.debug( "ftp root directory (requested) = %s" % self.ftpRootDir ) try: # Go to the root directory. self.ftp.cwd( self.ftpRootDir ) # Read it back. self.ftpRootDir = self.ftp.pwd() logging.debug( "ftp root directory (read back from server): %s" % self.ftpRootDir ) except Exception, detail: logging.error( "UpdateWeb(): \tCannot ftp cwd or ftp LIST dir %s Aborting..." % (self.ftpRootDir, detail)) self.m_dirs = m_dirs self.r_dirs = r_dirs self.m_fileInfos = m_fileInfos self.r_fileInfos = r_fileInfos def appendRootDir( self, rootDir, name ): """Append the root directory to a path""" # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt if rootDir == DEFAULT_ROOT_DIR: return rootDir + name else: return rootDir + "/" + name def fileInfo( self ): """Extract file names from file information. Map file names onto file dates and times.""" # Extract file names. self.m_files = [ fileInfo[ FILE_NAME ] for fileInfo in self.m_fileInfos ] self.r_files = [ fileInfo[ FILE_NAME ] for fileInfo in self.r_fileInfos ] # Dictionary mapping file names onto datetimes. self.m_file_to_datetime = dict( [ (fileInfo[ FILE_NAME ], fileInfo[ FILE_DATE_TIME ]) for fileInfo in self.m_fileInfos ] ) self.r_file_to_datetime = dict( [ (fileInfo[ FILE_NAME ], fileInfo[ FILE_DATE_TIME ]) for fileInfo in self.r_fileInfos ] ) # Dictionary mapping master file names onto sizes. self.m_file_to_size = dict( [ (fileInfo[ FILE_NAME ], fileInfo[ FILE_SIZE ]) for fileInfo in self.m_fileInfos ] ) def update( self ): """Scan through the master web site, cleaning it up. Go to remote web site on my servers and synchronize all files. """ self.fileInfo() # Which files and directories are different. self.changes() # Synchronize with the master. self.synchronize() def changes( self ): """Find the set of different directories and files on master and remote.""" # Enter master and remote directories into the dictionary. dir_to_type = {} dir_to_type = dict( [ [ dir, 'm' ] for dir in self.m_dirs ] ) for dir in self.r_dirs: # Initial dictionary contains only master keys. if dir_to_type.has_key( dir ): dir_to_type[ dir ] = 'mr' else: dir_to_type[ dir ] = 'r' # Enter master and remote files into the dictionary. file_to_type = {} file_to_type = dict( [ [ file, 'm' ] for file in self.m_files ] ) for file in self.r_files: # Initial dictionary contains only master keys. if file_to_type.has_key( file ): file_to_type[ file ] = 'mr' else: file_to_type[ file ] = 'r' logging.debug( "Raw dictionary dump of directories" ) for k, v in dir_to_type.iteritems(): logging.debug( "\t dir: %s type: %s" % (k,v)) logging.debug( "Raw dictionary dump of files" ) for k, v in file_to_type.iteritems(): logging.debug( "\t file: %s type: %s" % (k,v)) # Scan through master directories keeping the ordering. self.masterOnlyDirs = [] for dir in self.m_dirs: if dir_to_type[ dir ] == 'm': self.masterOnlyDirs.append( dir ) # Scan through remote directories keeping the ordering. self.remoteOnlyDirs = [] for dir in self.r_dirs: if dir_to_type[ dir ] == 'r': self.remoteOnlyDirs.append( dir ) # We don't care about common directories. # Scan through master files for master only keeping the ordering. self.masterOnlyFiles = [] for file in self.m_files: if file_to_type[ file ] == 'm': self.masterOnlyFiles.append( file ) # Scan through remote files for remote only keeping the ordering. self.remoteOnlyFiles = [] for file in self.r_files: if file_to_type[ file ] == 'r': self.remoteOnlyFiles.append( file ) # Scan through master files for common files keeping the ordering. self.commonFiles = [] for file in self.m_files: if file_to_type[ file ] == 'mr': self.commonFiles.append( file ) logging.debug( "*** Master only directories ******************************" ) for dir in self.masterOnlyDirs: logging.debug( "\t %s" % dir ) logging.debug( "*** Remote only directories ******************************" ) for dir in self.remoteOnlyDirs: logging.debug( "\t %s" % dir ) logging.debug( "*** Master only files ******************************" ) for file in self.masterOnlyFiles: logging.debug( "\t %s" % file ) logging.debug( "*** Remote only files ******************************" ) for file in self.remoteOnlyFiles: logging.debug( "\t %s" % file ) logging.debug( "*** Common files ******************************" ) for file in self.commonFiles: logging.debug( "\tname %s master time %s remote time %s" % (file, self.m_file_to_datetime[ file ], self.r_file_to_datetime[ file ])) def synchronize( self ): """ Synchronize files in the remote directory with the master directory. """ # Compare the common files for time and date. for f in self.commonFiles: m_time = self.m_file_to_datetime[ f ] r_time = self.r_file_to_datetime[ f ] # How many fractional days different are we? days_different = abs( (r_time - m_time).days + (r_time - m_time).seconds / (60.0 * 60.0 * 24.0) ) upload = False logging.debug( "Common file: %s." % f ) # Remote file time is newer. if r_time > m_time: # Remote file time is MUCH newer. if (days_different >= DAYS_NEWER_FOR_REMOTE_BEFORE_WE_SUSPECT_ITS_ACTUALLY_VERY_OLD): logging.warning( "Remote file %s is suspiciously much newer by %f days. Preparing for upload." % (f, days_different)) logging.warning( "\tmaster time %s remote time %s" % ( m_time, r_time) ) upload = True # Remote file time is slightly newer. else: logging.debug( "Remote file %s is slightly newer by %f days. Probably an old file ftp'd recently." % (f,days_different)) logging.debug( "\tmaster time %s remote time %s" % ( m_time, r_time) ) upload = False # Master file time is newer. Upload it. elif m_time > r_time: if (days_different >= DAYS_NEWER_FOR_MASTER_BEFORE_UPLOAD): logging.warning( "Master file %s is newer by %f days. Preparing for upload." % (f, days_different ) ) logging.warning( "\tmaster time %s remote time %s" % ( m_time, r_time) ) upload = True else: logging.debug( "Master file %s is only newer by %f days. Do not upload." % (f, days_different) ) logging.debug( "\tmaster time %s remote time %s" % ( m_time, r_time) ) upload = False size = self.m_file_to_size[ f ] if size >= FILE_SIZE_LIMIT: logging.error( "upload(): Skipping upload of file %s of size %d; too large for server, limit is %d" % \ (f, size, FILE_SIZE_LIMIT) ) upload = False if upload: print "Upload changed %s" % f self.upload( f ) # Remote directory is not in master. Delete it. for d in self.remoteOnlyDirs: logging.debug( "Remote only dir. Attempting to delete it: %s" % d ) print "Delete directory %s" % f self.rmdir( d ) # Master directory missing on remote. Create it. # Due to breadth first order scan, we'll create parent dirs before child dirs. for d in self.masterOnlyDirs: logging.debug( "Master only dir. Creating dir %s on remote." % d ) print "Create new directory %s" % d self.mkdir( d ) # Master file file missing on remote. Upload it. for f in self.masterOnlyFiles: logging.debug( "Master only file. Uploading %s to remote." % f ) size = self.m_file_to_size[ f ] if size >= FILE_SIZE_LIMIT: logging.error( "upload(): Skipping upload of file %s of size %d; too large for server, limit is %d" % \ (f, size, FILE_SIZE_LIMIT) ) else: print "Upload new file %s" % f self.upload( f ) # Remote contains a file not present on the master. Delete the file. for f in self.remoteOnlyFiles: logging.debug( "Remote only file. Deleting remote file %s." % f ) print "Delete file %s" % f self.delRemote( f ) def delRemote( self, relativeFilePath ): """Delete a file using ftp.""" logging.debug( "delRemote(): \trelative file path name: %s" % relativeFilePath ) # Parse the relative file path into file name and relative directory. relativeDir, fileName = os.path.split( relativeFilePath ) logging.debug( "delRemote(): \tfile name: %s" % fileName ) logging.debug( "delRemote(): \trelative dir: %s" % relativeDir ) logging.debug( "delRemote(): \tremote root dir: %s" % self.ftpRootDir ) try: # Add the remote root path and go to the remote directory. remoteDir = self.appendRootDir( self.ftpRootDir, relativeDir ) logging.debug( "delRemote(): \tftp cd remote dir: %s" % remoteDir ) self.ftp.cwd( remoteDir ) except Exception, detail: logging.error( "delRemote(): \tCannot ftp chdir: %s Skipping..." % detail) else: try: logging.debug( "delRemote(): \tftp rm: %s" % fileName ) # Don't remove zero length file names. if len( fileName ) > 0: self.ftp.delete( fileName ) else: logging.warning( "delRemote(): skipping ftp delete; file NAME %s had zero length" % fileName ) except Exception, detail: logging.error( "delRemote(): \tCannot ftp rm: %s" % detail) def mkdir( self, relativeDir ): """Create new remote directory using ftp.""" logging.debug( "mkdir(): \trelative dir path name: %s" % relativeDir ) logging.debug( "mkdir(): \tremote root dir: %s" % self.ftpRootDir ) # Parse the relative dir path into prefix dir and suffix dir. path, dir = os.path.split( relativeDir ) logging.debug( "mkdir(): \tremote prefix dir: %s" % path ) logging.debug( "mkdir(): \tremote dir: %s" % dir ) try: # Add the remote root path and go to the remote directory. remoteDir = self.appendRootDir( self.ftpRootDir, path ) logging.debug( "mkdir(): \tftp cd remote dir: %s" % remoteDir ) self.ftp.cwd( remoteDir ) except Exception, detail: logging.error( "mkdir(): \tCannot ftp chrdir: %s Skipping..." % detail) else: try: logging.debug( "mkdir(): \tftp mkd: %s" % dir ) self.ftp.mkd( dir ) except Exception, detail: logging.error( "mkdir(): \tCannot ftp mkdir: %s" % detail) def rmdir( self, relativeDir ): """Delete an empty directory using ftp.""" logging.debug( "rmdir(): \tintermediate dir path name: %s" % relativeDir ) logging.debug( "rmdir(): \tremote root dir: %s" % self.ftpRootDir ) # Parse the relative dir path into prefix dir and suffix dir. path, dir = os.path.split( relativeDir ) logging.debug( "rmdir(): \tremote prefix dir: %s" % path ) logging.debug( "rmdir(): \tremote dir: %s" % dir ) try: # Add the remote root path and go to the remote directory. remoteDir = self.appendRootDir( self.ftpRootDir, path ) logging.debug( "rmdir(): \tftp cd remote dir: %s" % remoteDir ) self.ftp.cwd( remoteDir ) except Exception, detail: logging.error( "rmdir(): \tCannot ftp chdir: %s Skipping..." % detail) else: try: logging.debug( "rmdir(): \tftp rmd: %s" % dir ) self.ftp.rmd( dir ) except Exception, detail: logging.warning( "rmdir(): \tCannot ftp rmdir dir %s: %s Directory is probably not empty. Do a manual delete." % (dir, detail)) def download( self, relativeFilePath ): """Download a binary file using ftp.""" logging.debug( "download(): \tfile name: %s" % relativeFilePath ) # Parse the relative file path into file name and relative directory. relativeDir, fileName = os.path.split( relativeFilePath ) logging.debug( "download(): \tfile name: %s" % fileName ) logging.debug( "download(): \trelative dir: %s" % relativeDir ) logging.debug( "download(): \troot dir: %s" % self.ftpRootDir ) # Add the remote root path and go to the remote directory. remoteDir = self.appendRootDir( self.ftpRootDir, relativeDir ) logging.debug( "download(): \tftp cd remote dir: %s" % remoteDir ) try: self.ftp.cwd( remoteDir ) except Exception, detail: logging.error( "download(): \tCannot ftp chdir: %s Skipping..." % detail) else: # Add the master root path to get the local file name. # Open local binary file to write into. localFileName = self.appendRootDir( self.masterRootDir, relativeFilePath ) logging.debug( "download(): \topen local file name: %s" % localFileName ) try: f = open( localFileName, "wb" ) try: # Calls f.write() on each block of the binary file. #ftp.retrbinary( "RETR " + fileName, f.write ) pass except Exception, detail: logging.error( "download(): \tCannot cannot ftp retrbinary: %s" % detail) f.close() except IOError, e: logging.error( "download(): \tCannot open local file %s for reading: %s" % (localFileName, e)) def upload( self, relativeFilePath ): """Upload a binary file using ftp.""" logging.debug( "upload(): \trelative file path name: %s" % relativeFilePath ) # Parse the relative file path into file name and relative directory. relativeDir, fileName = os.path.split( relativeFilePath ) logging.debug( "upload(): \tfile name: %s" % fileName ) logging.debug( "upload(): \trelative dir: %s" % relativeDir ) logging.debug( "upload(): \tremote root dir: %s" % self.ftpRootDir ) # Add the remote root path and go to the remote directory. remoteDir = self.appendRootDir( self.ftpRootDir, relativeDir ) logging.debug( "upload(): \tftp cd remote dir: %s" % remoteDir ) try: self.ftp.cwd( remoteDir ) except Exception, detail: logging.error( "upload(): \tCannot ftp chdir: %s Skipping..." % detail) else: # Add the master root path to get the local file name. # Open local binary file to read from. localFileName = self.appendRootDir( self.masterRootDir, relativeFilePath ) logging.debug( "upload(): \topen local file name: %s" % localFileName ) try: f = open( localFileName, "rb" ) try: # f.read() is called on each block of the binary file until EOF. logging.debug( "upload(): \tftp STOR file %s" % fileName ) self.ftp.storbinary( "STOR " + fileName, f ) except Exception, detail: logging.error( "upload(): \tCannot ftp storbinary: %s" % detail) f.close() except IOError, e: logging.error( "upload(): \tCannot open local file %s for reading: %s" % (localFileName, e)) def quit(self): """Log out of an ftp session""" logging.debug( "UpdateWeb::quit()" ) try: self.ftp.quit() except Exception, detail: logging.error( "Cannot ftp quit because %s" % detail) # Call the main program. if __name__ == '__main__': main()