1 #!/usr/bin/env python3
   2 #============================================================================
   3 #
   4 # NAME
   5 #
   6 #     updateweb.py
   7 #
   8 # DESCRIPTION
   9 #
  10 #     Python script which updates my web sites.
  11 #
  12 #     It does miscellaneous cleanup on my master copy of the web site on disk,
  13 #     including updating copyright information, then synchronizes the master
  14 #     copy to my remote server web sites using FTP.
  15 #
  16 # USAGE
  17 #
  18 #     It's best to use the associated makefile.
  19 #     But you can call this Python utility from the command line,
  20 #
  21 #     $ python updateweb.py          Clean up my master copy, then use it
  22 #                                    to update my remote web server site.
  23 #                                    Log warnings and errors.
  24 #     $ python updateweb.py -v       Same, but log debug messages also.
  25 #     $ python updateweb.py -c       Clean up my master copy only.
  26 #     $ python updateweb.py -t       Run unit tests only.
  27 #
  28 #     We get username and password information from the file PARAMETERS_FILE.
  29 #
  30 #     Logs are written to the files,
  31 #
  32 #         logMaster.txt       Master web site cleanup log.
  33 #         logRemote.txt       Remote web server update log.
  34 #
  35 # AUTHOR
  36 #
  37 #     Sean E. O'Connor        23 Aug 2007  Version 1.0 released.
  38 #     Sean E. O'Connor        18 May 2013  Version 4.2 released.
  39 #     Sean E. O'Connor        07 Nov 2015  Version 4.3 released.
  40 #     Sean E. O'Connor        22 Nov 2015  Version 4.4 released.
  41 #     Sean E. O'Connor        07 Feb 2017  Version 4.5 released.
  42 #     Sean E. O'Connor        04 Jun 2017  Version 4.6 released.
  43 #     Sean E. O'Connor        17 Dec 2017  Version 4.7 released.
  44 #     Sean E. O'Connor        15 Jan 2018  Version 4.8 released.
  45 #     Sean E. O'Connor        05 Jan 2019  Version 5.0 released.
  46 #
  47 # LEGAL
  48 #
  49 #     updateweb.py Version 5.0 - A Python utility program which maintains my web site.
  50 #     Copyright (C) 2007-2019 by Sean Erik O'Connor.  All Rights Reserved.
  51 #
  52 #     This program is free software: you can redistribute it and/or modify
  53 #     it under the terms of the GNU General Public License as published by
  54 #     the Free Software Foundation, either version 3 of the License, or
  55 #     (at your option) any later version.
  56 #
  57 #     This program is distributed in the hope that it will be useful,
  58 #     but WITHOUT ANY WARRANTY; without even the implied warranty of
  59 #     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  60 #     GNU General Public License for more details.
  61 #
  62 #     You should have received a copy of the GNU General Public License
  63 #     along with this program.  If not, see <http://www.gnu.org/licenses/>.
  64 #
  65 #     The author's address is artificer!AT!seanerikoconnor!DOT!freeservers!DOT!com
  66 #     with !DOT! replaced by . and the !AT! replaced by @
  67 #
  68 # NOTES
  69 #
  70 #    DOCUMENTATION
  71 #
  72 #    Python interpreter:               http://www.python.org
  73 #    Python tutorial and reference:    http://docs.python.org/lib/lib.html
  74 #    Python debugger:                  https://docs.python.org/3/library/pdb.html
  75 #    Python regular expression howto:  http://www.amk.ca/python/howto/regex/
  76 #
  77 # SAMPLE DEBUGGING OF THIS SCRIPT
  78 #
  79 #    Run Python
  80 #
  81 #        seanoconnor:~/Desktop/Sean/WebSite/WebDesign/MaintainWebPage$ python
  82 #        Python 3.7.1 (v3.7.1:2c5fed86e0, Oct  3 2017, 00:32:08) 
  83 #        [GCC 4.2.1 (Apple Inc. build 5666) (dot 3)] on darwin
  84 #        Type "help", "copyright", "credits" or "license" for more information.
  85 #
  86 #    Import the debugger module, 
  87 #
  88 #        >>> import pdb
  89 #
  90 #    Import the entire script,
  91 #
  92 #        >>> import updateweb
  93 #
  94 #    Set up the debugger to run starting in the main() function,
  95 #
  96 #        >>> pdb.run('updateweb.main()')
  97 #        > <string>(1)<module>()
  98 #
  99 #    Import the class you want to debug within
 100 #
 101 #        (Pdb) from updateweb import WebSite
 102 #
 103 #    Set breakpoints
 104 #
 105 #        (Pdb) b WebSite.clean
 106 #        Breakpoint 1 at /Users/seanoconnor/Desktop/Sean/WebSite/WebDesign/MaintainWebPage/updateweb.py:1049
 107 #
 108 #        (Pdb) b WebSite.isTempFile
 109 #        Breakpoint 2 at /Users/seanoconnor/Desktop/Sean/WebSite/WebDesign/MaintainWebPage/updateweb.py:1108
 110 #
 111 #    List the breakpoints,
 112 #
 113 #        (Pdb) b
 114 #        Num Type         Disp Enb   Where
 115 #        1   breakpoint   keep yes   at /Users/seanoconnor/Desktop/Sean/WebSite/WebDesign/MaintainWebPage/updateweb.py:1049
 116 #        2   breakpoint   keep yes   at /Users/seanoconnor/Desktop/Sean/WebSite/WebDesign/MaintainWebPage/updateweb.py:1108
 117 #
 118 #    Start running the program.  We stop at the breakpoint,
 119 #
 120 #        (Pdb) c
 121 #        
 122 #            updateweb Version 5.0 - A Python utility program which maintains my web site.
 123 #            Copyright (C) 2007-2019 by Sean Erik O'Connor.  All Rights Reserved.
 124 #        
 125 #            It deletes temporary files, rewrites old copyright lines and email address
 126 #            lines in source files, then synchronizes all changes to my web sites.
 127 #        
 128 #            updateweb comes with ABSOLUTELY NO WARRANTY; for details see the
 129 #            GNU General Public License.  This is free software, and you are welcome
 130 #            to redistribute it under certain conditions; see the GNU General Public
 131 #            License for details.
 132 #            Scanning and cleaning local web site...> /Users/seanoconnor/Desktop/Sean/WebSite/WebDesign/MaintainWebPage/updateweb.py(1051)clean()
 133 #
 134 #    Keep going,
 135 #
 136 #            (Pdb) c
 137 #            > /Users/seanoconnor/Desktop/Sean/WebSite/WebDesign/MaintainWebPage/updateweb.py(1111)isTempFile()
 138 #            -> fileName = fileInfo[ self.userSettings.FILE_NAME ]
 139 #
 140 #    List the source lines
 141 #
 142 #            (Pdb) l 1108
 143 #            1103           if numChanges > 0:
 144 #            1104               return True
 145 #            1105   
 146 #            1106           return False
 147 #            1107   
 148 #            1108B      def isTempFile( self, fileInfo ):
 149 #            1109           """Identify a file name as a temporary file"""
 150 #            1110   
 151 #            1111 ->            fileName = fileInfo[ self.userSettings.FILE_NAME ]
 152 #            1112   
 153 #            1113           # Suffixes and names for temporary files be deleted.
 154 #            (Pdb) l
 155 #            1114           [pat, match] = patternMatch( self.userSettings.TEMP_FILE_SUFFIXES, fileName )
 156 #            1115           # Remove any files containing twiddles anywhere in the name.
 157 #            1116           if match or fileName.find( self.userSettings.VIM_TEMP_FILE_EXT ) >= 0:
 158 #            1117               return True
 159 #            1118   
 160 #            1119           return False
 161 #            1120   
 162 #            1121       def isTempDir( self, dirName ):
 163 #            1122           """Identify a name as a temporary directory."""
 164 #            1123   
 165 #            1124           p = re.compile( self.userSettings.TEMP_DIR_SUFFIX, re.VERBOSE )
 166 #            (Pdb) 
 167 #
 168 #    What we really want is to set a break at this line if the condition matches
 169 #
 170 #            (Pdb) b 1116
 171 #            Breakpoint 3 at /Users/seanoconnor/Desktop/Sean/WebSite/WebDesign/MaintainWebPage/updateweb.py:1116
 172 #
 173 #            (Pdb) b
 174 #            Num Type         Disp Enb   Where
 175 #            1   breakpoint   keep yes   at /Users/seanoconnor/Desktop/Sean/WebSite/WebDesign/MaintainWebPage/updateweb.py:1049
 176 #               breakpoint already hit 1 time
 177 #            2   breakpoint   keep yes   at /Users/seanoconnor/Desktop/Sean/WebSite/WebDesign/MaintainWebPage/updateweb.py:1108
 178 #               breakpoint already hit 1 time
 179 #            3   breakpoint   keep yes   at /Users/seanoconnor/Desktop/Sean/WebSite/WebDesign/MaintainWebPage/updateweb.py:1116
 180 #
 181 #            (Pdb) condition 3 fileName.find( self.userSettings.VIM_TEMP_FILE_EXT ) >= 0
 182 #            New condition set for breakpoint 3.
 183 #
 184 #    So let's disable the other breakpoints,
 185 #            
 186 #            (Pdb) disable 1 2
 187 #            Disabled breakpoint 1 at /Users/seanoconnor/Desktop/Sean/WebSite/WebDesign/MaintainWebPage/updateweb.py:1049
 188 #            Disabled breakpoint 2 at /Users/seanoconnor/Desktop/Sean/WebSite/WebDesign/MaintainWebPage/updateweb.py:1108
 189 #
 190 #            (Pdb) b
 191 #            Num Type         Disp Enb   Where
 192 #            1   breakpoint   keep no    at /Users/seanoconnor/Desktop/Sean/WebSite/WebDesign/MaintainWebPage/updateweb.py:1049
 193 #               breakpoint already hit 1 time
 194 #            2   breakpoint   keep no    at /Users/seanoconnor/Desktop/Sean/WebSite/WebDesign/MaintainWebPage/updateweb.py:1108
 195 #               breakpoint already hit 1 time
 196 #            3   breakpoint   keep yes   at /Users/seanoconnor/Desktop/Sean/WebSite/WebDesign/MaintainWebPage/updateweb.py:1116
 197 #               stop only if fileName.find( self.userSettings.VIM_TEMP_FILE_EXT ) >= 0
 198 #
 199 #    Continue until the condition fires,
 200 #
 201 #            (Pdb) c
 202 #            > /Users/seanoconnor/Desktop/Sean/WebSite/WebDesign/MaintainWebPage/updateweb.py(1116)isTempFile()
 203 #            -> if match or fileName.find( self.userSettings.VIM_TEMP_FILE_EXT ) >= 0:
 204 #
 205 #            (Pdb) l
 206 #            1111           fileName = fileInfo[ self.userSettings.FILE_NAME ]
 207 #            1112   
 208 #            1113           # Suffixes and names for temporary files be deleted.
 209 #            1114           [pat, match] = patternMatch( self.userSettings.TEMP_FILE_SUFFIXES, fileName )
 210 #            1115           # Remove any files containing twiddles anywhere in the name.
 211 #            1116B->            if match or fileName.find( self.userSettings.VIM_TEMP_FILE_EXT ) >= 0:
 212 #            1117               return True
 213 #            1118   
 214 #            1119           return False
 215 #
 216 #    Print a few variables
 217 #
 218 #            (Pdb) p fileName
 219 #            'WebDesign/MaintainWebPage/.updateweb.py.un~'
 220 #
 221 #    We do the right thing for this file,
 222 #
 223 #            (Pdb) n
 224 #            > /Users/seanoconnor/Desktop/Sean/WebSite/WebDesign/MaintainWebPage/updateweb.py(1117)isTempFile()
 225 #            -> return True
 226 #
 227 #    Backtrace shows the calling stack,
 228 #
 229 #            (Pdb) bt
 230 #              /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/bdb.py(431)run()
 231 #            -> exec(cmd, globals, locals)
 232 #              <string>(1)<module>()
 233 #              /Users/seanoconnor/Desktop/Sean/WebSite/WebDesign/MaintainWebPage/updateweb.py(735)main()
 234 #            -> changed = master.clean()
 235 #              /Users/seanoconnor/Desktop/Sean/WebSite/WebDesign/MaintainWebPage/updateweb.py(1077)clean()
 236 #            -> if self.isTempFile( f ):
 237 #            > /Users/seanoconnor/Desktop/Sean/WebSite/WebDesign/MaintainWebPage/updateweb.py(1116)isTempFile()
 238 #            -> if match or fileName.find( self.userSettings.VIM_TEMP_FILE_EXT ) >= 0:
 239 #            #
 240 #
 241 #    Keep going until we finish the whole program,
 242 #
 243 #            (Pdb) c
 244 #            ...done!
 245 #            Scanning remote web site......done!
 246 #            Synchronizing remote and local web sites...Uploading changed file StyleSheet.css...
 247 #            Uploading changed file Art/Art.html...Uploading changed file WebDesign/MaintainWebPage/updateweb.py...
 248 #            Creating new remote directory WebDesign/MaintainWebPage/__pycache__......done!
 249 #
 250 #    Quit Python and exit,
 251 #
 252 #            >>> quit()
 253 #
 254 #============================================================================
 255 
 256 #----------------------------------------------------------------------------
 257 #  Load Python Packages
 258 #----------------------------------------------------------------------------
 259 
 260 # OS stuff
 261 import sys
 262 import os
 263 import platform
 264 import optparse
 265 import shutil
 266 
 267 # Regular expressions
 268 import re
 269 
 270 # FTP stuff
 271 import ftplib
 272 
 273 # Date and time
 274 import time
 275 import stat
 276 import datetime
 277 
 278 # Logging
 279 import logging
 280 
 281 # Unit testing
 282 import unittest
 283 
 284 # Enumerated types (v3.4)
 285 from enum import Enum
 286 
 287 
 288 #----------------------------------------------------------------------------
 289 #  User settings.
 290 #----------------------------------------------------------------------------
 291 
 292 # Enum types for how to walk the directory tree.
 293 class TreeWalk( Enum ):
 294     BREADTH_FIRST_SEARCH = 1
 295     DEPTH_FIRST_SEARCH   = 2
 296 
 297 # 'Enum' types for properties of directories and files.
 298 class FileType( Enum ):
 299     DIRECTORY                 = 0
 300     FILE                      = 1
 301     ON_MASTER_ONLY            = 2
 302     ON_REMOTE_ONLY            = 3
 303     ON_BOTH_MASTER_AND_REMOTE = 4
 304 
 305 # Megatons of user selectable settings.
 306 class UserSettings:
 307     # Logging control.
 308     LOGFILENAME = ""
 309     VERBOSE          = False  # Verbose mode.  Prints out everything.
 310     CLEANONLY        = False  # Clean the local master web site only.
 311     UNITTEST         = False  # Run a unit test of a function.
 312 
 313     # When diving into the MathJax directory, web walking the deep directories
 314     # may exceed Python's default recursion limit of 1000.
 315     RECURSION_DEPTH = 5000
 316     sys.setrecursionlimit( RECURSION_DEPTH )
 317 
 318     # Fields in the file information (fileInfo) structure.
 319     FILE_NAME      = 0
 320     FILE_TYPE      = 1
 321     FILE_DATE_TIME = 2
 322     FILE_SIZE      = 3
 323 
 324     # Parameter file which contains web server account login information for FTP.
 325     PARAMETERS_FILE = "/private/param.txt"
 326 
 327     # Line numbers in the PARAMETERS_FILE, starting from 0.  All other lines are comments, and are skipped.
 328     SERVER              = 19
 329     USER                = 20
 330     PASSWORD            = 21
 331     FTP_ROOT            = 22
 332     FILE_SIZE_LIMIT     = 23
 333 
 334     # Map month names onto numbers.
 335     monthToNumber = { 'Jan':1, 'Feb':2, 'Mar':3, 'Apr':4, 'May':5, 'Jun':6, 'Jul':7, 'Aug':8, 'Sep':9, 'Oct':10, 'Nov':11, 'Dec':12 }
 336 
 337     # List of directories to skip over when updating the web page.  They will not be uploaded to the web host.
 338     # They will be listed as WARNINGs in the log. 
 339     # Examples:
 340     #     Git local admin directories
 341     #     Private admin settings.
 342     #     MathJax because it has thousands of small files and doesn't change often.
 343     DIR_TO_SKIP     = "private|\.git|\.svn|\.idea|MathJax|build|Debug|Release"
 344 
 345     # List of files to skip over when updating the web page.  They will not be uploaded to the web host.
 346     # They will be listed as WARNINGs in the log. 
 347     # Examples:
 348     #     SVN setttings, 
 349     #     .htaccess (because it doesn't show up on the output of ftp LIST, so we must upload manually)
 350     FILE_TO_SKIP    = "\.svnignore|\.htaccess"
 351 
 352     # File extension for text files.
 353     TEXT_FILE_EXT  = ".txt"
 354 
 355     # Suffixes for temporary files which will be deleted during the cleanup phase.
 356     TEMP_FILE_SUFFIXES = r"""        # Use Python raw strings.
 357         \.                           # Match the dot in the file name.
 358                                      # Now begin matching the file name suffix.
 359                                      # (?: non-capturing match for the regex inside the parentheses, i.e. matching string cannot be retrieved later.
 360                                      # Now match any of the following file extensions:
 361         (?: o   | obj | lib | exe |  #     Object files generated by C, C++, etc compilers
 362                               pyc |  #     Object file generated by the Python compiler
 363                   ilk | pdb | sup |  #     Temp files from VC++ compiler
 364             idb | ncb | opt | plg |  #     Temp files from VC++ compiler
 365             sbr | bsc | map | bce |  #     Temp files from VC++ compiler
 366             res | aps | dep | db  |  #     Temp files from VC++ compiler
 367                               jbf |  #     Paintshop Pro
 368                       class | jar |  #     Java compiler
 369                               log |  #     WS_FTP
 370                               fas |  #     CLISP compiler
 371                         swp | swo |  #     Vim editor
 372                               aux |  #     TeX auxilliary files.
 373           DS_Store  | _\.DS_Store |  #     Mac OS finder folder settings.
 374                        _\.Trashes |  #     Mac OS recycle bin
 375         gdb_history)                 #     GDB history
 376         $                            #     Now we should see only the end of line.
 377         """
 378 
 379     # Special case:  Vim temporary files contain a twiddle anywhere in the name.
 380     VIM_TEMP_FILE_EXT = "~"
 381 
 382     # Suffixes for temporary directories which should be deleted during the cleanup phase.
 383     TEMP_DIR_SUFFIX   = r"""
 384         (?: Debug | Release |        # C++ compiler
 385            ipch   | \.vs    |        # Temp directories from VC++ compiler
 386         \.Trashes | \.Trash)         # Mac OS recycle bin
 387         $
 388         """
 389 
 390     # File extension for an internally created temporary file.
 391     TEMP_FILE_EXT     = ".new"
 392 
 393     # Suffixes for HTML hypertext and CSS style sheet files.
 394     SOURCE_FILE_SUFFIX=r"""
 395         (?: makefile$               # Any file called makefile is a source file.
 396           |
 397           (\.                       # Match the filename suffix after the .
 398                                     # Now match any of these suffixes:
 399              (?: html | htm |           #     HTML hypertext
 400                   css |                 #     CSS style sheet
 401                   c | cpp | h | hpp |   #     C++ and C
 402                   js |                  #     Javascript
 403                   py |                  #     Python
 404                   lsp |                 #     LISP
 405                   m  |                  #     MATLAB
 406                   FOR | for | f |       #     FORTRAN
 407                   txt | dat |           #     Data files
 408                   sh | bashrc |         #     Bash
 409                   bash_profile | 
 410                   bash_logout)
 411              $)
 412          )
 413          """
 414 
 415     # Update my email address.
 416     # This is tricky:  Prevent matching and updating the name within in this Python source file by using the character class brackets.
 417     OLD_EMAIL_ADDRESS= r"""
 418         artifex\!AT\!sean[e]rikoconnor\!DOT\!freeservers\!DOT\!com
 419         """
 420     NEW_EMAIL_ADDRESS="artificer!AT!seanerikoconnor!DOT!freeservers!DOT!com"
 421 
 422     # Rewrite a line by replacing an old substring with a new substring.
 423     OLD_SUBSTRING=r"""
 424                <script\s+type='text/javascript'>
 425                """
 426 
 427     NEW_SUBSTRING="<script>"
 428 
 429     # Change current software version to new software version for all lines in files of the form,
 430     #      Primpoly Version nnnn.nnnn
 431     CURRENT_SOFTWARE_VERSION= r"""
 432         Primpoly
 433         \s+
 434         Version
 435         \s+
 436         ([0-9]+)   # The two part version number NNN.nnn
 437         \.
 438         ([0-9]+)
 439         """
 440     NEW_SOFTWARE_VERSION="Primpoly Version 14.0"
 441 
 442     # Match a copyright line.  Then extract the copyright symbol which can be (C) or &copy; and extract the old year.
 443     TWO_DIGIT_YEAR_FORMAT="%02d"
 444     COPYRIGHT_LINE= r"""
 445         Copyright                       # Copyright.
 446         \D+                             # Any non-digits.
 447         (?P<symbol> \(C\) | &copy;)     # Match and extract the copyright symbol.
 448         \D+                             # Any non-digits.
 449         (?P<oldYear>[0-9]+)             # Match and extract the old copyright year, then place it into variable 'oldYear'
 450         -                               # to
 451         ([0-9]+)                        # New copyright year.
 452         """
 453 
 454     # Match a line containing the words,
 455     #    last updated YY
 456     # and extract the two digit year YY.
 457     LAST_UPDATED_LINE=r"""
 458         last\s+         # Match the words "last updated"
 459         updated\s+
 460         \d+             # Day number
 461         \s+             # One or more blanks or tabs
 462         [A-Za-z]+       # Month
 463         \s+             # One or more blanks or tabs
 464         (?P<year>\d+)   # Two digit year.  Place it into the variable 'year'
 465         """
 466 
 467     # Web server root directory.
 468     DEFAULT_ROOT_DIR   = "/"
 469 
 470     # The ftp listing occasionally shows the wrong date when we are near New Year's Day.
 471     # When do we ignore?
 472     DAYS_NEWER_FOR_REMOTE_NEW_YEARS_GLITCH = 360
 473 
 474     # Upload only if we are newer by more than a few minutes.  Allows for a little slop in time stamps on server or host.
 475     MINUTES_NEWER_FOR_MASTER_BEFORE_UPLOAD = 5.0
 476     DAYS_NEWER_FOR_MASTER_BEFORE_UPLOAD = (1.0 / 24.0) * (1.0 / 60.0) * MINUTES_NEWER_FOR_MASTER_BEFORE_UPLOAD
 477 
 478     # An ftp list command line should be at least this many chars, or we'll suspect and error.
 479     MIN_FTP_LINE_LENGTH = 7
 480 
 481     # Parse an ftp listing, extracting <bytes> <mon> <day> <hour> <min> <year> <filename>
 482     # ftp listings are generally similar to UNIX ls -l listings.
 483     #
 484     # Some examples:
 485     #
 486     # (1) Freeservers ftp listing,
 487     #
 488     #          0        1   2                3           4    5   6   7      8
 489     #     drwxr-xr-x    3 1000             1000         4096 Nov 18  2006 Electronics
 490     #     -rw-r--r--    1 1000             1000        21984 Jun  4 03:46 StyleSheet.css
 491     #     -rw-r--r--    1 1000             1000         2901 Sep 26 17:12 allclasses-frame.html
 492     #
 493     # (2) atspace ftp listing,
 494     #
 495     #     drwxr-xr-x    3  seanerikoconnor vusers         49 Apr  7  2006 Electronics
 496     #     -rw-r--r--    1  seanerikoconnor vusers      21984 Jun  4 04:03 StyleSheet.css
 497     #
 498     FTP_LISTING= r"""
 499         [drwx-]+            # Unix type file mode.
 500         \s+                 # One or more blanks or tabs.
 501         \d+                 # Number of links.
 502         \s+
 503         \w+                 # Owner.
 504         \s+
 505         \w+                 # Group.
 506         \s+
 507         (?P<bytes> \d+)     # File size in bytes, placed into the variable 'bytes'.
 508         \s+
 509         (?P<mon> \w+)       # Month modified, placed into the variable 'mon'.
 510         \s+
 511         (?P<day> \d+)       # Day modified, placed into the variable 'day'.
 512         \s+
 513         (
 514             (?P<hour> \d+)  # Hour modified, placed into the variable 'hour'.
 515             :
 516             (?P<min> \d+)   # Minute modified, placed into the variable 'min'.
 517         |
 518             (?P<year> \d+)  # If hours and minutes are absent (happens when year is not the current year), extract the year instead.
 519         )
 520         \s+
 521         (?P<filename> [A-Za-z0-9"'.\-_,~()=+#]+)    # Path and file name containing letters, numbers, and funny characters.
 522         $                                           # We must escape some of these characters with a backslash, \.
 523         """
 524 
 525     def __init__( self ):
 526         """Set up the user settings."""
 527 
 528         self.privateSettings = []
 529         self.platformName = ""
 530         self.masterRootDir = ""
 531 
 532         # Import the user settings from the parameter file.
 533         self.getPlatform()
 534         self.getMasterRootDir()
 535         self.getPrivateSettings()
 536 
 537     def getPrivateSettings( self ):
 538         """
 539         Read web account private settings from a secret offline parameter file.  Return an array of strings.
 540         e.g. self.privateSettings[ 19 ] = "seanerikoconnor.freeservers.com", where the index 19 = UserSettings.SERVER
 541         """
 542 
 543         # Private file which contains my account settings.
 544         inFileName = self.masterRootDir + self.PARAMETERS_FILE
 545 
 546         try:
 547             fin = open( inFileName, "r" )
 548         except IOError as detail:
 549             logging.error( "Cannot open the private settings file {0:s}: {1:s}.  Aborting...".format( inFileName, str( detail ) ) )
 550             sys.exit()
 551 
 552         # Read each line of the file, aborting if there is a read error.
 553         try:
 554             line = fin.readline()
 555             while line:
 556                 self.privateSettings.append( line.strip() )  # Strip off leading and trailing whitespace.
 557                 line = fin.readline()
 558             fin.close()
 559         except Exception as detail:
 560             logging.error( "File I/O error reading private settings file {0:s}: {1:s}.  Aborting...".format( inFileName, str( detail ) ) )
 561             sys.exit()
 562 
 563         return
 564 
 565     def getPlatform( self ):
 566         """Find out which type of computer platform we are running on. """
 567 
 568         # Look at the computer name and try to figure out which of my platforms I'm running on.
 569         if platform.node().endswith( 'Artificer' ):       # Mac OS on MacBook Pro
 570             self.platformName = "Mac OS"
 571         elif platform.node() == "Waring":  # Windows 7 64-bit running in Parallels running in Mac OS.
 572             self.platformName = "Win"
 573         elif platform.node() == "Gauss":   # Ubuntu Linux on my old MacBook Pro
 574             self.platformName = "Linux"
 575         else:                                             # Guessing Mac OS
 576             self.platformName = "Mac OS"
 577             logging.error( "Can't determine the computer platform from node name |{0:s}|:  guessing Mac OS".format( platform.node() ))
 578 
 579         return
 580 
 581     def getMasterRootDir( self ):
 582         """Get the master web site root directory on this platform."""
 583 
 584         # Each platform has a definite root directory:
 585         # Mac OS
 586         if self.platformName == "Mac OS":
 587             self.masterRootDir = "/Users/seanoconnor/Desktop/Sean/WebSite"
 588         # Ubuntu Linux
 589         elif self.platformName == "Linux":
 590             self.masterRootDir = "/home/seanoconnor/Desktop/Sean/WebSite"
 591         # Windows on Parallels on Mac OS, /cygdrive/c/cygwin/home/Sean/WebSite
 592         elif self.platformName == "Win":
 593             self.masterRootDir = "C:/cygwin/home/Sean/Sean/WebSite"
 594         return
 595 
 596 #----------------------------------------------------------------------------
 597 #  Helper functions
 598 #----------------------------------------------------------------------------
 599 
 600 # Pattern match a regular expression on a string, ignoring case.
 601 def patternMatch( regularExpression, searchString ):
 602     pat = re.compile( regularExpression, re.VERBOSE | re.IGNORECASE )
 603     match = pat.search( searchString )
 604     return [pat, match]
 605 
 606 #----------------------------------------------------------------------------
 607 #  Unit test some of the individual functions.
 608 #----------------------------------------------------------------------------
 609 
 610 class UnitTest( unittest.TestCase ):
 611     def setUp( self ):
 612         self.userSettings = UserSettings()
 613         self.userSettings.getPlatform()
 614         self.userSettings.getMasterRootDir()
 615         self.privateSettings = self.userSettings.privateSettings
 616 
 617     def tearDown( self ):
 618         self.userSettings = None
 619         self.privateSettings = None
 620 
 621     def test_userSettings( self ):
 622         print( "User Settings." )
 623         print( "File size limit             =  {0:d} K".format( int( self.privateSettings[self.userSettings.FILE_SIZE_LIMIT] )))
 624         print( "Computer platform node name = |{0:s}|".format( platform.node() ))
 625         print( "Master root directory       = |{0:s}|".format( self.userSettings.masterRootDir ))
 626         self.assertTrue( True )
 627 
 628     def test_copyrightUpdating( self ):
 629         line    = "Copyright (C) 1234-2019 by Sean Erik O'Connor.  Copyright &copy; 1234-2019 by Sean Erik O'Connor"
 630         newline = "Copyright (C) 1234-2019 by Sean Erik O'Connor.  Copyright &copy; 1234-2019 by Sean Erik O'Connor"
 631         [pat, match] = patternMatch( self.userSettings.COPYRIGHT_LINE, line )
 632         if match:
 633             oldYear = int( match.group( 'oldYear' ))
 634             currentYear = int(time.gmtime()[0])      # Same as call to self.getCurrentYear():
 635             if oldYear < currentYear:
 636                 newCopyright = 'Copyright \g<symbol> \g<oldYear>-' + str( currentYear )
 637                 rewrittenline = pat.sub( newCopyright, line )
 638                 self.assertEqual( newline, rewrittenline, "newline = |{0:s}| rewrittenline = |{1:s}|".format( newline, rewrittenline ))
 639         else:
 640             self.fail()
 641 
 642     def test_updateSoftwareVersion( self ):
 643         currentVersionLine = "|     Primpoly Version 14.0 - A Program for Computing Primitive Polynomials."
 644         newVersionLine     = "|     Primpoly Version 14.0 - A Program for Computing Primitive Polynomials."
 645         [pat, match] = patternMatch( self.userSettings.CURRENT_SOFTWARE_VERSION, currentVersionLine )
 646         if match:
 647             newVersion = self.userSettings.NEW_SOFTWARE_VERSION
 648             updatedVersionLine = pat.sub( newVersion, currentVersionLine )
 649             self.assertEqual( updatedVersionLine, newVersionLine, "updated version line = {0:s} new line = {1:s}".format( updatedVersionLine, newVersionLine ))
 650         else:
 651             self.fail()
 652 
 653     def test_extractFileNameFromFTPListing( self ):
 654         ftpLine = "-rw-r--r--    1 1000             1000         2901 Sep 26 17:12 allclasses-frame.html"
 655         extractedFileName = "allclasses-frame.html"
 656         [pat, match] = patternMatch( self.userSettings.FTP_LISTING, ftpLine )
 657         if match:
 658             filename = match.group( 'filename' )
 659             self.assertEqual( filename, extractedFileName, "ftpLine = {0:s} extracted file name = {1:s}".format( ftpLine, extractedFileName ))
 660         else:
 661             self.fail()
 662 
 663     def test_checkReplaceSubstring( self ):
 664         oldline = "<script type=\'text/javascript\'>"
 665         newline = "<script>"
 666         [pat, match] = patternMatch( self.userSettings.OLD_SUBSTRING, oldline )
 667 
 668         # Replace a substring.
 669         if match:
 670             rewrittenline = pat.sub( self.userSettings.NEW_SUBSTRING, oldline )
 671             self.assertEqual( newline, rewrittenline, "newline = |{0:s}| rewrittenline = |{1:s}|".format( newline, rewrittenline ))
 672         else:
 673             print( "No match for pattern |{0:s}| in oldline |{1:s}|".format( self.userSettings.OLD_SUBSTRING, oldline ))
 674             self.fail()
 675 
 676     def test_fileTimeAndDate( self ):
 677         print( "Root directory = |{0:s}|".format( self.userSettings.masterRootDir ))
 678         fileName = self.userSettings.masterRootDir + "/Electronics/WebPageImages/PowerSupply1Schematic.psd"
 679         fileEpochTime = os.path.getmtime( fileName )
 680         fileTimeUTC = time.gmtime( fileEpochTime )[ 0 : 6 ]
 681         d = datetime.datetime( fileTimeUTC[0], fileTimeUTC[1], fileTimeUTC[2], fileTimeUTC[3], fileTimeUTC[4], fileTimeUTC[5])
 682         print( "file {0:s} datetime {1:s}\n".format( fileName, d.ctime() ) )
 683         self.assertTrue( True )
 684 
 685 #----------------------------------------------------------------------------
 686 #  Main function
 687 #----------------------------------------------------------------------------
 688 
 689 def main():
 690     """Main program.  Clean up and update my web site."""
 691 
 692     # Print the obligatory legal notice.
 693     print( """
 694     updateweb Version 5.0 - A Python utility program which maintains my web site.
 695     Copyright (C) 2007-2019 by Sean Erik O'Connor.  All Rights Reserved.
 696 
 697     It deletes temporary files, rewrites old copyright lines and email address
 698     lines in source files, then synchronizes all changes to my web sites.
 699 
 700     updateweb comes with ABSOLUTELY NO WARRANTY; for details see the
 701     GNU General Public License.  This is free software, and you are welcome
 702     to redistribute it under certain conditions; see the GNU General Public
 703     License for details.
 704     """ )
 705 
 706     print( "Running Python Version {0:d}.{1:d}.{2:d}".format( sys.version_info[ 0 ], sys.version_info[ 1 ], sys.version_info[ 2 ] ) )
 707 
 708     #---------------------------------------------------------------------
 709     #  Load default settings and start logging.
 710     #---------------------------------------------------------------------
 711 
 712     # Default user settings.
 713     userSettings = UserSettings()
 714 
 715     # Get command line options such as --verbose.  Pass them back as flags in userSettings.
 716     Opt( userSettings )
 717 
 718     # Load all unit test functions named test_* from UnitTest class, run the tests and exit.
 719     if userSettings.UNITTEST:
 720         suite = unittest.TestLoader().loadTestsFromTestCase( UnitTest )
 721         unittest.TextTestRunner(verbosity=2).run( suite )
 722         sys.exit()
 723 
 724     # Start logging to file.  Verbose turns on logging for
 725     # DEBUG, INFO, WARNING, ERROR, and CRITICAL levels.
 726     # Otherwise we log only WARNING, ERROR, and CRITICAL levels.
 727     if userSettings.VERBOSE:
 728         loglevel = logging.DEBUG
 729     else:
 730         loglevel = logging.WARNING
 731 
 732     # Pick the log file name on the host.
 733     if userSettings.CLEANONLY:
 734         userSettings.LOGFILENAME = "/private/logMaster.txt"
 735     else:
 736         userSettings.LOGFILENAME = "/private/logRemote.txt"
 737 
 738     logging.basicConfig( level    = loglevel,
 739                          format   = '%(asctime)s %(levelname)-8s %(message)s',
 740                          datefmt  = '%a, %d %b %Y %H:%M:%S',
 741                          filename = userSettings.masterRootDir + userSettings.LOGFILENAME,
 742                          filemode = 'w' )
 743 
 744     logging.debug( "*** Begin logging ******************************" )
 745 
 746     #---------------------------------------------------------------------
 747     #  Scan the master web site, finding out all files and directories.
 748     #---------------------------------------------------------------------
 749     try:
 750         logging.debug( "Scanning master (local on disk) web site" )
 751         master = MasterWebSite( userSettings )
 752 
 753         print( "Scanning and cleaning local web site...", end='', flush=True ) # Suppress newline and flush output buffer so we can see the message right away.
 754 
 755         master.scan()
 756 
 757         # Clean up the directory by rewriting source code and hypertext and
 758         # removing temporary files.
 759         logging.debug( "Cleaning up master (local on disk) web site" )
 760         changed = master.clean()
 761 
 762         # Rescan if any changes happened.
 763         if changed:
 764             logging.debug( "Detected changes due to to cleanup." )
 765             master.quit()
 766             logging.debug( "Disposing of the old scan." )
 767             del master
 768 
 769             master = MasterWebSite( userSettings )
 770             logging.debug( "*** Rescanning ****************************" )
 771             master.scan()
 772         else:
 773             logging.debug( "No changes detected.  Keeping the original scan." )
 774 
 775         print( "...done!", flush=True )
 776 
 777         # Master web site directories.
 778         masterDirectoryList  = master.directories
 779 
 780         # Master web site filenames only.
 781         masterFilesList = [ fileInfo[ userSettings.FILE_NAME ] for fileInfo in master.files ]
 782 
 783         logging.debug( "*** Master Directories **********************" )
 784         for d in masterDirectoryList:  logging.debug( "\t {0:s} (directory)".format( d ))
 785 
 786         logging.debug( "*** Master Files **********************" )
 787         for f in masterFilesList: logging.debug( "\t {0:s} (file)".format( f ))
 788 
 789         master.quit()
 790 
 791         # Clean up master web site only.  Don't update remote web sites.
 792         if userSettings.CLEANONLY:
 793             logging.debug( "Cleanup finished.  Exiting..." )
 794             sys.exit()
 795 
 796         #---------------------------------------------------------------------
 797         #  Scan the remote hosted web site.
 798         #---------------------------------------------------------------------
 799 
 800         logging.debug( "Reading private settings." )
 801         privateSettings = userSettings.privateSettings
 802 
 803         print( "Scanning remote web site...", end='', flush=True )
 804 
 805         # Pick which web site to update.
 806         logging.debug( "Connecting to primary remote site." )
 807         remote = RemoteWebSite( userSettings,
 808                                 privateSettings[userSettings.SERVER], privateSettings[userSettings.USER],
 809                                 privateSettings[userSettings.PASSWORD], privateSettings[userSettings.FTP_ROOT] )
 810 
 811         logging.debug( "Scanning remote web site" )
 812         remote.scan()
 813         remote.quit()
 814 
 815         print( "...done!", flush=True )
 816 
 817         remoteDirectoryList  = remote.directories
 818         remoteFilesList = [ fileInfo[ userSettings.FILE_NAME ] for fileInfo in remote.files ]
 819 
 820         logging.debug( "*** Remote Directories **********************" )
 821         for d in remoteDirectoryList: logging.debug( "\t remote dir:  {0:s}".format( d ))
 822 
 823         logging.debug( "*** Remote Files **********************" )
 824         for f in remoteFilesList: logging.debug( "\t remote file: {0:s}".format( f ))
 825 
 826         #---------------------------------------------------------------------
 827         # Synchronize the master and remote web sites.
 828         #---------------------------------------------------------------------
 829 
 830         print( "Synchronizing remote and local web sites...", end='', flush=True )
 831 
 832         # Primary web site.
 833         logging.debug( "Connecting to primary remote site for synchronization." )
 834         u = UpdateWeb( userSettings,
 835                        privateSettings[userSettings.SERVER], privateSettings[userSettings.USER],
 836                        privateSettings[userSettings.PASSWORD], privateSettings[userSettings.FTP_ROOT],
 837                        privateSettings[userSettings.FILE_SIZE_LIMIT],
 838                        master.directories, master.files,
 839                        remote.directories, remote.files )
 840 
 841         logging.debug( "Synchronizing remote web site" )
 842         u.update()
 843         u.quit()
 844 
 845         print( "...done!", flush=True )
 846 
 847         del u
 848         del remote
 849         del master
 850 
 851     except RecursionError as detail:
 852         logging.error( "Walking the directory tree got too deep for Python's recursion {0:s}.  Aborting...".format( str( detail ) ))
 853         sys.exit()
 854 
 855     return
 856 
 857 #----------------------------------------------------------------------------
 858 #  Command line option class
 859 #----------------------------------------------------------------------------
 860 
 861 class Opt( object ):
 862     """Get the command line options."""
 863 
 864     def __init__( self, userSettings ):
 865         """Get command line options"""
 866         commandLineParser = optparse.OptionParser()
 867 
 868         # Log all changes, not just warnings and errors.
 869         commandLineParser.add_option( "-v", "--verbose", dest="verbose",
 870                                       help="Turn on verbose mode to log everything",
 871                                       action="store_true" )
 872 
 873         commandLineParser.add_option( "-c", "--cleanonly", dest="cleanonly",
 874                                       help="Do a cleanup on the master web site only.",
 875                                       action="store_true" )
 876 
 877         commandLineParser.add_option( "-t", "--test", dest="test",
 878                                       help="Run unit tests of functions.",
 879                                       action="store_true" )
 880 
 881         (options, args) = commandLineParser.parse_args()
 882 
 883         if len(args) >= 1:
 884             commandLineParser.error( "ERROR:  updateweb.py should not have any arguments:  do python updateweb.py --help" )
 885 
 886         if options.verbose:
 887             userSettings.VERBOSE = True
 888 
 889         if options.cleanonly:
 890             userSettings.CLEANONLY  = True
 891 
 892         if options.test:
 893             userSettings.UNITTEST  = True
 894 
 895 #----------------------------------------------------------------------------
 896 #  Base class for web site processing.
 897 #----------------------------------------------------------------------------
 898 
 899 class WebSite( object ):
 900     """
 901     Abstract class used for analyzing both master (local to disk) and remote (ftp server) web sites.
 902     Contains the common web-walking functions which traverse the directory structures and files.
 903     Subclasses fill in the lower level functions which actually access the directories and files.
 904     Subclasses may also define additional functions unique to local web sites.
 905     """
 906 
 907     def __init__( self, settings ):
 908         """Set up root directories"""
 909 
 910         # Import the user settings.
 911         self.userSettings = settings
 912 
 913         # Queue keeps track of directories not yet processed.
 914         self.queue       = []
 915 
 916         # List of all directories traversed.
 917         self.directories = []
 918 
 919         # List of files traversed, with file information.
 920         self.files       = []
 921 
 922         # Find out the root directory and go there.
 923         self.rootDir = self.getRootDir()
 924         self.gotoRootDir( self.rootDir )
 925 
 926     def getCurrentYear( self ):
 927         """Get the current year.  
 928         This is a static method since it doesn't change class member variables."""
 929         return int(time.gmtime()[0])
 930 
 931     def getCurrentTwoDigitYear( self ):
 932         """Get the last two digits of the current year."""
 933         return self.getCurrentYear() % 100
 934 
 935     def isFileInfoType( self, fileInfo ):
 936         """"Check if we have a file information structure or merely a simple file name.  
 937         This is a static method since it doesn't change class member variables."""
 938         try:
 939             if isinstance( fileInfo, list ):
 940                 return True
 941             elif isinstance( fileInfo, str ):
 942                 return False
 943             else:
 944                 logging.error( "isFileInfoType found a bad type.  Aborting..." )
 945                 sys.exit()
 946         except TypeError as detail:
 947             logging.error( "isFileInfoType found a bad type {0:s}.  Aborting...".format( str( detail ) ))
 948             sys.exit()
 949 
 950     def getRootDir( self ):
 951         """Subclass:  Put code here to get the root directory"""
 952         return ""
 953 
 954     def gotoRootDir( self, rootDir ):
 955         """Subclass:  Put code here to go to the root directory"""
 956         pass # Pythons's do-nothing statement.
 957 
 958     def oneLevelDown( self, d ):
 959         """Subclass:  Fill in with a method which returns a list of the
 960         directories and files immediately beneath dir"""
 961         return [], []
 962 
 963     def walk( self, d, typeOfTreeSearch=TreeWalk.BREADTH_FIRST_SEARCH ):
 964         """Walk a directory in either depth first or breadth first order.  BFS is the default."""
 965 
 966         # Get all subfiles and subdirectories off this node.
 967         subdirectories, subfiles = self.oneLevelDown( d )
 968 
 969         # Add all the subfiles in order.
 970         for f in subfiles:
 971 
 972             name = self.stripRoot( f )
 973             logging.debug( "Webwalking:  Adding file {0:s} to list.".format( name[self.userSettings.FILE_NAME] ))
 974 
 975             # Some files are private so skip them from consideration.
 976             pat=re.compile( self.userSettings.FILE_TO_SKIP )
 977 
 978             if pat.search( name[self.userSettings.FILE_NAME] ):
 979                 logging.warning( "Webwalking:  Skipping private file {0:s}".format( name[self.userSettings.FILE_NAME] ))
 980             # Don't upload the log file due to file locking problems.
 981             elif name[self.userSettings.FILE_NAME].find( self.userSettings.LOGFILENAME ) >= 0:
 982                 logging.debug( "Webwalking:  Skipping log file {0:s}".format( name[self.userSettings.FILE_NAME] ))
 983             # File size limit on some servers.
 984             else:
 985                 self.files.append( name )
 986 
 987         # Queue up the subdirectories.
 988         for d in subdirectories:
 989 
 990             # Some directories are private so skip them from consideration.
 991             pat=re.compile( self.userSettings.DIR_TO_SKIP )
 992             if pat.search( d ):
 993                 logging.warning( "Webwalking:  Skipping private dir {0:s}".format( d ))
 994             else:
 995                 logging.debug( "Webwalking:  Pushing dir {0:s} on the queue.".format( d ))
 996                 self.queue.append( d )
 997 
 998         # Search through the directories.
 999         while len( self.queue ) > 0:
1000             # For breadth first search, remove from beginning of queue.
1001             if typeOfTreeSearch == TreeWalk.BREADTH_FIRST_SEARCH:
1002                 d = self.queue.pop(0)
1003 
1004             # For depth first search, remove from end of queue.
1005             elif typeOfTreeSearch == TreeWalk.DEPTH_FIRST_SEARCH:
1006                 d = self.queue.pop()
1007             else:
1008                 d = self.queue.pop(0)
1009 
1010             name = self.stripRoot( d )
1011             logging.debug( "Webwalking:  Adding relative directory {0:s} to list, full path = {1:s}.".format( name, d ) )
1012             self.directories.append( name )
1013 
1014             self.walk( d )
1015 
1016     def stripRoot( self, fileInfo ):
1017         """Return a path, but strip off the root directory"""
1018 
1019         root = self.rootDir
1020 
1021         # Extract the file name.
1022         if self.isFileInfoType( fileInfo ):
1023             name = fileInfo[ self.userSettings.FILE_NAME ]
1024         else:
1025             name = fileInfo
1026 
1027         # e.g. root = / and name = /Art/foo.txt yields stripped_path = Art/foo.txt
1028         # but root = /Sean and name = /Sean/Art/foo.txt yields stripped_path = Art/foo.txt
1029         lenroot = len( root )
1030         if root == self.userSettings.DEFAULT_ROOT_DIR:
1031             pass
1032         else:
1033             lenroot = lenroot + 1
1034 
1035         stripped_path = name[ lenroot: ]
1036 
1037         if self.isFileInfoType( fileInfo ):
1038             # Update the file name only.
1039             return [stripped_path, fileInfo[ self.userSettings.FILE_TYPE ],
1040                     fileInfo[ self.userSettings.FILE_DATE_TIME ], fileInfo[ self.userSettings.FILE_SIZE ]]
1041         else:
1042             return stripped_path
1043 
1044     def appendRootDir( self, rootDir, name ):
1045         """Append the root directory to a path"""
1046 
1047         # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
1048         # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
1049         if rootDir == self.userSettings.DEFAULT_ROOT_DIR:
1050             return rootDir + name
1051         else:
1052             return rootDir + "/" + name
1053 
1054     def scan(self):
1055         """Scan the directory tree recursively from the root"""
1056         logging.debug( "Webwalking:  Beginning recursive directory scan from root directory {0:s}".format( self.rootDir ))
1057         self.walk( self.rootDir )
1058 
1059     def modtime( self, f ):
1060         """Subclass:  Get file modification time"""
1061         pass
1062 
1063     def quit( self ):
1064         """Quit web site walking"""
1065         logging.debug( "Finished webwalking the master." )
1066         pass
1067 
1068     def removeDirectory( self, dirName ):
1069         """Subclass:  Remove a directory"""
1070         pass
1071 
1072     def removeFile( self, fileName ):
1073         """Subclass:  Remove a file"""
1074         pass
1075 
1076     def clean( self ):
1077         """Scan through all directories and files in the master on disk web site and process them."""
1078         numChanges = 0
1079 
1080         logging.debug( "Cleaning up the master web page." )
1081 
1082         if self.directories is None or self.files is None:
1083             logging.error( "Web site has no directories or files.  Aborting..." )
1084             sys.exit()
1085 
1086         for d in self.directories:
1087 
1088             if self.isTempDir( d ):
1089                 # Add the full path prefix from the root.
1090                 name = self.appendRootDir( self.getRootDir(), d )
1091                 try:
1092                     logging.debug( "Removing temp dir {0:s} recursively".format( name ))
1093                     shutil.rmtree( name )
1094                     numChanges += 1
1095                 except OSError as detail:
1096                     logging.error( "Cannot remove temp dir {0:s}: {1:s}".format( name, str( detail ) ))
1097 
1098         for f in self.files:
1099 
1100             # Add the full path prefix from the root.
1101             name = self.appendRootDir( self.getRootDir(), f[ self.userSettings.FILE_NAME ] )
1102 
1103             # Remove all temporary files.
1104             if self.isTempFile( f ):
1105                 try:
1106                     logging.debug( "Removing temp file {0:s}".format( name ))
1107                     os.remove( name )
1108                     numChanges += 1
1109                 except OSError as detail:
1110                     logging.error( "Cannot remove temp dir {0:s}: {1:s}".format(  name, str( detail ) ))
1111 
1112             # Update hypertext files.
1113             if self.isSourceFile( f ):
1114                 changed = self.rewriteSourceFile( name )
1115                 if changed:
1116                     numChanges += 1
1117                     logging.debug( "Rewrote hypertext file {0:s}".format( name ))
1118 
1119 #                # After updating, copy to a text file.
1120 #                self.copyToTextFile( name )
1121 #                logging.debug( "Created a copy of the source file {0:s}{1:s}".format( name, self.userSettings.TEXT_FILE_EXT))
1122 
1123         # Flag that at least one file was changed.
1124         if numChanges > 0:
1125             return True
1126 
1127         return False
1128 
1129     def isTempFile( self, fileInfo ):
1130         """Identify a file name as a temporary file"""
1131 
1132         fileName = fileInfo[ self.userSettings.FILE_NAME ]
1133 
1134         # Suffixes and names for temporary files be deleted.
1135         [pat, match] = patternMatch( self.userSettings.TEMP_FILE_SUFFIXES, fileName )
1136         # Remove any files containing twiddles anywhere in the name.
1137         if match or fileName.find( self.userSettings.VIM_TEMP_FILE_EXT ) >= 0:
1138             return True
1139 
1140         return False
1141 
1142     def isTempDir( self, dirName ):
1143         """Identify a name as a temporary directory."""
1144 
1145         p = re.compile( self.userSettings.TEMP_DIR_SUFFIX, re.VERBOSE )
1146         return p.search( dirName )
1147 
1148     def isSourceFile( self, fileInfo ):
1149         """ Check if the file name is a hypertext file."""
1150 
1151         fileName = fileInfo[ self.userSettings.FILE_NAME ]
1152         p = re.compile( self.userSettings.SOURCE_FILE_SUFFIX, re.VERBOSE)
1153         return p.search( fileName )
1154 
1155     def copyToTextFile( self, fileName ):
1156         """Make a copy of a file with a .txt extension"""
1157         pass
1158 
1159     def cleanUpTempFile( self, tempFileName, fileName, changed ):
1160         """Remove the original file, rename the temporary file name to the original name.
1161         If there are no changes, just remove the temporary file.
1162         """
1163         pass
1164 
1165     def processLinesOfFile( self, inFileName, outFileName, processLineFunctionList=None ):
1166         """Process each line of a file with a list of functions.  Create a new temporary file.
1167         The default list is None which means make an exact copy.
1168         """
1169         pass
1170 
1171     def rewriteSubstring( self, line ):
1172         """Rewrite a line containing a pattern of your choice"""
1173 
1174         # Search for the pattern.
1175         [pat, match] = patternMatch( self.userSettings.OLD_SUBSTRING, line )
1176 
1177         # Replace with the new pattern.
1178         if match:
1179             newSubstring = self.userSettings.NEW_SUBSTRING
1180             sub = pat.sub( newSubstring, line )
1181             line = sub
1182 
1183         return line
1184 
1185     def rewriteEmailAddressLine( self, line ):
1186         """Rewrite lines containing old email addresses."""
1187 
1188         # Search for the old email address.
1189         [pat, match] = patternMatch( self.userSettings.OLD_EMAIL_ADDRESS, line )
1190 
1191         # Replace the old address with my new email address.
1192         if match:
1193             newAddress = self.userSettings.NEW_EMAIL_ADDRESS
1194             sub = pat.sub( newAddress, line )
1195             line = sub
1196 
1197         return line
1198 
1199     def rewriteVersionLine( self, line ):
1200         """Rewrite lines containing the current version of software."""
1201 
1202         # Search for the current version.
1203         [pat, match] = patternMatch( self.userSettings.CURRENT_SOFTWARE_VERSION, line )
1204 
1205         # Replace with the new version.
1206         if match:
1207             newVersion = self.userSettings.NEW_SOFTWARE_VERSION
1208             sub = pat.sub( newVersion, line )
1209             line = sub
1210 
1211         return line
1212 
1213     def rewriteCopyrightLine( self, line ):
1214         """Rewrite copyright lines if they are out of date."""
1215 
1216         # Match the lines,
1217         #     Copyright (C) nnnn-mmmm by Sean Erik O'Connor.
1218         #     Copyright &copy; nnnn-mmmm by Sean Erik O'Connor.
1219         # and pull out the old year and save it.
1220         [pat, match] = patternMatch( self.userSettings.COPYRIGHT_LINE, line )
1221 
1222         # Found a match.
1223         if match:
1224             oldYear = int( match.group( 'oldYear' ))
1225 
1226             # Replace the old year with the current year.  We matched and extracted the
1227             # old copyright symbol into the variable 'symbol'.  We now insert it back using
1228             # the replacement text syntax with \g<symbol>.
1229             if oldYear < self.getCurrentYear():
1230                 newCopyright = 'Copyright \g<symbol> \g<oldYear>-' + str( self.getCurrentYear() )
1231                 sub = pat.sub( newCopyright, line )
1232                 line = sub
1233         return line
1234 
1235     def rewriteLastUpdateLine( self, line ):
1236         """Rewrite the Last Updated line if the year is out of date."""
1237 
1238         # Match the last updated line and pull out the year.
1239         #      last updated 01 Jan 19.
1240         p = re.compile( self.userSettings.LAST_UPDATED_LINE, re.VERBOSE | re.IGNORECASE )
1241         m = p.search( line )
1242 
1243         if m:
1244             lastUpdateYear = int( m.group( 'year' ))
1245 
1246             # Convert to four digit years.
1247             if lastUpdateYear > 90:
1248                 lastUpdateYear += 1900
1249             else:
1250                 lastUpdateYear += 2000
1251 
1252             # If the year is old, rewrite to "01 Jan <current year>".
1253             if lastUpdateYear < self.getCurrentYear():
1254                 twoDigitYear = self.userSettings.TWO_DIGIT_YEAR_FORMAT % self.getCurrentTwoDigitYear()
1255                 sub = p.sub( 'last updated 01 Jan ' + twoDigitYear, line )
1256                 line = sub
1257 
1258         return line
1259 
1260     def rewriteSourceFile( self, fileName ):
1261         """Rewrite copyright lines, last updated lines, etc."""
1262         changed = False
1263 
1264         # Create a new temporary file name for the rewritten file.
1265         tempFileName = fileName + self.userSettings.TEMP_FILE_EXT
1266 
1267         # Apply changes to all lines of the file.  Apply change functions in the sequence listed.
1268         if self.processLinesOfFile( fileName, tempFileName,
1269                                     [self.rewriteCopyrightLine,
1270                                      self.rewriteLastUpdateLine,
1271                                      self.rewriteEmailAddressLine,
1272                                      self.rewriteSubstring,
1273                                      self.rewriteVersionLine] ):
1274             changed = True
1275 
1276         # Rename the temp file to the original file name.  If no changes, just delete the temp file.
1277         self.cleanUpTempFile( tempFileName, fileName, changed )
1278 
1279         return changed
1280 
1281 #----------------------------------------------------------------------------
1282 #  Subclass for local web site processing.
1283 #----------------------------------------------------------------------------
1284 
1285 class MasterWebSite( WebSite ):
1286     """Walk the master web directory on local disk down from the root.  Clean up temporary files and do other cleanup work."""
1287 
1288 
1289     def __init__( self, settings ):
1290         """Go to web page root and list all files and directories."""
1291 
1292         # Initialize the parent class.
1293         WebSite.__init__( self, settings )
1294 
1295         self.rootDir = self.getRootDir()
1296         logging.debug( "MasterWebSite.__init__():  \tRoot directory: {0:s}".format( self.rootDir))
1297 
1298     def getRootDir( self ):
1299         """Get the name of the root directory"""
1300         return self.userSettings.masterRootDir
1301 
1302     def gotoRootDir( self, rootDir ):
1303         """Go to the root directory"""
1304 
1305         # Go to the root directory.
1306         logging.debug( "MasterWebSite.gotoRootDir():  \tchdir to root directory:  {0:s}".format( rootDir))
1307         os.chdir( rootDir )
1308 
1309         # Read it back.
1310         self.rootDir = os.getcwd()
1311         logging.debug( "MasterWebSite.gotoRootDir():  \tgetcwd root directory:  {0:s}".format( self.rootDir ))
1312 
1313     def oneLevelDown( self, d ):
1314         """List all files and subdirectories in the current directory, dir.  For files, collect file info
1315         such as time, date and size."""
1316 
1317         directories = []
1318         files = []
1319 
1320         # Change to current directory.
1321         os.chdir( d )
1322 
1323         # List all subdirectories and files.
1324         dirList = os.listdir( d )
1325 
1326         if dirList:
1327             for line in dirList:
1328                 logging.debug( "MasterWebSite.oneLevelDown():  \tlistdir( {0:s} ) =  {1:s}".format( d, line ))
1329 
1330                 # Add the full path prefix from the root.
1331                 name = self.appendRootDir( d, line )
1332                 logging.debug( "MasterWebSite.oneLevelDown():  \tmaster dir/file (full path): {0:s}".format( name))
1333 
1334                 # Is it a directory or a file?
1335                 if os.path.isdir( name ):
1336                     directories.append( name )
1337                 elif os.path.isfile( name ):
1338                     # First assemble the file information of name, time/date and size into a list.  Can index it like an array.
1339                     # e.g. fileInfo = [ '/WebDesign/EquationImages/equation001.png', 1, datetime.datetime(2010, 2, 3, 17, 15), 4675]
1340                     #     fileInfo[ 0 ] = '/WebDesign/EquationImages/equation001.png'
1341                     #     fileInfo[ 3 ] = 4675
1342                     fileInfo = [name,
1343                                 FileType.FILE,
1344                                 self.getFileDateTime( name ),
1345                                 self.getFileSize( name ) ]
1346                     files.append( fileInfo )
1347 
1348         # Sort the names into order.
1349         if directories:
1350             directories.sort()
1351         if files:
1352             files.sort()
1353 
1354         return directories, files
1355 
1356     def getFileDateTime( self, fileName ):
1357         """Get a local file time and date in UTC.
1358         This is a static method since it doesn't change class member variables."""
1359 
1360         fileEpochTime = os.path.getmtime( fileName )
1361         fileTimeUTC = time.gmtime( fileEpochTime )[ 0 : 6 ]
1362         # year, month,   day, hour,   minute, seconds
1363         d = datetime.datetime( fileTimeUTC[0], fileTimeUTC[1],
1364                                fileTimeUTC[2], fileTimeUTC[3],
1365                                fileTimeUTC[4], fileTimeUTC[5])
1366         return d
1367 
1368     def getFileSize( self, fileName ):
1369         """Get file size in bytes.
1370         This is a static method since it doesn't change class member variables."""
1371         return os.path.getsize( fileName )
1372 
1373     def copyToTextFile( self, fileName ):
1374         """Make a copy of a file with a .txt extension"""
1375 
1376         # Remove the old copy with the text file extension.
1377         copyFileName = fileName + self.userSettings.TEXT_FILE_EXT
1378         try:
1379             os.remove( copyFileName )
1380         except OSError as detail:
1381             logging.error( "Cannot remove old text file copy {0:s}: {1:s}".format(  copyFileName, str( detail ) ) )
1382 
1383         # Create the new copy, which is an exact duplicate.
1384         self.processLinesOfFile( fileName, copyFileName )
1385 
1386         # Make the new copy have the same modification and access time and date as the original
1387         # since it is just an exact copy.
1388         # That way we won't upload copies with newer times constantly, just because they look as
1389         # though they've been recently modified.
1390         fileStat = os.stat( fileName )
1391         os.utime( copyFileName, (fileStat[stat.ST_ATIME], fileStat[stat.ST_MTIME]))
1392         logging.debug( "Reset file time to original time for copy {0:s}".format( copyFileName ) )
1393 
1394     def cleanUpTempFile( self, tempFileName, fileName, changed ):
1395         """Remove the original file, rename the temporary file name to the original name.
1396         If there are no changes, just remove the temporary file.
1397         """
1398 
1399         if changed:
1400             # Remove the old file now that we have the rewritten file.
1401             try:
1402                 os.remove( fileName )
1403                 logging.debug( "Changes were made.  Remove original file {0:s}".format( fileName ))
1404             except OSError as detail:
1405                 logging.error( "Cannot remove old file {0:s}: {1:s}.  Need to remove it manually.".format( fileName, str( detail ) ) )
1406 
1407             # Rename the new file to the old file name.
1408             try:
1409                 os.rename( tempFileName, fileName )
1410                 logging.debug( "Rename temp file {0:s} to original file {1:s}".format( tempFileName, fileName ))
1411             except OSError as detail:
1412                 logging.error( "Cannot rename temporary file {0:s} to old file name {1:s}: {2:s}.  Need to do it manually".format( tempFileName, fileName, str( detail ) ))
1413         else:
1414             # No changes?  Remove the temporary file.
1415             try:
1416                 os.remove( tempFileName )
1417                 logging.debug( "No changes were made.  Remove temporary file {0:s}".format( tempFileName ))
1418             except OSError as detail:
1419                 logging.error( "Cannot remove temporary file {0:s}: {1:s}.  Need to remove it manually.".format( tempFileName, str( detail )))
1420         return
1421 
1422     def processLinesOfFile( self, inFileName, outFileName, processLineFunctionList=None ):
1423         """Process each line of a file with a list of functions.  Create a new temporary file.
1424         The default list is None which means make an exact copy.
1425         """
1426 
1427         fin = None
1428         fout = None
1429 
1430         # Assume no changes.
1431         changed = False
1432 
1433         try:
1434             fin = open( inFileName, "r" )
1435         except IOError as detail:
1436             logging.error( "processLinesOfFile():  \tCannot open file {0:s} for reading:  {1:s}".format( inFileName, str( detail ) ) )
1437 
1438         try:
1439             fout = open( outFileName, "w" )
1440         except IOError as detail:
1441             logging.error( "processLinesOfFile():  \tCannot open file {0:s} for writing:  {1:s}".format( outFileName, str( detail ) ) )
1442 
1443         # Read each line of the file, aborting if there is a read error.
1444         try:
1445             line = fin.readline()
1446 
1447             while line:
1448                 original_line = line
1449                 if processLineFunctionList is None:
1450                     # For a simple copy, just duplicate the line unchanged.
1451                     pass
1452                 else:
1453                     # Otherwise, apply changes in succession to the line.
1454                     for processLineFunction in processLineFunctionList:
1455                         line = processLineFunction( line )
1456 
1457                 if original_line != line:
1458                     logging.debug( "Rewrote the line >>>{0:s}<<< to >>>{1:s}<<<".format( original_line, line ) )
1459                     changed = True
1460 
1461                 fout.write( line )
1462 
1463                 line = fin.readline()
1464 
1465             fin.close()
1466             fout.close()
1467         except IOError as detail:
1468             logging.error(  "File I/O error during reading/writing file {0:s} in processLinesOfFile: {1:s}  Aborting...".format( inFileName, str( detail ) ) )
1469             sys.exit()
1470 
1471         if changed:
1472             logging.debug( "processLinesOfFile():  \tRewrote original file {0:s}.  Changes are in temporary copy {1:s}".format( inFileName, outFileName ) )
1473 
1474         # Return True if any lines were changed.
1475         return changed
1476 
1477 #----------------------------------------------------------------------------
1478 #   Subclass for remote web site processing.
1479 #----------------------------------------------------------------------------
1480 
class RemoteWebSite( WebSite ):
    """Walk the remote web directory on a web server down from the root.

    The connection to the server is an ftplib.FTP object kept in self.ftp.
    The root directory of the web site on the server is kept in self.rootDir.
    """


    def __init__( self, settings, server, user, password, ftproot ):
        """Connect and log in to the FTP server, then initialize the WebSite superclass.

        settings  User settings, handed on to WebSite.__init__().
        server    Name of the FTP server.
        user      FTP login name.
        password  FTP login password.
        ftproot   Root directory of the web site on the FTP server.

        Logs an error and exits through sys.exit() if the connection or the login fails.
        """

        # Root directory of FTP server.
        self.rootDir = ftproot
        logging.debug( "Requesting remote web site ftp root dir {0:s}".format( self.rootDir ))

        # Connect to FTP server and log in.
        try:
            self.ftp = ftplib.FTP( server )
            # To trace the ftp conversation while debugging, call self.ftp.set_debuglevel( 2 ) here.
            self.ftp.login( user, password )
        # Catch all exceptions with the parent class Exception:  all built-in, non-system-exiting exceptions are derived from this class.
        except Exception as detail:
            # Extract the string message from the exception class with str().
            logging.error( "Remote web site cannot login to ftp server: {0:s}  Aborting...".format( str( detail ) ))
            sys.exit()
        else:
            logging.debug( "Remote web site ftp login succeeded." )

        logging.debug( "Remote web site ftp welcome message {0:s}".format( self.ftp.getwelcome() ))

        # Initialize the superclass.  This is done last, after self.rootDir and self.ftp are ready.
        WebSite.__init__( self, settings )

    def gotoRootDir( self, rootDir ):
        """Change to the root directory on the server and remember the name the server reports for it.

        rootDir is the directory to change to.  self.rootDir is replaced by the answer to ftp pwd.

        Logs an error and exits through sys.exit() if the ftp cwd or pwd fails.
        """

        try:
            # Go to the root directory.
            self.ftp.cwd( rootDir )
            logging.debug( "ftp root directory (requested) = {0:s}".format( rootDir ))

            # Read it back.
            self.rootDir = self.ftp.pwd()
            logging.debug( "ftp root directory (read back from server): {0:s}".format( self.rootDir ))

        except Exception as detail:
            # Pass the directory and the error text as two separate format arguments.
            # A single tuple argument cannot be formatted with {0:s} and would raise a TypeError here,
            # hiding the real ftp error.
            logging.error( "gotoRootDir(): \tCannot ftp cwd or pwd root dir {0:s}:  {1:s} Aborting...".format( rootDir, str( detail ) ))
            sys.exit()

    def getRootDir( self ):
        """Get the root directory name"""

        return self.rootDir

    def quit(self):
        """Quit web site walking by closing the ftp connection.  A failure is logged, not raised."""

        logging.debug( "Quitting remote site." )
        try:
            self.ftp.quit()
        except Exception as detail:
            logging.error( "Cannot ftp quit: {0:s}".format( str( detail ) ))

    def oneLevelDown( self, d ):
        """List files and directories in a subdirectory using ftp.

        d is the remote directory to list.

        Returns the pair (directories, files), each sorted:
            directories  Names of the subdirectories of d, each joined to d by appendRootDir().
            files        One [name, type, date/time, size] list per file, as parsed by
                         getFTPFileInformation(), with the name joined to d by appendRootDir().

        Listing lines shorter than MIN_FTP_LINE_LENGTH are logged as errors and skipped.
        Logs an error and exits through sys.exit() if the ftp cwd or LIST fails.
        """

        directories = []
        files = []
        dirList = []

        try:
            # ftp listing from current dir.
            logging.debug( "RemoteWebSite.oneLevelDown():  \tftp cwd: {0:s}".format( d ))
            self.ftp.cwd( d  )

            # Collect the raw lines of the listing, one list item per line.
            self.ftp.retrlines( 'LIST', dirList.append )
        except Exception as detail:
            logging.error( "oneLevelDown(): \tCannot ftp cwd or ftp LIST dir {0:s}:  {1:s} Aborting...".format( d , str( detail )  ))
            sys.exit()

        for line in dirList:
            logging.debug( "RemoteWebSite.oneLevelDown():  \tftp LIST: {0:s}".format( line))

            # Line should at least have the minimum FTP information.
            if len(line) >= self.userSettings.MIN_FTP_LINE_LENGTH:
                fileInfo = self.getFTPFileInformation( line )

                # An empty name means the listing line could not be parsed.  It is reported but still recorded below.
                if fileInfo[ self.userSettings.FILE_NAME ] == "":
                    logging.error( "RemoteWebSite.oneLevelDown():  \tFTP LIST file name is NULL:" )

                logging.debug( "RemoteWebSite.oneLevelDown():  \tftp parsed file info: {0:s}".format( fileInfo[ self.userSettings.FILE_NAME ] ))

                # Prefix the full path prefix from the root to the directory name and add to the directory list.
                if fileInfo[ self.userSettings.FILE_TYPE ] == FileType.DIRECTORY:
                    dirname = self.appendRootDir( d , fileInfo[ self.userSettings.FILE_NAME ] )
                    logging.debug( "RemoteWebSite.oneLevelDown():  \tftp dir (full path): {0:s}".format( dirname ))
                    directories.append( dirname )
                # Add file information to the list of files.
                else:
                    # Update the file name only:  add the full path prefix from the root.
                    fileInfo[ self.userSettings.FILE_NAME ] = self.appendRootDir( d,  fileInfo[ self.userSettings.FILE_NAME ] )
                    logging.debug( "RemoteWebSite.oneLevelDown():  \tftp file (full path): {0:s}".format( fileInfo[ self.userSettings.FILE_NAME ] ))
                    files.append( fileInfo )
            else:
                logging.error( "RemoteWebSite.oneLevelDown():  \tFTP LIST line is too short:  {0:s}".format( line ))

        directories.sort()
        files.sort()

        return directories, files

    def modtime( self, f ):
        """Get the modification time of a file via ftp.

        f is the name of the remote file.

        Returns the text which follows the 213 reply code of the ftp MDTM command.
        Returns 0 if the reply has another code or if the server refuses the command
        (ftplib.error_perm).  Other ftp errors are not caught here.
        """
        modtime = 0

        try:
            response = self.ftp.sendcmd( 'MDTM ' + f )
            # MDTM returns the last modified time of the file in the format
            # "213 YYYYMMDDhhmmss \r\n <error-response>
            # MM is 01 to 12, DD is 01 to 31, hh is 00 to 23, mm is 00 to 59, ss is 0 to 59.
            # error-response is 550 for info not available, and 500 or 501 if command cannot
            # be parsed.
            if response[:3] == '213':
                modtime = response[4:]
        except ftplib.error_perm:
            modtime = 0

        return modtime

    def getFTPFileInformation( self, line ):
        """Parse one line of an ftp file listing.

        line is one line of the ftp LIST output;  it must not be empty.

        Returns the list [file name, file type, date/time, size in bytes], where the file type is
        FileType.DIRECTORY if the line starts with 'd' and FileType.FILE otherwise.
        If the line does not match the pattern self.userSettings.FTP_LISTING, the file name is "",
        the size is 0 and the date/time is midnight on January 1 of the current year.

        We can have problems on New Year's Eve.  For example, the master file date/time is
            Mon Jan  1 06:23:12 2018
        but the remote time from the FTP listing doesn't show a year since the server is still in 2017
            Mon Dec 31 03:02:00
        Thus the remote time will default to
            Mon Dec 31 03:02:00 2018
        and we think that the remote file is newer by 363.860278 days.
        """

        # Find out if we've a directory or a file.
        if line[0] == 'd':
            dirOrFile = FileType.DIRECTORY
        else:
            dirOrFile = FileType.FILE

        pattern = re.compile( self.userSettings.FTP_LISTING, re.VERBOSE )

        # Sensible defaults.
        filesize = 0
        filename = ""
        hour    = 0
        minute  = 0
        seconds = 0
        month   = 1
        day     = 1
        year    = self.getCurrentYear()

        # Extract time and date from the ftp listing.
        match = pattern.search( line )

        if match:
            filesize = int( match.group( 'bytes' ) )
            month    = self.userSettings.monthToNumber[ match.group( 'mon' ) ]
            day      = int( match.group( 'day' ) )

            # Pull out the year if it's present.  If so, the FTP listing will omit the hour and minute.
            # Default the time to hour 0, minute 0 (i.e. midnight).
            if match.group( 'year' ):
                year = int( match.group( 'year' ) )

            # If the FTP listing has no year, get the hour and minute.
            # Default to the current year.
            if match.group( 'hour' ) and match.group( 'min' ):
                hour   = int( match.group( 'hour' ) )
                minute = int( match.group( 'min' ) )

            filename = match.group( 'filename' )

        # Package up the time and date nicely.
        # Note if we didn't get any matches, we'll default the remote date and time to Jan 1 midnight of the current year.
        d = datetime.datetime( year, month, day, hour, minute, seconds )

        return [filename, dirOrFile, d, filesize]
1661 
1662 class UpdateWeb( object ):
1663     """Given previously scanned master and remote directories, update the remote web site."""
1664 
1665 
1666     def __init__( self, settings, server, user, password, ftproot, fileSizeLimit,
1667                   masterDirectoryList, masterFileInfo, remoteDirectoryList, remoteFileInfo ):
1668         """Connect to remote site.  Accept previously scanned master and remote files and directories."""
1669 
1670         self.userSettings = settings
1671 
1672         self.masterFilesList      = []
1673         self.remoteFilesList      = []
1674         self.masterFileToSize     = {}
1675         self.masterFileToDateTime = {}
1676         self.remoteFileToDateTime = {}
1677         self.masterOnlyDirs       = []
1678         self.masterOnlyFiles      = []
1679         self.remoteOnlyDirs       = []
1680         self.remoteOnlyFiles      = []
1681         self.commonFiles          = []
1682 
1683         # Connect to FTP server and log in.
1684         try:
1685             self.ftp = ftplib.FTP( server )
1686             self.ftp.login( user, password )
1687         except Exception as detail:
1688             logging.error( "Cannot login to ftp server: {0:s} Aborting...".format( str( detail )  ))
1689             sys.exit()
1690         else:
1691             logging.debug( "ftp login succeeded." )
1692 
1693         logging.debug( "ftp server welcome message:  {0:s}".format( self.ftp.getwelcome() ))
1694 
1695         # Master root directory.
1696         self.masterRootDir = self.userSettings.masterRootDir
1697         logging.debug( "Master (local to disk) root directory: {0:s}".format( self.masterRootDir))
1698 
1699         # Root directory of FTP server.
1700         self.ftpRootDir = ftproot
1701         logging.debug( "ftp root directory (requested) = {0:s}".format( self.ftpRootDir ))
1702 
1703         # Transform KB string to integer bytes.  e.g. "200" => 2048000
1704         self.fileSizeLimit = int( fileSizeLimit ) * 1024
1705 
1706         try:
1707             # Go to the root directory.
1708             self.ftp.cwd( self.ftpRootDir )
1709 
1710             # Read it back.
1711             self.ftpRootDir = self.ftp.pwd()
1712             logging.debug( "ftp root directory (read back from server): {0:s}".format( self.ftpRootDir ))
1713         except Exception as detail:
1714             logging.error( "UpdateWeb(): \tCannot ftp cwd or ftp LIST dir {0:s} Aborting...".format( (self.ftpRootDir, str( detail ) )) )
1715 
1716         self.masterDirectoryList = masterDirectoryList
1717         self.remoteDirectoryList = remoteDirectoryList
1718         self.masterFileInfo = masterFileInfo
1719         self.remoteFileInfo = remoteFileInfo
1720 
1721     def appendRootDir( self, rootDir, name ):
1722         """Append the root directory to a path"""
1723 
1724         # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
1725         # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
1726         if rootDir == self.userSettings.DEFAULT_ROOT_DIR:
1727             return rootDir + name
1728         else:
1729             return rootDir + "/" + name
1730 
1731     def fileInfo( self ):
1732         """Create lists of file names from the file information.  Also create dictionaries which map file names onto 
1733         dates, times, and sizes."""
1734 
1735         # Extract file names.
1736         self.masterFilesList = [ fileInfo[ self.userSettings.FILE_NAME ] for fileInfo in self.masterFileInfo ]
1737         self.remoteFilesList = [ fileInfo[ self.userSettings.FILE_NAME ] for fileInfo in self.remoteFileInfo ]
1738 
1739         # Use a dictionary comprehension to create key/value pairs, (file name, file date/time), which map file names onto date/time.
1740         self.masterFileToDateTime = { fileInfo[ self.userSettings.FILE_NAME ] : fileInfo[ self.userSettings.FILE_DATE_TIME ] \
1741                                       for fileInfo in self.masterFileInfo }
1742         self.remoteFileToDateTime = { fileInfo[ self.userSettings.FILE_NAME ] : fileInfo[ self.userSettings.FILE_DATE_TIME ] \
1743                                       for fileInfo in self.remoteFileInfo }
1744 
1745         # Dictionary comprehension creates a mapping of master file names onto file sizes.
1746         self.masterFileToSize = { fileInfo[ self.userSettings.FILE_NAME ] : fileInfo[ self.userSettings.FILE_SIZE ] \
1747                                   for fileInfo in self.masterFileInfo }
1748 
1749     def update( self ):
1750         """Scan through the master web site, cleaning it up.
1751         Go to remote web site on my servers and synchronize all files."""
1752 
1753         self.fileInfo()
1754 
1755         # Which files and directories are different.
1756         self.changes()
1757 
1758         # Synchronize with the master.
1759         self.synchronize()
1760 
1761     def changes( self ):
1762         """Find the set of different directories and files on master and remote."""
1763 
1764         # Add all directories which are only on master to the dictionary.
1765         dir_to_type = { d : FileType.ON_MASTER_ONLY for d in self.masterDirectoryList }
1766 
1767         # Scan through all remote directories, adding those only on remote or on both.
1768         for d in self.remoteDirectoryList:
1769             if d in dir_to_type:
1770                 dir_to_type[ d ] = FileType.ON_BOTH_MASTER_AND_REMOTE
1771             else:
1772                 dir_to_type[ d ] = FileType.ON_REMOTE_ONLY
1773 
1774         # Add all files which are only on master to the dictionary.
1775         file_to_type = { file : FileType.ON_MASTER_ONLY for file in self.masterFilesList }
1776 
1777         # Scan through all remote files, adding those only on remote or on both.
1778         for file in self.remoteFilesList:
1779             if file in file_to_type:
1780                 file_to_type[ file ] = FileType.ON_BOTH_MASTER_AND_REMOTE
1781             else:
1782                 file_to_type[ file ] = FileType.ON_REMOTE_ONLY
1783 
1784         logging.debug( "Raw dictionary dump of directories" )
1785         for k, v in dir_to_type.items(): logging.debug( "\t dir:  {0:s}  type: {1:s}".format( str( k ), str( v ) ))
1786         logging.debug( "Raw dictionary dump of files" )
1787         for k, v in file_to_type.items(): logging.debug( "\t file: {0:s}  type: {1:s}".format( str( k ), str( v ) ))
1788 
1789         # List of directories only on master.  Keep the ordering.
1790         self.masterOnlyDirs = []
1791         for d in self.masterDirectoryList:
1792             if dir_to_type[ d ] == FileType.ON_MASTER_ONLY:
1793                 self.masterOnlyDirs.append( d )
1794 
1795         # List of directories only on remote.  Keep the ordering.
1796         self.remoteOnlyDirs = []
1797         for d in self.remoteDirectoryList:
1798             if dir_to_type[ d ] == FileType.ON_REMOTE_ONLY:
1799                 self.remoteOnlyDirs.append( d )
1800 
1801         # We don't care about common directories, only their changed files, if any.
1802 
1803         # List of files only on master.  Keep the ordering.
1804         self.masterOnlyFiles = []
1805         for file in self.masterFilesList:
1806             if file_to_type[ file ] == FileType.ON_MASTER_ONLY:
1807                 self.masterOnlyFiles.append( file )
1808 
1809         # List of files only on remote.  Keep the ordering.
1810         self.remoteOnlyFiles = []
1811         for file in self.remoteFilesList:
1812             if file_to_type[ file ] == FileType.ON_REMOTE_ONLY:
1813                 self.remoteOnlyFiles.append( file )
1814 
1815         # List of common files on both master and remote.  Keep the ordering.
1816         self.commonFiles = []
1817         for file in self.masterFilesList:
1818             if file_to_type[ file ] == FileType.ON_BOTH_MASTER_AND_REMOTE:
1819                 self.commonFiles.append( file )
1820 
1821         logging.debug( "*** Directories only on master ******************************" )
1822         for d in self.masterOnlyDirs: logging.debug( "\t {0:s}".format( d ))
1823 
1824         logging.debug( "*** Directories only on remote ******************************" )
1825         for d in self.remoteOnlyDirs: logging.debug( "\t {0:s}".format( d ))
1826 
1827         logging.debug( "*** Files only on master ******************************" )
1828         for file in self.masterOnlyFiles: logging.debug( "\t {0:s}".format( file ))
1829 
1830         logging.debug( "*** Files only on remote ******************************" )
1831         for file in self.remoteOnlyFiles: logging.debug( "\t {0:s}".format( file ))
1832 
1833         logging.debug( "*** Common files ******************************" )
1834         for file in self.commonFiles:
1835             logging.debug( "\tname {0:s} master time {1:s} remote time {2:s}".format(
1836                     file, self.masterFileToDateTime[ file ].ctime(), self.remoteFileToDateTime[ file ].ctime()))
1837 
1838     def synchronize( self ):
1839         """Synchronize files in the remote directory with the master directory."""
1840 
1841         # Compare the common files for time and date.
1842         for f in self.commonFiles:
1843             masterFileTime = self.masterFileToDateTime[ f ]
1844             remoteFileTime = self.remoteFileToDateTime[ f ]
1845 
1846             # How many fractional days different are we?
1847             days_different = \
1848                     abs( (remoteFileTime - masterFileTime).days + (remoteFileTime - masterFileTime).seconds / (60.0 * 60.0 * 24.0) )
1849             uploadToHost = False
1850 
1851             logging.debug( "Common file:  {0:s}.".format( f ))
1852 
1853             # Remote file time is newer.
1854             if remoteFileTime > masterFileTime:
1855                 # Remote file time is MUCH newer:  suspect time is out of joint on the server, so upload local master file to be safe.
1856                 if days_different >= self.userSettings.DAYS_NEWER_FOR_REMOTE_NEW_YEARS_GLITCH:
1857                     logging.error( "Remote file {0:s} is newer by {1:f} days.  Probably New Year's glitch."
1858                                       .format(f, days_different))
1859                     logging.error( "\tmaster time {0:s} remote time {1:s}".format( masterFileTime.ctime(), remoteFileTime.ctime() ) )
1860 
1861                     # Set the master file to the current time.
1862                     fullFileName = self.appendRootDir( self.masterRootDir, f )
1863                     if os.path.exists( fullFileName ):
1864                         os.utime( fullFileName, None )
1865                         logging.error( "Touching master file {0:s} to make it the current time".format( fullFileName ))
1866 
1867                     uploadToHost = True
1868                 # Remote file time is only slightly newer;  probably OK, just a little time inaccuracy on the server.
1869                 else:
1870                     logging.debug( "Remote file {0:s} is slightly newer by {1:f} days.  Probably a wee bit of time inaccuracy on the server.  Wait -- don't upload yet." \
1871                                     .format(f,days_different))
1872                     logging.debug( "\tmaster time {0:s} remote time {1:s}".format( masterFileTime.ctime(), remoteFileTime.ctime() ))
1873                     uploadToHost = False
1874             # Master file time is newer.
1875             elif masterFileTime > remoteFileTime:
1876                 # Master file time is newer (by several minutes), that it's likely to be changed;  upload.
1877                 if days_different >= self.userSettings.DAYS_NEWER_FOR_MASTER_BEFORE_UPLOAD:
1878                     logging.warning( "Master file {0:s} is newer by {1:f} days.  Preparing for upload.".format(f, days_different ))
1879                     logging.warning( "\tmaster time {0:s} remote time {1:s}".format( masterFileTime.ctime(), remoteFileTime.ctime() ))
1880                     uploadToHost = True
1881                 else:
1882                     logging.debug( "Master file {0:s} is slightly newer by {1:f} days.  Wait -- don't upload yet.".format( f, days_different ))
1883                     logging.debug( "\tmaster time {0:s} remote time {1:s}".format( masterFileTime.ctime(), remoteFileTime.ctime() ))
1884                     uploadToHost = False
1885 
1886             #  But override the upload if the file is too big for the server.
1887             size = self.masterFileToSize[ f ]
1888             if size >= self.fileSizeLimit:
1889                 logging.error( "upload():  Skipping upload of file {0:s} of size {1:d};  too large for server, limit is {2:d} bytes" \
1890                                .format(f, size, self.fileSizeLimit ))
1891                 uploadToHost = False
1892 
1893             if uploadToHost:
1894                 print( "Uploading changed file {0:s}...".format( f ), end='', flush=True )
1895                 self.upload( f )
1896 
1897         # Remote directory is not in master.  Delete it.
1898         for d in self.remoteOnlyDirs:
1899             logging.debug( "Remote only dir.  Attempting to delete it:  {0:s}".format( d ))
1900             print( "Deleting remote directory {0:s}...".format( d ), end='', flush=True )
1901             self.rmdir( d )
1902 
1903         # Master directory missing on remote.  Create it.
1904         # Due to breadth first order scan, we'll create parent directories before child directories.
1905         for d in self.masterOnlyDirs:
1906             logging.debug( "Master only dir.  Creating dir {0:s} on remote.".format( d ))
1907             print( "Creating new remote directory {0:s}...".format( d ), end='', flush=True )
1908             self.mkdir( d )
1909 
1910         # Master file file missing on remote.  Upload it.
1911         for f in self.masterOnlyFiles:
1912             logging.debug( "Master only file.  Uploading {0:s} to remote.".format( f ))
1913 
1914             #  But override the upload if the file is too big for the server.
1915             size = self.masterFileToSize[ f ]
1916             if size >= self.fileSizeLimit:
1917                 logging.error( "upload():  Skipping upload of file {0:s} of size {1:d};  too large for server, limit is {2:d} bytes" \
1918                                .format(f, size, self.fileSizeLimit ))
1919             else:
1920                 print( "Uploading new file {0:s}...".format( f ), end='', flush=True )
1921                 self.upload( f )
1922 
1923         # Remote contains a file not present on the master.  Delete the file.
1924         for f in self.remoteOnlyFiles:
1925             logging.debug( "Remote only file.  Deleting remote file {0:s}.".format( f ))
1926             print( "Deleting remote file {0:s}...".format( f ), end='', flush=True )
1927             self.delRemote( f )
1928 
1929     def delRemote( self, relativeFilePath ):
1930         """Delete a file using ftp."""
1931 
1932         logging.debug( "delRemote():  \trelative file path name: {0:s}".format( relativeFilePath ))
1933 
1934         # Parse the relative file path into file name and relative directory.
1935         relativeDir, fileName = os.path.split( relativeFilePath )
1936         logging.debug( "delRemote():  \tfile name: {0:s}".format( fileName ))
1937         logging.debug( "delRemote():  \trelative dir: {0:s}".format( relativeDir ))
1938         logging.debug( "delRemote():  \tremote root dir: {0:s}".format( self.ftpRootDir ))
1939 
1940         try:
1941             # Add the remote root path and go to the remote directory.
1942             remoteDir = self.appendRootDir( self.ftpRootDir, relativeDir )
1943             logging.debug( "delRemote():  \tftp cd remote dir: {0:s}".format( remoteDir ))
1944             self.ftp.cwd( remoteDir )
1945         except Exception as detail:
1946             logging.error( "delRemote():  \tCannot ftp chdir: {0:s}  Skipping...".format( str( detail ) ))
1947         else:
1948             try:
1949                 logging.debug( "delRemote():  \tftp rm: {0:s}".format( fileName ))
1950 
1951                 # Don't remove zero length file names.
1952                 if len( fileName ) > 0:
1953                     self.ftp.delete( fileName )
1954                 else:
1955                     logging.warning( "delRemote():  skipping ftp delete;  file NAME {0:s} had zero length".format( fileName ))
1956             except Exception as detail:
1957                 logging.error( "delRemote():  \tCannot ftp rm: {0:s}".format( str( detail ) ))
1958 
1959     def mkdir( self, relativeDir ):
1960         """Create new remote directory using ftp."""
1961 
1962         logging.debug( "mkdir():  \trelative dir path name: {0:s}".format( relativeDir ))
1963         logging.debug( "mkdir():  \tremote root dir: {0:s}".format( self.ftpRootDir ))
1964 
1965         # Parse the relative dir path into prefix dir and suffix dir.
1966         path, d = os.path.split( relativeDir )
1967         logging.debug( "mkdir():  \tremote prefix dir: {0:s}".format( path ))
1968         logging.debug( "mkdir():  \tremote dir:  {0:s}".format( d ))
1969 
1970         try:
1971             # Add the remote root path and go to the remote directory.
1972             remoteDir = self.appendRootDir( self.ftpRootDir, path )
1973             logging.debug( "mkdir():  \tftp cd remote dir: {0:s}".format( remoteDir ))
1974             self.ftp.cwd( remoteDir )
1975         except Exception as detail:
1976             logging.error( "mkdir():  \tCannot ftp chrdir: {0:s}  Skipping...".format( str( detail ) ))
1977         else:
1978             try:
1979                 logging.debug( "mkdir():  \tftp mkd: {0:s}".format( d ))
1980                 self.ftp.mkd( d )
1981             except Exception as detail:
1982                 logging.error( "mkdir():  \tCannot ftp mkdir: {0:s}".format( str( detail ) ))
1983 
1984     def rmdir( self, relativeDir ):
1985         """Delete an empty directory using ftp."""
1986 
1987         logging.debug( "rmdir():  \tintermediate dir path name: {0:s}".format( relativeDir ))
1988         logging.debug( "rmdir():  \tremote root dir: {0:s}".format( self.ftpRootDir ))
1989 
1990         # Parse the relative dir path into prefix dir and suffix dir.
1991         path, d = os.path.split( relativeDir )
1992         logging.debug( "rmdir():  \tremote prefix dir: {0:s}".format( path ))
1993         logging.debug( "rmdir():  \tremote dir:  {0:s}".format( d ))
1994 
1995         try:
1996             # Add the remote root path and go to the remote directory.
1997             remoteDir = self.appendRootDir( self.ftpRootDir, path )
1998             logging.debug( "rmdir():  \tftp cd remote dir: {0:s}".format( remoteDir ))
1999             self.ftp.cwd( remoteDir )
2000         except Exception as detail:
2001             logging.error( "rmdir():  \tCannot ftp chdir: {0:s}  Skipping...".format( str( detail ) ))
2002         else:
2003             try:
2004                 logging.debug( "rmdir():  \tftp rmd: {0:s}".format( d ))
2005                 self.ftp.rmd( d )
2006             except Exception as detail:
2007                 logging.error( "rmdir():  \tCannot ftp rmdir dir {0:s}: {1:s}  Directory is probably not empty.  Do a manual delete." .format( d, str( detail ) ))
2008 
2009     def download( self, relativeFilePath ):
2010         """Download a binary file using ftp."""
2011 
2012         logging.debug( "download():  \tfile name: {0:s}".format( relativeFilePath ))
2013 
2014         # Parse the relative file path into file name and relative directory.
2015         relativeDir, fileName = os.path.split( relativeFilePath )
2016         logging.debug( "download():  \tfile name: {0:s}".format( fileName ))
2017         logging.debug( "download():  \trelative dir: {0:s}".format( relativeDir ))
2018         logging.debug( "download():  \troot dir: {0:s}".format( self.ftpRootDir ))
2019 
2020         # Add the remote root path and go to the remote directory.
2021         remoteDir = self.appendRootDir( self.ftpRootDir, relativeDir )
2022         logging.debug( "download():  \tftp cd remote dir: {0:s}".format( remoteDir ))
2023 
2024         try:
2025             self.ftp.cwd( remoteDir )
2026         except Exception as detail:
2027             logging.error( "download():  \tCannot ftp chdir: {0:s}  Skipping...".format( str( detail ) ))
2028         else:
2029             # Add the master root path to get the local file name.
2030             # Open local binary file to write into.
2031             localFileName = self.appendRootDir( self.masterRootDir, relativeFilePath )
2032             logging.debug( "download():  \topen local file name: {0:s}".format( localFileName ))
2033             try:
2034                 f = open( localFileName, "wb" )
2035                 try:
2036                     # Calls f.write() on each block of the binary file.
2037                     #ftp.retrbinary( "RETR " + fileName, f.write )
2038                     pass
2039                 except Exception as detail:
2040                     logging.error( "download():  \tCannot cannot ftp retrbinary: {0:s}".format( str( detail ) ))
2041                 f.close()
2042             except IOError as detail:
2043                 logging.error( "download():  \tCannot open local file {0:s} for reading:  {1:s}".format( localFileName, str( detail ) ))
2044 
2045     def upload( self, relativeFilePath ):
2046         """Upload  a binary file using ftp."""
2047 
2048         logging.debug( "upload():  \trelative file path name: {0:s}".format( relativeFilePath ))
2049 
2050         # Parse the relative file path into file name and relative directory.
2051         relativeDir, fileName = os.path.split( relativeFilePath )
2052         logging.debug( "upload():  \tfile name: {0:s}".format( fileName ))
2053         logging.debug( "upload():  \trelative dir: {0:s}".format( relativeDir ))
2054         logging.debug( "upload():  \tremote root dir: {0:s}".format( self.ftpRootDir ))
2055 
2056         # Add the remote root path and go to the remote directory.
2057         remoteDir = self.appendRootDir( self.ftpRootDir, relativeDir )
2058         logging.debug( "upload():  \tftp cd remote dir: {0:s}".format( remoteDir ))
2059 
2060         try:
2061             self.ftp.cwd( remoteDir )
2062         except Exception as detail:
2063             logging.error( "upload():  \tCannot ftp chdir: {0:s}  Skipping...".format( str( detail ) ))
2064         else:
2065             # Add the master root path to get the local file name.
2066             # Open local binary file to read from.
2067             localFileName = self.appendRootDir( self.masterRootDir, relativeFilePath )
2068             logging.debug( "upload():  \topen local file name: {0:s}".format( localFileName ))
2069 
2070             try:
2071                 f = open( localFileName, "rb" )
2072                 try:
2073                     # f.read() is called on each block of the binary file until EOF.
2074                     logging.debug( "upload():  \tftp STOR file {0:s}".format( fileName ))
2075                     self.ftp.storbinary( "STOR " + fileName, f )
2076                 except Exception as detail:
2077                     logging.error( "upload():  \tCannot ftp storbinary: {0:s}".format( str( detail ) ))
2078                 f.close()
2079             except IOError as detail:
2080                 logging.error( "upload():  \tCannot open local file {0:s} for reading:  {1:s}".format( localFileName, str( detail ) ) )
2081 
2082     def quit( self ):
2083         """Log out of an ftp session"""
2084 
2085         logging.debug( "UpdateWeb::quit()" )
2086         try:
2087             self.ftp.quit()
2088         except Exception as detail:
2089             logging.error( "Cannot ftp quit because {0:s}".format( str( detail ) ))
2090 
2091 
if __name__ == '__main__':
    # Python executes all the code in this file from top to bottom, so every
    # class and function above has been defined by the time we get here.
    # When the file is run as a script, the module name is set to __main__,
    # so we start the program by calling main().
    main()