1 #!/usr/bin/env python3
   2 # ============================================================================
   3 #
   4 # NAME
   5 #
   6 #     updateweb.py
   7 #
   8 # DESCRIPTION
   9 #
  10 #     Python script which updates my web sites.
  11 #
  12 #     It does miscellaneous cleanup on my master copy of the web site on disk,
  13 #     including updating copyright information, then synchronizes the master
  14 #     copy to my remote server web sites using FTP.
  15 #
  16 # USAGE
  17 #
  18 #     It's best to use the associated makefile.
  19 #     But you can call this Python utility from the command line,
  20 #
  21 #     $ python updateweb.py          Clean up my master copy, then use it
  22 #                                    to update my remote web server site.
  23 #                                    Log warnings and errors.
  24 #     $ python updateweb.py -v       Same, but log debug messages also.
  25 #     $ python updateweb.py -c       Clean up my master copy only.
  26 #     $ python updateweb.py -t       Run unit tests only.
  27 #
  28 #     We get username and password information from the file PARAMETERS_FILE.
  29 #
  30 #     Logs are written to the files,
  31 #
  32 #         logMaster.txt       Master web site cleanup log.
  33 #         logRemote.txt       Remote web server update log.
  34 #
  35 # AUTHOR
  36 #
  37 #     Sean E. O'Connor        23 Aug 2007  Version 1.0 released.
  38 #     Sean E. O'Connor        18 May 2013  Version 4.2 released.
  39 #     Sean E. O'Connor        07 Nov 2015  Version 4.3 released.
  40 #     Sean E. O'Connor        22 Nov 2015  Version 4.4 released.
  41 #     Sean E. O'Connor        07 Feb 2017  Version 4.5 released.
  42 #     Sean E. O'Connor        04 Jun 2017  Version 4.6 released.
  43 #     Sean E. O'Connor        17 Dec 2017  Version 4.7 released.
  44 #     Sean E. O'Connor        15 Jan 2018  Version 4.8 released.
  45 #     Sean E. O'Connor        05 Jan 2019  Version 5.0 released.
  46 #     Sean E. O'Connor        24 Mar 2019  Version 5.1 released.
  47 #     Sean E. O'Connor        06 May 2019  Version 5.2 released.
  48 #     Sean E. O'Connor        08 May 2019  Version 5.3 released.
  49 #     Sean E. O'Connor        08 Oct 2019  Version 5.4 released.
  50 #     Sean E. O'Connor        24 Nov 2019  Version 5.5 released.
  51 #     Sean E. O'Connor        20 Apr 2019  Version 5.6 released.
  52 #     Sean E. O'Connor        20 Jun 2020  Version 5.7 released.
  53 #     Sean E. O'Connor        02 Jul 2020  Version 5.8 released.
  54 #     Sean E. O'Connor        10 Aug 2021  Version 5.9 released.
  55 #     Sean E. O'Connor        11 Sep 2021  Version 6.0 released.
  56 #     Sean E. O'Connor        30 Jan 2022  Version 6.1 released.
  57 #     Sean E. O'Connor        20 Mar 2022  Version 6.2 released.
  58 #     Sean E. O'Connor        03 Sep 2022  Version 6.3 released.
  59 #
  60 # LEGAL
  61 #
  62 #     updateweb.py Version 6.3 - A Python utility program which maintains my web site.
  63 #     Copyright (C) 2007-2024 by Sean Erik O'Connor.  All Rights Reserved.
  64 #
  65 #     This program is free software: you can redistribute it and/or modify
  66 #     it under the terms of the GNU General Public License as published by
  67 #     the Free Software Foundation, either version 3 of the License, or
  68 #     (at your option) any later version.
  69 #
  70 #     This program is distributed in the hope that it will be useful,
  71 #     but WITHOUT ANY WARRANTY; without even the implied warranty of
  72 #     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  73 #     GNU General Public License for more details.
  74 #
  75 #     You should have received a copy of the GNU General Public License
  76 #     along with this program.  If not, see <http://www.gnu.org/licenses/>.
  77 #
  78 #     The author's address is seanerikoconnor!AT!gmail!DOT!com
  79 #     with !DOT! replaced by . and the !AT! replaced by @
  80 #
  81 # NOTES
  82 #
  83 #    DOCUMENTATION
  84 #
  85 #    Python interpreter:               https://www.python.org/
  86 #    Python tutorial and reference:    https://docs.python.org/lib/lib.html
  87 #    Python debugger:                  https://docs.python.org/3/library/pdb.html
  88 #    Python regular expression howto:  https://docs.python.org/3.7/howto/regex.html
  89 #
  90 # ============================================================================
  91 
  92 # ----------------------------------------------------------------------------
  93 #  Load Python Packages
  94 # ----------------------------------------------------------------------------
  95 
  96 # OS stuff
  97 import sys
  98 import os
  99 import argparse
 100 import shutil
 101 from pathlib import Path
 102 
 103 # Regular expressions
 104 import re
 105 
 106 # FTP stuff
 107 import ftplib
 108 
 109 # Date and time
 110 import time
 111 import stat
 112 import datetime
 113 
 114 # Logging
 115 import logging
 116 
 117 # Unit testing
 118 import unittest
 119 
 120 # Enumerated types (v3.4)
 121 from enum import Enum
 122 
 123 
 124 # ----------------------------------------------------------------------------
 125 #  User settings.
 126 # ----------------------------------------------------------------------------
 127 
 128 # Enum types for how to walk the directory tree.
 129 class TreeWalk(Enum):
 130     BREADTH_FIRST_SEARCH = 1
 131     DEPTH_FIRST_SEARCH = 2
 132 
 133 
 134 # 'Enum' types for properties of directories and files.
 135 class FileType(Enum):
 136     DIRECTORY = 0
 137     FILE = 1
 138     ON_MASTER_ONLY = 2
 139     ON_REMOTE_ONLY = 3
 140     ON_BOTH_MASTER_AND_REMOTE = 4
 141 
 142 
 143 # Megatons of user selectable settings.
 144 class UserSettings:
 145     # Logging control.
 146     LOGFILENAME = ""
 147     VERBOSE = False  # Verbose mode.  Prints out everything.
 148     CLEANONLY = False  # Clean the local master website only.
 149     UNITTEST = False  # Run a unit test of a function.
 150 
 151     # When diving into the MathJax directory, web walking the deep directories
 152     # may exceed Python's default recursion limit of 1000.
 153     RECURSION_DEPTH = 5000
 154     sys.setrecursionlimit(RECURSION_DEPTH)
 155 
 156     # Fields in the file information (file_info) structure.
 157     FILE_NAME = 0
 158     FILE_TYPE = 1
 159     FILE_DATE_TIME = 2
 160     FILE_SIZE = 3
 161 
 162     # Parameter file which contains web server account login information for
 163     # FTP.
 164     PARAMETERS_FILE = "/private/param.txt"
 165 
 166     # Line numbers in the PARAMETERS_FILE, starting from 0.  All other lines
 167     # are comments, and are skipped.
 168     SERVER = 19
 169     USER = 20
 170     PASSWORD = 21
 171     FTP_ROOT = 22
 172     FILE_SIZE_LIMIT = 23
 173 
 174     # Map month names onto numbers.
 175     monthToNumber = {
 176         'Jan': 1,
 177         'Feb': 2,
 178         'Mar': 3,
 179         'Apr': 4,
 180         'May': 5,
 181         'Jun': 6,
 182         'Jul': 7,
 183         'Aug': 8,
 184         'Sep': 9,
 185         'Oct': 10,
 186         'Nov': 11,
 187         'Dec': 12}
 188 
 189     # List of directories to skip over when processing or uploading the web page.
 190     # Some are private but most are dir of temporary files.
 191     # They will be listed as WARNING in the log.
 192     # Examples:
 193     #     My private admin settings directory.
 194     #     The mathjax directory.
 195     #     Git or SVN local admin directories.
 196     #     Compile build directories fromXCode.
 197     #     PyCharm build directories.
 198     #     Jupyter checkpoint directories.
 199     #     XCode temporary file crap.
 200     DIR_TO_SKIP = "private|mathjax|.git|.github|.svn|build|XCodeOutput\
 201     |Debug|Release|PyCharm|.idea|.ipynb_checkpoints|ModuleCache.noindex|SymbolCache.noindex|Primpoly-[a-z]"
 202 
 203     # List of files to skip when processing or uploading to the web page.
 204     # They will be listed as WARNING in the log.
 205     # Examples:
 206     #     Git and SVN settings,
 207     #     MathJax yml file.
 208     #     .htaccess (because it doesn't show up on the output of ftp LIST, so we must upload manually)
 209     FILE_TO_SKIP = ".gitignore|.travis.yml|.svnignore|.htaccess"
 210 
 211     # File extension for text files.
 212     TEXT_FILE_EXT = ".txt"
 213 
 214     # Suffixes for temporary files which will be deleted during the cleanup
 215     # phase.
 216     TEMP_FILE_SUFFIXES = r"""        # Use Python raw strings.
 217         \.                           # Match the dot in the file name.
 218                                      # Now begin matching the file name suffix.
 219                                      # (?: non-capturing match for the regex inside the parentheses,
 220                                      #   i.e. matching string cannot be retrieved later.
 221                                      # Now match any of the following file extensions:
 222         (?: o   | obj | lib | exe |  #     Object files generated by C, C++, etc compilers
 223                               pyc |  #     Object file generated by the Python compiler
 224                   ilk | pdb | sup |  #     Temp files from VC++ compiler
 225             idb | ncb | opt | plg |  #     Temp files from VC++ compiler
 226             sbr | bsc | map | bce |  #     Temp files from VC++ compiler
 227             res | aps | dep | db  |  #     Temp files from VC++ compiler
 228                               jbf |  #     Paintshop Pro
 229                       class | jar |  #     Java compiler
 230                               log |  #     WS_FTP
 231                               fas |  #     CLISP compiler
 232                         swp | swo |  #     Vim editor
 233                               aux |  #     TeX auxilliary files.
 234           DS_Store  | _\.DS_Store |  #     macOS finder folder settings.
 235                        _\.Trashes |  #     macOS recycle bin
 236         gdb_history)                 #     GDB history
 237         $                            #     Now we should see only the end of line.
 238         """
 239 
 240     # Special case:  Vim temporary files contain a twiddle anywhere in the
 241     # name.
 242     VIM_TEMP_FILE_EXT = "~"
 243 
 244     # Suffixes for temporary directories which should be deleted during the
 245     # cleanup phase.
 246     TEMP_DIR_SUFFIX = r"""           # Use Python raw strings.
 247         (?: Debug | Release |        # C++ compiler
 248            ipch   | \.vs    |        # Temp directories from VC++ compiler
 249         \.Trashes | \.Trash)         # macOS recycle bin
 250         $
 251         """
 252 
 253     # File extension for an internally created temporary file.
 254     TEMP_FILE_EXT = ".new"
 255 
 256     # Identify source file types, including hypertext, CSS style sheets and
 257     # makefiles.
 258     SOURCE_FILE_SUFFIX = r"""       # Use Python raw strings.
 259         (?: makefile$               # Any file called makefile is a source file.
 260           |
 261           (\.                       # Match the filename suffix after the .
 262                                         # Now match any of these suffixes:
 263              (?: html | htm |           #     HTML hypertext
 264                   css |                 #     CSS style sheet
 265                   c | cpp | h | hpp |   #     C++ and C
 266                   js |                  #     Javascript
 267                   py |                  #     Python
 268                   lsp |                 #     LISP
 269                   m  |                  #     MATLAB
 270                   FOR | for | f |       #     FORTRAN
 271                   txt | dat |           #     Data files
 272                   sh | bashrc |         #     Bash
 273                   bash_profile |
 274                   bash_logout)
 275              $)
 276          )
 277          """
 278 
 279     # Update my email address.
 280     # This is tricky:  Prevent matching and updating the name within in this
 281     # Python source file by using the character class brackets.
 282     OLD_EMAIL_ADDRESS = r"""
 283         artificer\!AT\!sean[e]rikoconnor\!DOT\!freeservers\!DOT\!com
 284         """
 285     NEW_EMAIL_ADDRESS = "seanerikoconnor!AT!gmail!DOT!com"
 286 
 287     # Rewrite a line by replacing an old substring with a new substring from a
 288     # list of old/new string pairs.
 289     SUBSTRING_REPLACEMENT_LIST = \
 290         [
 291             [
 292                 # Note 1:  Since we are regular expressions in the pattern,
 293                 # we must escape special characters and replace all spaces by \s+
 294                 # Note 2:  Since we are using raw strings, leading and trailing whitespace is ignored.
 295                 r"""
 296                         <div\s+class="titlePageWrapper">
 297                 """,
 298                 # Note that since we are using raw strings leading and trailing whitespace
 299                 # is ignored in this replacement pattern.
 300                 r"""
 301                               <div class="wrapper titlePage">
 302                 """
 303             ],
 304             [
 305                 r"""
 306                             <div\s+class="scrollBox"\s+style="height:\s+50em\s+;">
 307                 """,
 308                 r"""
 309                        <div class="scrollBoxHuge">
 310                 """
 311             ]
 312         ]
 313 
 314     # Change all old software version lines of the form
 315     #      Primpoly Version nnnn.nnnn
 316     # to the new software version.
 317     # Note that since we are using raw strings leading and trailing whitespace
 318     # is ignored in both pattern and replacement.
 319     CURRENT_SOFTWARE_VERSION = r"""
 320         Primpoly
 321         \s+
 322         Version
 323         \s+
 324         ([0-9]+)   # The two part version number NNN.nnn
 325         \.
 326         ([0-9]+)
 327         """
 328     NEW_SOFTWARE_VERSION = r"""
 329         Primpoly Version 16.2
 330         """
 331 
 332     # Match a copyright line.  Then extract the copyright symbol which can be
 333     # (C) or &copy; and extract the old year.
 334     TWO_DIGIT_YEAR_FORMAT = "%02d"
 335     COPYRIGHT_LINE = r"""
 336         Copyright                       # Copyright.
 337         \D+                             # Any non-digits.
 338         (?P<symbol> \(C\) | &copy;)     # Match and extract the copyright symbol.
 339         \D+                             # Any non-digits.
 340         (?P<old_year>[0-9]+)            # Match and extract the old copyright year,
 341                                         # then place it into variable 'old_year'
 342         -                               # to
 343         ([0-9]+)                        # New copyright year.
 344         """
 345 
 346     # Match a line containing the words,
 347     #    last updated YY
 348     # and extract the two digit year YY.
 349     LAST_UPDATED_LINE = r"""
 350         last\s+         # Match the words "last updated"
 351         updated\s+
 352         \d+             # Day number
 353         \s+             # One or more blanks or tabs
 354         [A-Za-z]+       # Month
 355         \s+             # One or more blanks or tabs
 356         (?P<year>\d+)   # Two digit year.  Place it into the variable 'year'
 357         """
 358 
 359     # Web server root directory.
 360     DEFAULT_ROOT_DIR = "/"
 361 
 362     # The ftp listing occasionally shows a date much newer than the actual date when we are near New Year's Day.
 363     # Depends on the server.
 364     DAYS_NEWER_FOR_REMOTE_NEW_YEARS_GLITCH = 40
 365 
 366     # Upload only if we are newer by more than a few minutes.  Allows for a
 367     # little slop in time stamps on server or host.
 368     MINUTES_NEWER_FOR_MASTER_BEFORE_UPLOAD = 5.0
 369     DAYS_NEWER_FOR_MASTER_BEFORE_UPLOAD = (
 370                                                   1.0 / 24.0) * (1.0 / 60.0) * MINUTES_NEWER_FOR_MASTER_BEFORE_UPLOAD
 371 
 372     # An ftp list command line should be at least this many chars, or we'll
 373     # suspect and error.
 374     MIN_FTP_LINE_LENGTH = 7
 375 
 376     # Parse an ftp listing, extracting <bytes> <mon> <day> <hour> <min> <year> <filename>
 377     # ftp listings are generally similar to UNIX ls -l listings.
 378     #
 379     # Some examples:
 380     #
 381     # (1) Freeservers ftp listing,
 382     #
 383     #          0        1   2                3           4    5   6   7      8
 384     #     drwxr-xr-x    3 1000             1000         4096 Nov 18  2006 Electronics
 385     #     -rw-r--r--    1 1000             1000        21984 Jun  4 03:46 StyleSheet.css
 386     #     -rw-r--r--    1 1000             1000         2901 Sep 26 17:12 allclasses-frame.html
 387     #
 388     # (2) atspace ftp listing,
 389     #
 390     #     drwxr-xr-x    3  seanerikoconnor vusers         49 Apr  7  2006 Electronics
 391     #     -rw-r--r--    1  seanerikoconnor vusers      21984 Jun  4 04:03 StyleSheet.css
 392     #
 393     FTP_LISTING = r"""
 394         [drwx-]+            # Unix type file mode.
 395         \s+                 # One or more blanks or tabs.
 396         \d+                 # Number of links.
 397         \s+
 398         \w+                 # Owner.
 399         \s+
 400         \w+                 # Group.
 401         \s+
 402         (?P<bytes> \d+)     # File size in bytes, placed into the variable 'bytes'.
 403         \s+
 404         (?P<mon> \w+)       # Month modified, placed into the variable 'mon'.
 405         \s+
 406         (?P<day> \d+)       # Day modified, placed into the variable 'day'.
 407         \s+
 408         (
 409             (?P<hour> \d+)  # Hour modified, placed into the variable 'hour'.
 410             :
 411             (?P<min> \d+)   # Minute modified, placed into the variable 'min'.
 412             |
 413             (?P<year> \d+)  # If hours and minutes are absent (happens when year is not the current year),
 414                             # extract the year instead.
 415         )
 416         \s+
 417         (?P<filename> [A-Za-z0-9"'.\-_,~()=+#]+)    # Path and file name containing letters, numbers,
 418                                                     # and funny characters.  We must escape some of
 419                                                     # these characters with a backslash, \.
 420         """
 421 
 422     def __init__(self):
 423         """Set up the user settings."""
 424 
 425         self.private_settings = []
 426         self.master_root_dir = ""
 427 
 428         # Import the user settings from the parameter file.
 429         self.get_master_root_dir()
 430         self.get_private_settings()
 431 
 432     def get_private_settings(self):
 433         """
 434         Read web account private settings from a secret offline parameter file.  Return an array of strings.
 435         e.g. self.private_settings[ 19 ] = "seanerikoconnor.freeservers.com", where the index 19 = UserSettings.SERVER
 436         """
 437 
 438         # Private file which contains my account settings.
 439         in_file_name = self.master_root_dir + self.PARAMETERS_FILE
 440 
 441         try:
 442             fin = open(in_file_name, "r")
 443         except IOError as detail:
 444             logging.error(
 445                 f"Cannot open the private settings file {in_file_name:s}: {str(detail):s}.  Aborting...")
 446             sys.exit()
 447 
 448         # Read each line of the file, aborting if there is a read error.
 449         try:
 450             line = fin.readline()
 451             while line:
 452                 # Strip off leading and trailing whitespace.
 453                 self.private_settings.append(line.strip())
 454                 line = fin.readline()
 455             fin.close()
 456         except Exception as detail:
 457             logging.error(
 458                 f"File I/O error reading private settings file {in_file_name:s}: {str(detail):s}.  Aborting...")
 459             sys.exit()
 460 
 461         return
 462 
 463     def get_master_root_dir(self):
 464         """Get the master website root directory on this platform."""
 465 
 466         # Each platform has a definite directory for the web page.
 467         local_web_dir_path = "/Desktop/Sean/WebSite"
 468 
 469         if sys.platform.startswith('darwin'):
 470             self.master_root_dir = str(Path.home()) + local_web_dir_path
 471         # My Cyperpower PC running Ubuntu Linux.
 472         elif sys.platform.startswith('linux') or sys.platform.startswith('cygwin'):
 473             self.master_root_dir = str(Path.home()) + local_web_dir_path
 474         return
 475 
 476 
 477 # ----------------------------------------------------------------------------
 478 #  Helper functions
 479 # ----------------------------------------------------------------------------
 480 
 481 # Pattern match a regular expression on a string, ignoring case.
 482 def pattern_match(regular_expression, search_string):
 483     pat = re.compile(regular_expression, re.VERBOSE | re.IGNORECASE)
 484     match = pat.search(search_string)
 485     return [pat, match]
 486 
 487 
 488 # ----------------------------------------------------------------------------
 489 #  Unit test some of the individual functions.
 490 # ----------------------------------------------------------------------------
 491 
 492 class UnitTest(unittest.TestCase):
 493     # Initialize the UnitTest class.
 494     def setUp(self):
 495         self.user_settings = UserSettings()
 496         self.user_settings.get_master_root_dir()
 497         self.private_settings = self.user_settings.private_settings
 498 
 499     def tearDown(self):
 500         self.user_settings = None
 501         self.private_settings = None
 502 
 503     # Test whether user settings are correctly initialized.
 504     def test_user_settings(self):
 505         computed = f"File size limit = {int(self.private_settings[self.user_settings.FILE_SIZE_LIMIT]):d} K"
 506         actual = "File size limit = 50000 K"
 507         self.assertEqual(
 508             computed,
 509             actual,
 510             "File size limit settings are incorrect.")
 511 
 512     # Test copyright line updating from any old year to the current year.
 513     def test_copyright_updating(self):
 514         old_line = "Copyright (C) 1999-2024 by Sean Erik O'Connor.  All Rights Reserved.\
 515           Copyright &copy; 1999-2024 by Sean Erik O'Connor"
 516         new_line = "Copyright (C) 1999-2024 by Sean Erik O'Connor.  All Rights Reserved.\
 517           Copyright &copy; 1999-2024 by Sean Erik O'Connor"
 518         [pat, match] = pattern_match(
 519             self.user_settings.COPYRIGHT_LINE, old_line)
 520         if match:
 521             old_year = int(match.group('old_year'))
 522             # Same as call to self.get_current_year():
 523             current_year = int(time.gmtime()[0])
 524             if old_year < current_year:
 525                 # We matched and extracted the old copyright symbol into the variable
 526                 # 'symbol' using the pattern syntax (?P<symbol> \(C\) | &copy;)
 527                 # We now insert it back by placing the special syntax
 528                 # \g<symbol> into the replacement string.
 529                 new_copyright = r"Copyright \g<symbol> \g<old_year>-" + \
 530                                 str(current_year)
 531                 updated_line = pat.sub(new_copyright, old_line)
 532                 self.assertEqual(
 533                     new_line,
 534                     updated_line,
 535                     f"newline = |{new_line:s}| updated_line = |{updated_line:s}|")
 536             else:
 537                 self.fail()
 538         else:
 539             self.fail()
 540 
 541     # Test updating to a new version of Primpoly.
 542     def test_update_software_version(self):
 543         old_version_line = "|     Primpoly Version 16.2 - A Program for Computing Primitive Polynomials.|"
 544         new_version_line = "|     Primpoly Version 16.2 - A Program for Computing Primitive Polynomials.|"
 545         [pat, match] = pattern_match(
 546             self.user_settings.CURRENT_SOFTWARE_VERSION, old_version_line)
 547         if match:
 548             # Note that since we are using raw strings leading and trailing
 549             # whitespace is ignored.
 550             new_version = self.user_settings.NEW_SOFTWARE_VERSION.lstrip().strip()
 551             updated_version_line = pat.sub(new_version, old_version_line)
 552             self.assertEqual(
 553                 updated_version_line,
 554                 new_version_line,
 555                 f"updated version line = {updated_version_line:s} new line = {new_version_line:s}")
 556         else:
 557             self.fail()
 558 
 559     # Test parsing an FTP listing.
 560     def test_extract_filename_from_ftp_listing(self):
 561         ftp_line = "-rw-r--r--    1 1000             1000         2901 Sep 26 17:12 allclasses-frame.html"
 562         extracted_file_name = "allclasses-frame.html"
 563         # Pattern is assigned to _ and thrown away to suppress unused variable
 564         # warnings.
 565         [_, match] = pattern_match(self.user_settings.FTP_LISTING, ftp_line)
 566         if match:
 567             filename = match.group('filename')
 568             self.assertEqual(
 569                 filename,
 570                 extracted_file_name,
 571                 f"ftp_line = {ftp_line:s} extracted file name = {extracted_file_name:s}")
 572         else:
 573             self.fail()
 574 
 575     # Test our substring pattern match and replacement function.
 576     def test_check_replace_substring(self):
 577         # Escape all " in the string using \".  When using more than one string
 578         # per line do +\ at the end of the line.
 579         old_line = "<div class=\"titlePageWrapper\">" + \
 580                    "<div class=\"scrollBox\" style=\"height: 50em ;\"> <div class=\"scrollBoxContent\">"
 581         new_line = "<div class=\"wrapper titlePage\">" + \
 582                    "<div class=\"scrollBoxHuge\"> <div class=\"scrollBoxContent\">"
 583 
 584         # Do the replacements in order from first to last.
 585         line = old_line
 586         for match_replace_pair in self.user_settings.SUBSTRING_REPLACEMENT_LIST:
 587             # Search for the pattern.
 588             [pat, match] = pattern_match(match_replace_pair[0], line)
 589             # print( f"\nline = |{line}|\n")
 590             # print( f"\nmatch_replace_pair[0] = |{match_replace_pair[0]}|\n")
 591             # print( f"\nmatch_replace_pair[1] = |{match_replace_pair[1]}|\n")
 592             # print( f"\npat = {pat} match = {match}\n" )
 593 
 594             # Replace with the new pattern.  Since we use raw strings, we need
 595             # to strip off leading and trailing whitespace.
 596             if match:
 597                 new_substring = match_replace_pair[1].strip().lstrip()
 598                 sub = pat.sub(new_substring, line)
 599                 # print(f"\ntransform old line = \n{line:s}\ninto new line =\n{sub:s}\n\
 600                 # using new substring =\n{new_substring:s}\n")
 601                 line = sub
 602                 # print( f" (after current substitution, line = |{line:s}| ) " )
 603 
 604         # At this point, we have done complete list of serial substitutions online.
 605         rewritten_line = line
 606         self.assertEqual(
 607             new_line,
 608             rewritten_line,
 609             f"\n      new_line = |{new_line:s}|\nrewritten_line = |{rewritten_line:s}|\n")
 610 
 611     # Test file time and date.
 612     def test_file_time_and_date(self):
 613         file_name = "/Electronics/Images/PowerSupply1Schematic.psd"
 614         full_file_name = self.user_settings.master_root_dir + file_name
 615         file_epoch_time = os.path.getmtime(full_file_name)
 616         file_time_utc = time.gmtime(file_epoch_time)[0: 6]
 617         d = datetime.datetime(
 618             file_time_utc[0],
 619             file_time_utc[1],
 620             file_time_utc[2],
 621             file_time_utc[3],
 622             file_time_utc[4],
 623             file_time_utc[5])
 624         computed = f"file {file_name:s} datetime {d.ctime():s}"
 625         actual = "file /Electronics/Images/PowerSupply1Schematic.psd datetime Tue Jan  3 05:16:49 2023"
 626         self.assertEqual(computed, actual)
 627 
 628     # Test pattern matching directories we want to skip over.
 629     def test_pattern_match_dir_to_skip(self):
 630         dir_skip = "Primpoly-cswhfrwgwdikgzfdpiorbeaiennz"
 631         pat = re.compile(self.user_settings.DIR_TO_SKIP)
 632         if pat.search(dir_skip):
 633             self.assertTrue(True)
 634         else:
 635             self.assertTrue(False)
 636 
 637 
 638 # ----------------------------------------------------------------------------
 639 #  Main function
 640 # ----------------------------------------------------------------------------
 641 
 642 
 643 def main(raw_args=None):
 644     """Main program.  Clean up and update my website."""
 645 
 646     # Print the obligatory legal notice.
 647     print("""
 648     updateweb Version 6.3 - A Python utility program which maintains my web site.
 649     Copyright (C) 2007-2024 by Sean Erik O'Connor.  All Rights Reserved.
 650 
 651     It deletes temporary files, rewrites old copyright lines and email address
 652     lines in source files, then synchronizes all changes to my web sites.
 653 
 654     updateweb comes with ABSOLUTELY NO WARRANTY; for details see the
 655     GNU General Public License.  This is free software, and you are welcome
 656     to redistribute it under certain conditions; see the GNU General Public
 657     License for details.
 658     """)
 659 
 660     # ---------------------------------------------------------------------
 661     #  Load default settings and start logging.
 662     # ---------------------------------------------------------------------
 663 
 664     # Default user settings.
 665     user_settings = UserSettings()
 666 
 667     print(
 668         f"Running main( {raw_args} ) Python version {sys.version_info[0]:d}.\
 669         {sys.version_info[1]:d}.{sys.version_info[2]:d} local web directory\
 670         {user_settings.master_root_dir}\n")
 671 
 672     # Get command line options such as --verbose.  Pass them back as flags in
 673     # user_settings.
 674     CommandLineSettings(user_settings, raw_args)
 675 
 676     # Load all unit test functions named test_* from UnitTest class, run the
 677     # tests and exit.
 678     if user_settings.UNITTEST:
 679         suite = unittest.TestLoader().loadTestsFromTestCase(UnitTest)
 680         unittest.TextTestRunner(verbosity=2).run(suite)
 681         sys.exit()
 682 
 683     # Start logging to file.  Verbose turns on logging for
 684     # DEBUG, INFO, WARNING, ERROR, and CRITICAL levels,
 685     # otherwise we log only WARNING, ERROR, and CRITICAL levels.
 686     if user_settings.VERBOSE:
 687         loglevel = logging.DEBUG
 688     else:
 689         loglevel = logging.WARNING
 690 
 691     # Pick the log file name on the host.
 692     if user_settings.CLEANONLY:
 693         user_settings.LOGFILENAME = "/private/logMaster.txt"
 694     else:
 695         user_settings.LOGFILENAME = "/private/logRemote.txt"
 696 
 697     logging.basicConfig(
 698         level=loglevel,
 699         format='%(asctime)s %(levelname)-8s %(message)s',
 700         datefmt='%a, %d %b %Y %H:%M:%S',
 701         filename=user_settings.master_root_dir + user_settings.LOGFILENAME,
 702         filemode='w')
 703 
 704     logging.debug("*** Begin logging ******************************")
 705 
 706     # ---------------------------------------------------------------------
 707     #  Scan the master website, finding out all files and directories.
 708     # ---------------------------------------------------------------------
 709     try:
 710         logging.debug("Scanning master (local on disk) web site")
 711         master = MasterWebSite(user_settings)
 712 
 713         print(f"Local web site directory = {user_settings.master_root_dir}")
 714         # Suppress newline and flush output buffer, so we can see the message
 715         # right away.
 716         print("Scanning and cleaning local web site...", end='', flush=True)
 717 
 718         master.scan()
 719 
 720         # Clean up the directory by rewriting source code and hypertext and
 721         # removing temporary files.
 722         logging.debug("Cleaning up master (local on disk) web site")
 723         changed = master.clean()
 724 
 725         # Rescan if any changes happened.
 726         if changed:
 727             logging.debug("Detected changes due to to cleanup.")
 728             master.finish()
 729             logging.debug("Disposing of the old scan.")
 730             del master
 731 
 732             master = MasterWebSite(user_settings)
 733             logging.debug("*** Rescanning ****************************")
 734             master.scan()
 735         else:
 736             logging.debug("No changes detected.  Keeping the original scan.")
 737 
 738         print("...done!", flush=True)
 739 
 740         # Master website directories.
 741         master_directory_list = master.directories
 742 
 743         # Master website filenames only.
 744         master_files_list = [file_info[user_settings.FILE_NAME]
 745                              for file_info in master.files]
 746 
 747         logging.debug("*** Master Directories **********************")
 748         for d in master_directory_list:
 749             logging.debug(f"\t {d:s} (directory)")
 750 
 751         logging.debug("*** Master Files **********************")
 752         for f in master_files_list:
 753             logging.debug(f"\t {f:s} (file)")
 754 
 755         master.finish()
 756 
 757         # Clean up master website only.  Don't update remote websites.
 758         if user_settings.CLEANONLY:
 759             logging.debug("Cleanup finished.  Exiting...")
 760             sys.exit()
 761 
 762         # ---------------------------------------------------------------------
 763         #  Scan the remote hosted web site.
 764         # ---------------------------------------------------------------------
 765 
 766         logging.debug("Reading private settings.")
 767         private_settings = user_settings.private_settings
 768 
 769         print("Scanning remote web site...", end='', flush=True)
 770 
 771         # Pick which website to update.
 772         logging.debug("Connecting to primary remote site.")
 773         remote = RemoteWebSite(user_settings,
 774                                private_settings[user_settings.SERVER],
 775                                private_settings[user_settings.USER],
 776                                private_settings[user_settings.PASSWORD],
 777                                private_settings[user_settings.FTP_ROOT])
 778 
 779         logging.debug("Scanning remote web site")
 780         remote.scan()
 781         remote.finish()
 782 
 783         print("...done!", flush=True)
 784 
 785         remote_directory_list = remote.directories
 786         remote_files_list = [file_info[user_settings.FILE_NAME]
 787                              for file_info in remote.files]
 788 
 789         logging.debug("*** Remote Directories **********************")
 790         for d in remote_directory_list:
 791             logging.debug(f"\t remote dir:  {d:s}")
 792 
 793         logging.debug("*** Remote Files **********************")
 794         for f in remote_files_list:
 795             logging.debug(f"\t remote file: {f:s}")
 796 
 797         # ---------------------------------------------------------------------
 798         # Synchronize the master and remote web sites.
 799         # ---------------------------------------------------------------------
 800 
 801         print("Synchronizing remote and local web sites...", end='', flush=True)
 802 
 803         # Primary website.
 804         logging.debug("Connecting to primary remote site for synchronization.")
 805         u = UpdateWeb(user_settings,
 806                       private_settings[user_settings.SERVER],
 807                       private_settings[user_settings.USER],
 808                       private_settings[user_settings.PASSWORD],
 809                       private_settings[user_settings.FTP_ROOT],
 810                       private_settings[user_settings.FILE_SIZE_LIMIT],
 811                       master.directories,
 812                       master.files,
 813                       remote.directories,
 814                       remote.files)
 815 
 816         logging.debug("Synchronizing remote web site")
 817         u.update()
 818         u.finish()
 819 
 820         print("...done!", flush=True)
 821 
 822         del u
 823         del remote
 824         del master
 825 
 826     except RecursionError as detail:
 827         logging.error(
 828             f"Walking the directory tree got too deep for Python's recursion {str(detail):s}.  Aborting...")
 829         sys.exit()
 830 
 831     return
 832 
 833 
 834 # ----------------------------------------------------------------------------
 835 #  Command line option class
 836 # ----------------------------------------------------------------------------
 837 
 838 class CommandLineSettings(object):
 839     """Get the command line options."""
 840 
 841     def __init__(self, user_settings, raw_args=None):
 842         """Get command line options"""
 843         command_line_parser = argparse.ArgumentParser(
 844             description="updateweb options")
 845 
 846         # Log all changes, not just warnings and errors.
 847         command_line_parser.add_argument(
 848             "-v",
 849             "--verbose",
 850             help="Turn on verbose mode to log everything",
 851             action="store_true")
 852 
 853         # Clean up the master website only.
 854         command_line_parser.add_argument(
 855             "-c",
 856             "--cleanonly",
 857             help="Do a cleanup on the master web site only.",
 858             action="store_true")
 859 
 860         # Run unit tests only.
 861         command_line_parser.add_argument("-t", "--test",
 862                                          help="Run unit tests.",
 863                                          action="store_true")
 864 
 865         args = command_line_parser.parse_args(raw_args)
 866 
 867         if args.verbose:
 868             user_settings.VERBOSE = True
 869 
 870         if args.cleanonly:
 871             user_settings.CLEANONLY = True
 872 
 873         if args.test:
 874             user_settings.UNITTEST = True
 875 
 876 
 877 # ----------------------------------------------------------------------------
 878 #  Base class for web site processing.
 879 # ----------------------------------------------------------------------------
 880 
 881 class WebSite(object):
 882     """
 883     Abstract class used for analyzing both master (local to disk) and remote (ftp server) websites.
 884     Contains the common web-walking functions which traverse the directory structures and files.
 885     Subclasses fill in the lower level functions which actually access the directories and files.
 886     Subclasses may also define additional functions unique to local websites.
 887     """
 888 
 889     def __init__(self, settings):
 890         """Set up root directories"""
 891 
 892         # Import the user settings.
 893         self.user_settings = settings
 894 
 895         # Queue keeps track of directories not yet processed.
 896         self.queue = []
 897 
 898         # List of all directories traversed.
 899         self.directories = []
 900 
 901         # List of files traversed, with file information.
 902         self.files = []
 903 
 904         # Find out the root directory and go there.
 905         self.root_dir = self.get_root_dir()
 906         self.go_to_root_dir(self.root_dir)
 907 
 908     @staticmethod
 909     def get_current_year():
 910         """Get the current year."""
 911         return int(time.gmtime()[0])
 912 
 913     @staticmethod
 914     def get_current_two_digit_year():
 915         """Get the last two digits of the current year."""
 916         return WebSite.get_current_year() % 100
 917 
 918     @staticmethod
 919     def is_file_info_type(file_info):
 920         """Check if we have a file information structure or merely a simple file name."""
 921         try:
 922             if isinstance(file_info, list):
 923                 return True
 924             elif isinstance(file_info, str):
 925                 return False
 926             else:
 927                 logging.error(
 928                     "is_file_info_type found a bad type.  Aborting...")
 929                 sys.exit()
 930         except TypeError as detail:
 931             logging.error(
 932                 f"is_file_info_type found a bad type {str(detail):s}.  Aborting...")
 933             sys.exit()
 934 
 935     def get_root_dir(self):
 936         """Subclass:  Put code here to get the root directory"""
 937         return ""
 938 
 939     def go_to_root_dir(self, root_dir):
 940         """Subclass:  Put code here to go to the root directory"""
 941         pass  # Pythons's do-nothing statement.
 942 
 943     def one_level_down(self, d):
 944         """Subclass:  Fill in with a method which returns a list of the
 945         directories and files immediately beneath dir"""
 946         return [], []
 947 
 948     def walk(self, d, type_of_tree_search=TreeWalk.BREADTH_FIRST_SEARCH):
 949         """Walk a directory in either depth first or breadth first order.  BFS is the default."""
 950 
 951         # Get all subfiles and subdirectories off this node.
 952         subdirectories, subfiles = self.one_level_down(d)
 953 
 954         # Add all the subfiles in order.
 955         for f in subfiles:
 956 
 957             name = self.strip_root(f)
 958             logging.debug(
 959                 f"Webwalking:  Adding file {name[self.user_settings.FILE_NAME]:s} to list.")
 960 
 961             # Some files are private so skip them from consideration.
 962             pat = re.compile(self.user_settings.FILE_TO_SKIP)
 963 
 964             if pat.search(name[self.user_settings.FILE_NAME]):
 965                 logging.warning(
 966                     f"Webwalking:  Skipping private file {name[self.user_settings.FILE_NAME]:s}")
 967             # Don't upload the log file due to file locking problems.
 968             elif name[self.user_settings.FILE_NAME].find(self.user_settings.LOGFILENAME) >= 0:
 969                 logging.debug(
 970                     f"Webwalking:  Skipping log file {name[self.user_settings.FILE_NAME]:s}")
 971             # File size limit on some servers.
 972             else:
 973                 self.files.append(name)
 974 
 975         # Queue up the subdirectories.
 976         for d in subdirectories:
 977 
 978             # Some directories are private such as .git or just temporary file
 979             # caches so skip them from consideration.
 980             pat = re.compile(self.user_settings.DIR_TO_SKIP)
 981             if pat.search(d):
 982                 logging.warning(f"Webwalking:  Skipping private dir {d:s}")
 983             else:
 984                 logging.debug(f"Webwalking:  Pushing dir {d:s} on the queue.")
 985                 self.queue.append(d)
 986 
 987         # Search through the directories.
 988         while len(self.queue) > 0:
 989             # For breadth first search, remove from beginning of queue.
 990             if type_of_tree_search == TreeWalk.BREADTH_FIRST_SEARCH:
 991                 d = self.queue.pop(0)
 992 
 993             # For depth first search, remove from end of queue.
 994             elif type_of_tree_search == TreeWalk.DEPTH_FIRST_SEARCH:
 995                 d = self.queue.pop()
 996             else:
 997                 d = self.queue.pop(0)
 998 
 999             name = self.strip_root(d)
1000             logging.debug(
1001                 f"Webwalking:  Adding relative directory {name:s} to list, full path = {d:s}.")
1002             self.directories.append(name)
1003 
1004             self.walk(d)
1005 
1006     def strip_root(self, file_info):
1007         """Return a path, but strip off the root directory"""
1008 
1009         root = self.root_dir
1010 
1011         # Extract the file name.
1012         if self.is_file_info_type(file_info):
1013             name = file_info[self.user_settings.FILE_NAME]
1014         else:
1015             name = file_info
1016 
1017         # e.g. root = / and name = /Art/foo.txt yields stripped_path = Art/foo.txt
1018         # but root = /Sean and name = /Sean/Art/foo.txt yields stripped_path =
1019         # Art/foo.txt
1020         lenroot = len(root)
1021         if root == self.user_settings.DEFAULT_ROOT_DIR:
1022             pass
1023         else:
1024             lenroot = lenroot + 1
1025 
1026         stripped_path = name[lenroot:]
1027 
1028         if self.is_file_info_type(file_info):
1029             # Update the file name only.
1030             return [stripped_path,
1031                     file_info[self.user_settings.FILE_TYPE],
1032                     file_info[self.user_settings.FILE_DATE_TIME],
1033                     file_info[self.user_settings.FILE_SIZE]]
1034         else:
1035             return stripped_path
1036 
1037     def append_root_dir(self, root_dir, name):
1038         """Append the root directory to a path"""
1039 
1040         # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
1041         # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
1042         if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
1043             return root_dir + name
1044         else:
1045             return root_dir + "/" + name
1046 
1047     def scan(self):
1048         """Scan the directory tree recursively from the root"""
1049         logging.debug(
1050             f"Webwalking:  Beginning recursive directory scan from root directory {self.root_dir:s}")
1051         self.walk(self.root_dir)
1052 
1053     def modtime(self, f):
1054         """Subclass:  Get file modification time"""
1055         pass
1056 
1057     def finish(self):
1058         """Quit web site walking"""
1059         logging.debug("Finished webwalking the master.")
1060         pass
1061 
1062     def remove_dir(self, dir_name):
1063         """Subclass:  Remove a directory"""
1064         pass
1065 
1066     def remove_file(self, file_name):
1067         """Subclass:  Remove a file"""
1068         pass
1069 
1070     def clean(self):
1071         """Scan through all directories and files in the master on disk website and clean them up."""
1072         num_changes = 0
1073 
1074         logging.debug("Cleaning up the master web page.")
1075 
1076         if self.directories is None or self.files is None:
1077             logging.error("Web site has no directories or files.  Aborting...")
1078             sys.exit()
1079 
1080         for d in self.directories:
1081 
1082             if self.is_temp_dir(d):
1083                 # Add the full path prefix from the root.
1084                 name = self.append_root_dir(self.get_root_dir(), d)
1085                 try:
1086                     logging.debug(
1087                         f"Removing temp dir {self.root_dir:s} recursively")
1088                     shutil.rmtree(name)
1089                     num_changes += 1
1090                 except OSError as detail:
1091                     logging.error(
1092                         f"Cannot remove temp dir {name:s}: {str(detail):s}")
1093 
1094         for f in self.files:
1095 
1096             # Add the full path prefix from the root.
1097             name = self.append_root_dir(
1098                 self.get_root_dir(), f[self.user_settings.FILE_NAME])
1099 
1100             # Remove all temporary files.
1101             if self.is_temp_file(f):
1102                 try:
1103                     logging.debug(f"Removing temp file {name:s}")
1104                     os.remove(name)
1105                     num_changes += 1
1106                 except OSError as detail:
1107                     logging.error(
1108                         f"Cannot remove temp dir {name:s}: {str(detail):s}")
1109 
1110             # Update hypertext files.
1111             if self.is_source_file(f):
1112                 changed = self.rewrite_source_file(name)
1113                 if changed:
1114                     num_changes += 1
1115                     logging.debug(f"Rewrote hypertext file {self.root_dir:s}")
1116 
1117         # Flag that at least one file was changed.
1118         if num_changes > 0:
1119             return True
1120 
1121         return False
1122 
1123     def is_temp_file(self, file_info):
1124         """Identify a file name as a temporary file"""
1125 
1126         file_name = file_info[self.user_settings.FILE_NAME]
1127 
1128         # Suffixes and names for temporary files be deleted.
1129         # Pattern is assigned to _ and thrown away to suppress unused variable
1130         # warnings.
1131         [_, match] = pattern_match(
1132             self.user_settings.TEMP_FILE_SUFFIXES, file_name)
1133         # Remove any files containing twiddles anywhere in the name.
1134         if match or file_name.find(self.user_settings.VIM_TEMP_FILE_EXT) >= 0:
1135             return True
1136 
1137         return False
1138 
1139     def is_temp_dir(self, dir_name):
1140         """Identify a name as a temporary directory."""
1141 
1142         p = re.compile(self.user_settings.TEMP_DIR_SUFFIX, re.VERBOSE)
1143         return p.search(dir_name)
1144 
1145     def is_source_file(self, file_info):
1146         """ Check if the file name is a hypertext file."""
1147 
1148         file_name = file_info[self.user_settings.FILE_NAME]
1149         p = re.compile(self.user_settings.SOURCE_FILE_SUFFIX, re.VERBOSE)
1150         return p.search(file_name)
1151 
1152     def copy_to_text_file(self, file_name):
1153         """Make a copy of a file with a .txt extension"""
1154         pass
1155 
1156     def clean_up_temp_file(self, temp_file_name, file_name, changed):
1157         """Remove the original file, rename the temporary file name to the original name.
1158         If there are no changes, just remove the temporary file.
1159         """
1160         pass
1161 
1162     def process_lines_of_file(
1163             self,
1164             in_file_name,
1165             out_file_name,
1166             process_line_function_list=None):
1167         """Process each line of a file with a list of functions.  Create a new temporary file.
1168         The default list is None which means make an exact copy.
1169         """
1170         pass
1171 
1172     def rewrite_substring(self, line):
1173         """Rewrite a line containing a pattern of your choice"""
1174 
1175         # Do the replacements in order from first to last.
1176         for match_replace_pair in self.user_settings.SUBSTRING_REPLACEMENT_LIST:
1177 
1178             # Search for the pattern.
1179             [pat, match] = pattern_match(match_replace_pair[0], line)
1180 
1181             # Replace with the new pattern.
1182             if match:
1183                 # Replace with the new pattern.  Since we use raw strings, we
1184                 # need to strip off leading and trailing whitespace.
1185                 new_substring = match_replace_pair[1].strip().lstrip()
1186                 sub = pat.sub(new_substring, line)
1187                 logging.debug(
1188                     f"\ntransform old line = \n{line:s}\ninto new line =\n\
1189                     {sub:s}\nusing new substring =\n{new_substring:s}\n")
1190                 line = sub
1191 
1192         return line
1193 
1194     def rewrite_email_address_line(self, line):
1195         """Rewrite lines containing old email addresses."""
1196 
1197         # Search for the old email address.
1198         [pat, match] = pattern_match(
1199             self.user_settings.OLD_EMAIL_ADDRESS, line)
1200 
1201         # Replace the old address with my new email address.
1202         if match:
1203             new_address = self.user_settings.NEW_EMAIL_ADDRESS
1204             sub = pat.sub(new_address, line)
1205             line = sub
1206 
1207         return line
1208 
1209     def rewrite_version_line(self, line):
1210         """Rewrite lines containing the current version of software."""
1211 
1212         # Search for the current version.
1213         [pat, match] = pattern_match(
1214             self.user_settings.CURRENT_SOFTWARE_VERSION, line)
1215 
1216         # Replace with the new version.
1217         if match:
1218             # Note that since we are using raw strings leading and trailing
1219             # whitespace is ignored.
1220             new_version = self.user_settings.NEW_SOFTWARE_VERSION.lstrip().strip()
1221             sub = pat.sub(new_version, line)
1222             line = sub
1223 
1224         return line
1225 
1226     def rewrite_copyright_line(self, line):
1227         """Rewrite copyright lines if they are out of date."""
1228 
1229         # Match the lines,
1230         #     Copyright (C) nnnn-mmmm by Sean Erik O'Connor.
1231         #     Copyright &copy; nnnn-mmmm by Sean Erik O'Connor.
1232         # and pull out the old year and save it.
1233         [pat, match] = pattern_match(self.user_settings.COPYRIGHT_LINE, line)
1234 
1235         # Found a match.
1236         if match:
1237             old_year = int(match.group('old_year'))
1238 
1239             # Replace the old year with the current year.
1240             # We matched and extracted the old copyright symbol into the variable
1241             # 'symbol' using the pattern syntax (?P<symbol> \(C\) | &copy;)
1242             # We now insert it back by placing the special syntax \g<symbol>
1243             # into the replacement string.
1244             if old_year < WebSite.get_current_year():
1245                 new_copyright = r"Copyright \g<symbol> \g<old_year>-" + \
1246                                 str(WebSite.get_current_year())
1247                 sub = pat.sub(new_copyright, line)
1248                 line = sub
1249         return line
1250 
1251     def rewrite_last_update_line(self, line):
1252         """Rewrite the Last Updated line if the year is out of date."""
1253 
1254         # Match the last updated line and pull out the year.
1255         #      last updated 01 Jan 24.
1256         p = re.compile(
1257             self.user_settings.LAST_UPDATED_LINE,
1258             re.VERBOSE | re.IGNORECASE)
1259         m = p.search(line)
1260 
1261         if m:
1262             last_update_year = int(m.group('year'))
1263 
1264             # Convert to four digit years.
1265             if last_update_year > 90:
1266                 last_update_year += 1900
1267             else:
1268                 last_update_year += 2000
1269 
1270             # If the year is old, rewrite to "01 Jan <current year>".
1271             if last_update_year < WebSite.get_current_year():
1272                 two_digit_year = self.user_settings.TWO_DIGIT_YEAR_FORMAT % self.get_current_two_digit_year()
1273                 sub = p.sub('last updated 01 Jan ' + two_digit_year, line)
1274                 line = sub
1275 
1276         return line
1277 
1278     def rewrite_source_file(self, file_name):
1279         """Rewrite copyright lines, last updated lines, etc."""
1280         changed = False
1281 
1282         # Create a new temporary file name for the rewritten file.
1283         temp_file_name = file_name + self.user_settings.TEMP_FILE_EXT
1284 
1285         # Apply changes to all lines of the file.  Apply change functions in
1286         # the sequence listed.
1287         if self.process_lines_of_file(file_name, temp_file_name,
1288                                       [self.rewrite_copyright_line,
1289                                        self.rewrite_last_update_line,
1290                                        self.rewrite_email_address_line,
1291                                        self.rewrite_substring,
1292                                        self.rewrite_version_line]):
1293             changed = True
1294 
1295         # Rename the temp file to the original file name.  If no changes, just
1296         # delete the temp file.
1297         self.clean_up_temp_file(temp_file_name, file_name, changed)
1298 
1299         return changed
1300 
1301 
1302 # ----------------------------------------------------------------------------
1303 #  Subclass for local web site processing.
1304 # ----------------------------------------------------------------------------
1305 
1306 class MasterWebSite(WebSite):
1307     """Walk the master web directory on local disk down from the root.
1308     Clean up temporary files and do other cleanup work."""
1309 
1310     def __init__(self, settings):
1311         """Go to web page root and list all files and directories."""
1312 
1313         # Initialize the parent class.
1314         WebSite.__init__(self, settings)
1315 
1316         self.root_dir = self.get_root_dir()
1317         logging.debug(
1318             f"MasterWebSite.__init__():  \tRoot directory: {self.root_dir:s}")
1319 
1320     def get_root_dir(self):
1321         """Get the name of the root directory"""
1322         return self.user_settings.master_root_dir
1323 
1324     def go_to_root_dir(self, root_dir):
1325         """Go to the root directory"""
1326 
1327         # Go to the root directory.
1328         logging.debug(
1329             f"MasterWebSite.go_to_root_dir():  \tchdir to root directory:  {root_dir:s}")
1330         os.chdir(root_dir)
1331 
1332         # Read it back.
1333         self.root_dir = os.getcwd()
1334         logging.debug(
1335             f"MasterWebSite.go_to_root_dir():  \tgetcwd root directory:  {self.root_dir:s}")
1336 
1337     def one_level_down(self, d):
1338         """List all files and subdirectories in the current directory, dir.  For files, collect file info
1339         such as time, date and size."""
1340 
1341         directories = []
1342         files = []
1343 
1344         # Change to current directory.
1345         os.chdir(d)
1346 
1347         # List all subdirectories and files.
1348         dir_list = os.listdir(d)
1349 
1350         if dir_list:
1351             for line in dir_list:
1352                 logging.debug(
1353                     f"MasterWebSite.one_level_down():  \tlistdir( {d:s} ) =  {line:s}")
1354 
1355                 # Add the full path prefix from the root.
1356                 name = self.append_root_dir(d, line)
1357                 logging.debug(
1358                     f"MasterWebSite.one_level_down():  \tmaster dir/file (full path): {name:s}")
1359 
1360                 # Is it a directory or a file?
1361                 if os.path.isdir(name):
1362                     directories.append(name)
1363                 elif os.path.isfile(name):
1364                     # First assemble the file information of name, time/date and size into a list.
1365                     # Can index it like an array.
1366                     # e.g. file_info = [ '/WebDesign/EquationImages/equation001.png', 1, \
1367                     # datetime.datetime(2010, 2, 3, 17, 15), 4675]
1368                     #     file_info[ 0 ] = '/WebDesign/EquationImages/equation001.png'
1369                     #     file_info[ 3 ] = 4675
1370                     file_info = [name,
1371                                  FileType.FILE,
1372                                  self.get_file_date_time(name),
1373                                  self.get_file_size(name)]
1374                     files.append(file_info)
1375 
1376         # Sort the names into order.
1377         if directories:
1378             directories.sort()
1379         if files:
1380             files.sort()
1381 
1382         return directories, files
1383 
1384     @staticmethod
1385     def get_file_date_time(file_name):
1386         """Get a local file time and date in UTC."""
1387 
1388         file_epoch_time = os.path.getmtime(file_name)
1389         file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1390         # year, month,   day, hour,   minute, seconds
1391         d = datetime.datetime(file_time_utc[0], file_time_utc[1],
1392                               file_time_utc[2], file_time_utc[3],
1393                               file_time_utc[4], file_time_utc[5])
1394         return d
1395 
1396     @staticmethod
1397     def get_file_size(file_name):
1398         """Get file size in bytes."""
1399         return os.path.getsize(file_name)
1400 
1401     def copy_to_text_file(self, file_name):
1402         """Make a copy of a file with a .txt extension"""
1403 
1404         # Remove the old copy with the text file extension.
1405         copy_file_name = file_name + self.user_settings.TEXT_FILE_EXT
1406         try:
1407             os.remove(copy_file_name)
1408         except OSError as detail:
1409             logging.error(
1410                 f"Cannot remove old text file copy {copy_file_name:s}: {str(detail):s}")
1411 
1412         # Create the new copy, which is an exact duplicate.
1413         self.process_lines_of_file(file_name, copy_file_name)
1414 
1415         # Make the new copy have the same modification and access time and date as the original
1416         # since it is just an exact copy.
1417         # That way we won't upload copies with newer times constantly, just because they look as
1418         # though they've been recently modified.
1419         file_stat = os.stat(file_name)
1420         os.utime(copy_file_name,
1421                  (file_stat[stat.ST_ATIME],
1422                   file_stat[stat.ST_MTIME]))
1423         logging.debug(
1424             f"Reset file time to original time for copy {copy_file_name:s}")
1425 
1426     def clean_up_temp_file(self, temp_file_name, file_name, changed):
1427         """Remove the original file, rename the temporary file name to the original name.
1428         If there are no changes, just remove the temporary file.
1429         """
1430 
1431         if changed:
1432             # Remove the old file now that we have the rewritten file.
1433             try:
1434                 os.remove(file_name)
1435                 logging.debug(
1436                     f"Changes were made.  Remove original file {file_name:s}")
1437             except OSError as detail:
1438                 logging.error(
1439                     f"Cannot remove old file {file_name:s}: {str(detail):s}.  Need to remove it manually.")
1440 
1441             # Rename the new file to the old file name.
1442             try:
1443                 os.rename(temp_file_name, file_name)
1444                 logging.debug(
1445                     f"Rename temp file {temp_file_name:s} to original file {file_name:s}")
1446             except OSError as detail:
1447                 logging.error(
1448                     f"Cannot rename temporary file {temp_file_name:s} to old file name {file_name:s}: {str(detail):s}."
1449                     f"Need to rename manually")
1450         else:
1451             # No changes?  Remove the temporary file.
1452             try:
1453                 os.remove(temp_file_name)
1454                 logging.debug(
1455                     f"No changes were made.  Remove temporary file {temp_file_name:s}")
1456             except OSError as detail:
1457                 logging.error(
1458                     f"Cannot remove temporary file {temp_file_name:s}: {str(detail):s}.  Need to remove it manually.")
1459         return
1460 
1461     def process_lines_of_file(
1462             self,
1463             in_file_name,
1464             out_file_name,
1465             process_line_function_list=None):
1466         """Process each line of a file with a list of functions.  Create a new temporary file.
1467         The default list is None which means make an exact copy.
1468         """
1469 
1470         fin = None
1471         fout = None
1472 
1473         # Assume no changes.
1474         changed = False
1475 
1476         try:
1477             fin = open(in_file_name, "r")
1478         except IOError as detail:
1479             logging.error(
1480                 f"process_lines_of_file():  \tCannot open file {in_file_name:s} for reading:  {str(detail):s}")
1481 
1482         try:
1483             fout = open(out_file_name, "w")
1484         except IOError as detail:
1485             logging.error(
1486                 f"process_lines_of_file():  \tCannot open file {out_file_name:s} for writing:  {str(detail):s}")
1487 
1488         # Read each line of the file, aborting if there is a read error.
1489         try:
1490             line = fin.readline()
1491 
1492             while line:
1493                 original_line = line
1494                 if process_line_function_list is None:
1495                     # For a simple copy, just duplicate the line unchanged.
1496                     pass
1497                 else:
1498                     # Otherwise, apply changes in succession to the line.
1499                     for processLineFunction in process_line_function_list:
1500                         line = processLineFunction(line)
1501 
1502                 if original_line != line:
1503                     logging.debug(
1504                         f"Rewrote the line >>>{original_line:s}<<< to >>>{line:s}<<<")
1505                     changed = True
1506 
1507                 fout.write(line)
1508 
1509                 line = fin.readline()
1510 
1511             fin.close()
1512             fout.close()
1513         except IOError as detail:
1514             logging.error(
1515                 f"File I/O error during reading/writing file {in_file_name:s} in process_lines_of_file: {str(detail):s}"
1516                 f"  Aborting...")
1517             sys.exit()
1518 
1519         if changed:
1520             logging.debug(
1521                 f"process_lines_of_file():  \tRewrote original file {in_file_name:s}."
1522                 f"Changes are in temporary copy {out_file_name:s}")
1523 
1524         # Return True if any lines were changed.
1525         return changed
1526 
1527 
1528 # ----------------------------------------------------------------------------
1529 #   Subclass for remote web site processing.
1530 # ----------------------------------------------------------------------------
1531 
class RemoteWebSite(WebSite):
    """Walk the remote web directory on a web server down from the root.

    Directory contents are read with the FTP LIST command and each listing line is
    parsed into a [file name, file type, date/time, size] list.
    """

    def __init__(self, settings, server, user, password, ftproot):
        """Connect to the FTP server and log in, then initialize the WebSite superclass.

        Args:
            settings: User settings object handed on to WebSite.__init__().
            server: FTP server host name.
            user: FTP login name.
            password: FTP login password.
            ftproot: Requested root directory on the FTP server.

        The process exits via sys.exit() if the connection or login fails.
        """

        # Root directory of FTP server.
        self.root_dir = ftproot
        logging.debug(
            f"Requesting remote web site ftp root dir {self.root_dir:s}")

        # Connect to FTP server and log in.
        try:
            # self.ftp.set_debuglevel( 2 )
            self.ftp = ftplib.FTP(server)
            self.ftp.login(user, password)
        # Catch all exceptions with the parent class Exception:  all built-in,
        # non-system-exiting exceptions are derived from this class.
        except Exception as detail:
            # Extract the string message from the exception class with str().
            logging.error(
                f"Remote web site cannot login to ftp server: {str(detail):s}  Aborting...")
            sys.exit()
        else:
            logging.debug("Remote web site ftp login succeeded.")

        logging.debug(
            f"Remote web site ftp welcome message {self.ftp.getwelcome():s}")

        # Initialize the superclass.  NOTE(review): presumably this is what starts the
        # directory walk using the methods below -- confirm against WebSite.
        WebSite.__init__(self, settings)

    def go_to_root_dir(self, root_dir):
        """Change to the root directory on the server and record the name the server reports.

        The process exits via sys.exit() if the ftp cwd or pwd command fails.
        """

        try:
            # Go to the root directory.
            self.ftp.cwd(root_dir)
            logging.debug(
                f"ftp root directory (requested) = {self.root_dir:s}")

            # Read it back.
            self.root_dir = self.ftp.pwd()
            logging.debug(
                f"ftp root directory (read back from server): {self.root_dir:s}")

        except Exception as detail:
            logging.error(
                f"go_to_root_dir(): \tCannot ftp cwd or pwd root dir {root_dir:s} {str(detail):s} Aborting...")
            sys.exit()

    def get_root_dir(self):
        """Get the root directory name"""

        return self.root_dir

    def finish(self):
        """Quit web site walking by closing the ftp session.  A failure to quit is only logged."""

        logging.debug("RemoteWebSite::finish().")
        try:
            self.ftp.quit()
        except Exception as detail:
            logging.error(f"Cannot ftp quit: {str(detail):s}")

    def one_level_down(self, d):
        """List files and directories in a subdirectory using ftp.

        Args:
            d: Remote directory to change into and list.

        Returns:
            A tuple (directories, files), both sorted.  directories holds directory
            names and files holds the lists built by get_ftp_file_info();  in both,
            the name has been combined with d by append_root_dir().

        The process exits via sys.exit() if the ftp cwd or LIST command fails.
        """

        directories = []
        files = []
        dir_list = []

        try:
            # ftp listing from current dir.
            logging.debug(f"RemoteWebSite.one_level_down():  \tftp cwd: {d:s}")
            self.ftp.cwd(d)
            self.ftp.retrlines('LIST', dir_list.append)
        except Exception as detail:
            logging.error(
                f"one_level_down(): \tCannot ftp cwd or ftp LIST dir {d:s}:  {str(detail):s} Aborting...")
            sys.exit()

        for line in dir_list:
            logging.debug(
                f"RemoteWebSite.one_level_down():  \tftp LIST: {line:s}")

            # Line should at least have the minimum FTP information.
            if len(line) < self.user_settings.MIN_FTP_LINE_LENGTH:
                logging.error(
                    f"RemoteWebSite.one_level_down():  \tFTP LIST line is too short:  {line:s}")
                continue

            file_info = self.get_ftp_file_info(line)

            # An empty name is reported, but the entry is still processed below.
            if file_info[self.user_settings.FILE_NAME] == "":
                logging.error(
                    "RemoteWebSite.one_level_down():  \tFTP LIST file name is NULL:")

            logging.debug(
                f"RemoteWebSite.one_level_down():  \tftp parsed file info: "
                f"{file_info[self.user_settings.FILE_NAME]:s}")

            # Prefix the full path prefix from the root to the directory
            # name and add to the directory list.
            if file_info[self.user_settings.FILE_TYPE] == FileType.DIRECTORY:
                dirname = self.append_root_dir(
                    d, file_info[self.user_settings.FILE_NAME])
                logging.debug(
                    f"RemoteWebSite.one_level_down():  \tftp dir (full path): {dirname:s}")
                directories.append(dirname)
            # Add file information to the list of files.
            else:
                # Update the file name only:  add the full path prefix from
                # the root.
                file_info[self.user_settings.FILE_NAME] = self.append_root_dir(
                    d, file_info[self.user_settings.FILE_NAME])
                logging.debug(
                    f"RemoteWebSite.one_level_down():  \tftp file (full path): "
                    f"{file_info[self.user_settings.FILE_NAME]:s}")
                files.append(file_info)

        directories.sort()
        files.sort()

        return directories, files

    def modtime(self, f):
        """Get the modification time of a file via ftp.  Return 0 if ftp cannot get it.

        On success the value returned is the text which follows the "213 " status
        code in the server's reply, not a number.
        """
        modtime = 0

        try:
            response = self.ftp.sendcmd('MDTM ' + f)
            # MDTM returns the last modified time of the file in the format
            # "213 YYYYMMDDhhmmss \r\n <error-response>
            # MM is 01 to 12, DD is 01 to 31, hh is 00 to 23, mm is 00 to 59, ss is 0 to 59.
            # error-response is 550 for info not available, and 500 or 501 if command cannot
            # be parsed.
            if response[:3] == '213':
                modtime = response[4:]
        except ftplib.error_perm:
            modtime = 0

        return modtime

    def get_ftp_file_info(self, line):
        """Parse the ftp file listing and return file name, datetime and file size.

          FTP uses UTC for its listings; the conversion to local time is done by the OS.

          We can have problems on New Year's Eve.  For example, the master file date/time is
              Mon Jan  1 06:23:12 2018
          But the remote file date/time from FTP listing doesn't show a year even though we know it was written to the
          server in 2017.
              Mon Dec 31 03:02:00
          So we default the remote file year to current year 2018 and get
              Mon Dec 31 03:02:00 2018
          Now we think that the remote file is newer by 363.860278 days.

          Args:
              line: One line of the ftp LIST output.

          Returns:
              The list [file name, file type, date/time, file size].  If the line does not
              match the listing pattern, the name is "", the size is 0 and the date/time
              is Jan 1 midnight of the current year.
        """

        # Find out if we've a directory or a file.  An empty line counts as a file.
        if line.startswith('d'):
            dir_or_file = FileType.DIRECTORY
        else:
            dir_or_file = FileType.FILE

        pattern = re.compile(self.user_settings.FTP_LISTING, re.VERBOSE)

        # Sensible defaults.
        filesize = 0
        filename = ""
        # Default the time to hour 0, minute 0, second 0 (i.e. midnight).
        hour = 0
        minute = 0
        seconds = 0
        # Default the date to Jan 1 of the current year.  The year needs a default too,
        # otherwise a line which fails to match leaves it undefined when we build the
        # date/time below.
        year = WebSite.get_current_year()
        month = 1
        day = 1

        # Extract time and date from the ftp listing.
        match = pattern.search(line)

        logging.debug(f"ftp file listing {line}")

        if match:
            filesize = int(match.group('bytes'))
            month = self.user_settings.monthToNumber[match.group('mon')]
            day = int(match.group('day'))

            # Remote file listing contains the year.  The FTP listing will omit the hour and minute.
            if match.group('year'):
                year = int(match.group('year'))
                logging.debug(f"ftp has year = {year} but is probably missing hour and minute")
            else:
                # Remote file listing omits the year.  Keep the default of the current year.
                # That may be incorrect (see comments above).
                logging.debug(f"ftp is missing the year;  use the current year = {year}")

            # If the FTP listing has the hour and minute, it will omit the year.
            if match.group('hour') and match.group('min'):
                hour = int(match.group('hour'))
                minute = int(match.group('min'))
                logging.debug(f"ftp has hour = {hour} and minute = {minute} so is probably missing the year")

            filename = match.group('filename')

        # Package up the time and date nicely.
        # Note if we didn't get any matches, we'll default the remote date and
        # time to Jan 1 midnight of the current year.
        d = datetime.datetime(year, month, day, hour, minute, seconds)

        return [filename, dir_or_file, d, filesize]
1743 
1744 
1745 class UpdateWeb(object):
1746     """Given previously scanned master and remote directories, update the remote website."""
1747 
1748     def __init__(
1749             self,
1750             settings,
1751             server,
1752             user,
1753             password,
1754             ftproot,
1755             file_size_limit,
1756             master_directory_list,
1757             master_file_info,
1758             remote_directory_list,
1759             remote_file_info):
1760         """Connect to remote site.  Accept previously scanned master and remote files and directories."""
1761 
1762         self.user_settings = settings
1763 
1764         self.master_files_list = []
1765         self.remote_files_list = []
1766         self.master_file_to_size = {}
1767         self.master_file_to_date_time = {}
1768         self.remote_file_to_date_time = {}
1769         self.master_only_dirs = []
1770         self.master_only_files = []
1771         self.remote_only_dirs = []
1772         self.remote_only_files = []
1773         self.common_files = []
1774 
1775         # Connect to FTP server and log in.
1776         try:
1777             self.ftp = ftplib.FTP(server)
1778             self.ftp.login(user, password)
1779         except Exception as detail:
1780             logging.error(
1781                 f"Cannot login to ftp server: {str(detail):s} Aborting...")
1782             sys.exit()
1783         else:
1784             logging.debug("ftp login succeeded.")
1785 
1786         logging.debug(
1787             f"ftp server welcome message:  {self.ftp.getwelcome():s}")
1788 
1789         # Master root directory.
1790         self.master_root_dir = self.user_settings.master_root_dir
1791         logging.debug(
1792             f"Master (local to disk) root directory: {self.master_root_dir:s}")
1793 
1794         # Root directory of FTP server.
1795         self.ftp_root_dir = ftproot
1796         logging.debug(
1797             f"ftp root directory (requested) = {self.ftp_root_dir:s}")
1798 
1799         # Transform KB string to integer bytes.  e.g. "200" => 2048000
1800         self.file_size_limit = int(file_size_limit) * 1024
1801 
1802         try:
1803             # Go to the root directory.
1804             self.ftp.cwd(self.ftp_root_dir)
1805 
1806             # Read it back.
1807             self.ftp_root_dir = self.ftp.pwd()
1808             logging.debug(
1809                 f"ftp root directory (read back from server): {self.ftp_root_dir:s}")
1810         except Exception as detail:
1811             logging.error(
1812                 f"UpdateWeb(): \tCannot ftp cwd or ftp LIST dir {self.ftp_root_dir:s} {str(detail):s} Aborting...")
1813 
1814         self.master_directory_list = master_directory_list
1815         self.remote_directory_list = remote_directory_list
1816         self.master_file_info = master_file_info
1817         self.remote_file_info = remote_file_info
1818 
1819     def append_root_dir(self, root_dir, name):
1820         """Append the root directory to a path"""
1821 
1822         # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
1823         # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
1824         if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
1825             return root_dir + name
1826         else:
1827             return root_dir + "/" + name
1828 
1829     def file_info(self):
1830         """Create lists of file names from the file information.  Also create dictionaries which map file names onto
1831         dates, times, and sizes."""
1832 
1833         # Extract file names.
1834         self.master_files_list = [
1835             file_info[self.user_settings.FILE_NAME] for file_info in self.master_file_info]
1836         self.remote_files_list = [
1837             file_info[self.user_settings.FILE_NAME] for file_info in self.remote_file_info]
1838 
1839         # Use a dictionary comprehension to create key/value pairs, (file name,
1840         # file date/time), which map file names onto date/time.
1841         self.master_file_to_date_time = {
1842             file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME]
1843             for file_info in self.master_file_info}
1844         self.remote_file_to_date_time = {
1845             file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME]
1846             for file_info in self.remote_file_info}
1847 
1848         # Dictionary comprehension creates a mapping of master file names onto
1849         # file sizes.
1850         self.master_file_to_size = {file_info[self.user_settings.FILE_NAME]
1851                                     : file_info[self.user_settings.FILE_SIZE] for file_info in self.master_file_info}
1852 
1853     def update(self):
1854         """Scan through the master website, cleaning it up.
1855         Go to remote website on my servers and synchronize all files."""
1856 
1857         self.file_info()
1858 
1859         # Which files and directories are different.
1860         self.changes()
1861 
1862         # Synchronize with the master.
1863         self.synchronize()
1864 
1865     def changes(self):
1866         """Find the set of different directories and files on master and remote."""
1867 
1868         # Add all directories which are only on master to the dictionary.
1869         dir_to_type = {
1870             d: FileType.ON_MASTER_ONLY for d in self.master_directory_list}
1871 
1872         # Scan through all remote directories, adding those only on remote or
1873         # on both.
1874         for d in self.remote_directory_list:
1875             if d in dir_to_type:
1876                 dir_to_type[d] = FileType.ON_BOTH_MASTER_AND_REMOTE
1877             else:
1878                 dir_to_type[d] = FileType.ON_REMOTE_ONLY
1879 
1880         # Add all files which are only on master to the dictionary.
1881         file_to_type = {
1882             f: FileType.ON_MASTER_ONLY for f in self.master_files_list}
1883 
1884         # Scan through all remote files, adding those only on remote or on
1885         # both.
1886         for f in self.remote_files_list:
1887             if f in file_to_type:
1888                 file_to_type[f] = FileType.ON_BOTH_MASTER_AND_REMOTE
1889             else:
1890                 file_to_type[f] = FileType.ON_REMOTE_ONLY
1891 
1892         logging.debug("Raw dictionary dump of directories")
1893         for k, v in dir_to_type.items():
1894             logging.debug(f"\t dir:  {str(k):s}  type: {str(v):s}")
1895 
1896         logging.debug("Raw dictionary dump of files")
1897         for k, v in file_to_type.items():
1898             logging.debug(f"\t file: {str(k):s}  type: {str(v):s}")
1899 
1900         # List of directories only on master.  Keep the ordering.
1901         self.master_only_dirs = [
1902             d for d in self.master_directory_list if dir_to_type[d] == FileType.ON_MASTER_ONLY]
1903 
1904         # List of directories only on remote.  Keep the ordering.
1905         self.remote_only_dirs = [
1906             d for d in self.remote_directory_list if dir_to_type[d] == FileType.ON_REMOTE_ONLY]
1907 
1908         # We don't care about common directories, only their changed files, if
1909         # any.
1910 
1911         # List of files only on master.  Keep the ordering.
1912         self.master_only_files = [
1913             f for f in self.master_files_list if file_to_type[f] == FileType.ON_MASTER_ONLY]
1914 
1915         # List of files only on remote.  Keep the ordering.
1916         self.remote_only_files = [
1917             f for f in self.remote_files_list if file_to_type[f] == FileType.ON_REMOTE_ONLY]
1918 
1919         # List of common files on both master and remote.  Keep the ordering.
1920         self.common_files = [
1921             f for f in self.master_files_list if file_to_type[f] == FileType.ON_BOTH_MASTER_AND_REMOTE]
1922 
1923         logging.debug(
1924             "*** Directories only on master ******************************")
1925         for d in self.master_only_dirs:
1926             logging.debug(f"\t {d:s}")
1927 
1928         logging.debug(
1929             "*** Directories only on remote ******************************")
1930         for d in self.remote_only_dirs:
1931             logging.debug(f"\t {d:s}")
1932 
1933         logging.debug(
1934             "*** Files only on master ******************************")
1935         for f in self.master_only_files:
1936             logging.debug(f"\t {f:s}")
1937 
1938         logging.debug(
1939             "*** Files only on remote ******************************")
1940         for f in self.remote_only_files:
1941             logging.debug(f"\t {f:s}")
1942 
1943         logging.debug("*** Common files ******************************")
1944         for f in self.common_files:
1945             logging.debug(
1946                 f"\tname {f:s} master time {self.master_file_to_date_time[f].ctime():s} remote time {self.remote_file_to_date_time[f].ctime():s}")
1947 
1948     def synchronize(self):
1949         """Synchronize files and subdirectories in the remote directory with the master directory."""
1950 
1951         # If we have the same files in master and remote, compare their times
1952         # and dates.
1953         for f in self.common_files:
1954             master_file_time = self.master_file_to_date_time[f]
1955             remote_file_time = self.remote_file_to_date_time[f]
1956 
1957             # How many fractional days different are we?
1958             days_different = abs((remote_file_time -
1959                                   master_file_time).days +
1960                                  (remote_file_time -
1961                                   master_file_time).seconds /
1962                                  (60.0 *
1963                                   60.0 *
1964                                   24.0))
1965 
1966             # Assume no upload initially.
1967             upload_to_host = False
1968 
1969             logging.debug(f"Common file:  {f:s}.")
1970 
1971             # Remote file time is newer.
1972             if remote_file_time > master_file_time:
1973                 # Remote file time is MUCH newer:  suspect time is out of joint
1974                 # on the server, so upload local master file to be safe.
1975                 if days_different >= self.user_settings.DAYS_NEWER_FOR_REMOTE_NEW_YEARS_GLITCH:
1976                     logging.error(
1977                         f"Remote file {f:s} is newer by {days_different:f}\
1978                         days.  Probably New Year's glitch.  Upload file to be safe.")
1979                     logging.error(
1980                         f"\tmaster time {master_file_time.ctime():s} remote time\
1981                         {remote_file_time.ctime():s}")
1982 
1983                     # Set the master file to the current time.
1984                     full_file_name = self.append_root_dir(
1985                         self.master_root_dir, f)
1986                     if os.path.exists(full_file_name):
1987                         os.utime(full_file_name, None)
1988                         logging.error(
1989                             f"Touching master file {full_file_name:s} to make it the current time")
1990 
1991                     upload_to_host = True
1992                 # Remote file time is newer;  probably OK, just a little time
1993                 # inaccuracy on the server.
1994                 else:
1995                     logging.debug(
1996                         f"Remote file {f:s} is newer by {days_different:f} days."
1997                         f"Probably time inaccuracy on the server.  Wait -- don't upload yet.")
1998                     logging.debug(
1999                         f"\tmaster time {master_file_time.ctime():s} remote time {remote_file_time.ctime():s}")
2000                     upload_to_host = False
2001 
2002             # Master file time is newer.
2003             elif master_file_time > remote_file_time:
2004                 # Master file time is newer (by several minutes), that it's
2005                 # likely to be changed;  upload.
2006                 if days_different >= self.user_settings.DAYS_NEWER_FOR_MASTER_BEFORE_UPLOAD:
2007                     logging.warning(
2008                         f"Master file {f:s} is newer by {days_different:f} days.  Preparing for upload.")
2009                     logging.warning(
2010                         f"\tmaster time {master_file_time.ctime():s} remote time {remote_file_time.ctime():s}")
2011                     upload_to_host = True
2012                 else:
2013                     logging.debug(
2014                         f"Master file {f:s} is slightly newer by {days_different:f} days.  Wait -- don't upload yet.")
2015                     logging.debug(
2016                         f"\tmaster time {master_file_time.ctime():s} remote time {remote_file_time.ctime():s}")
2017                     upload_to_host = False
2018 
2019             # Cancel the upload if the file is too big for the server.
2020             size = self.master_file_to_size[f]
2021             if size >= self.file_size_limit:
2022                 logging.error(
2023                     f"upload():  Skipping upload of file {f:s} of size {size:d};\
2024                       too large for server, limit is {self.file_size_limit:d} bytes")
2025                 upload_to_host = False
2026 
2027             # Finally do the file upload.
2028             if upload_to_host:
2029                 print(f"Uploading changed file {f:s}...", end='', flush=True)
2030                 self.upload(f)
2031 
2032         # Remote directory is not in master.  Delete it.
2033         for d in self.remote_only_dirs:
2034             logging.debug(f"Remote only dir.  Attempting to delete it:  {d:s}")
2035             print(f"Deleting remote directory {d:s}...", end='', flush=True)
2036             self.rmdir(d)
2037 
2038         # Master directory missing on remote.  Create it.
2039         # Due to breadth first order scan, we'll create parent directories
2040         # before child directories.
2041         for d in self.master_only_dirs:
2042             logging.debug(f"Master only dir.  Creating dir {d:s} on remote.")
2043             print(
2044                 f"Creating new remote directory {d:s}...",
2045                 end='',
2046                 flush=True)
2047             self.mkdir(d)
2048 
2049         # Master file missing on remote.  Upload it.
2050         for f in self.master_only_files:
2051             logging.debug(f"Master only file.  Uploading {f:s} to remote.")
2052 
2053             #  But cancel the upload if the file is too big for the server.
2054             size = self.master_file_to_size[f]
2055             if size >= self.file_size_limit:
2056                 logging.error(
2057                     f"upload():  Skipping upload of file {f:s} of size {size:d};"
2058                     f" too large for server, limit is {self.file_size_limit:d} bytes")
2059             else:
2060                 print(f"Uploading new file {f:s}...", end='', flush=True)
2061                 self.upload(f)
2062 
2063         # Remote contains a file not present on the master.  Delete the file.
2064         for f in self.remote_only_files:
2065             logging.debug(f"Remote only file.  Deleting remote file {f:s}.")
2066             print(f"Deleting remote file {f:s}...", end='', flush=True)
2067             self.del_remote(f)
2068 
2069     def del_remote(self, relative_file_path):
2070         """Delete a file using ftp."""
2071 
2072         logging.debug(
2073             f"del_remote():  \trelative file path name: {relative_file_path:s}")
2074 
2075         # Parse the relative file path into file name and relative directory.
2076         relative_dir, file_name = os.path.split(relative_file_path)
2077         logging.debug(f"del_remote():  \tfile name: {file_name:s}")
2078         logging.debug(f"del_remote():  \trelative dir: {relative_dir:s}")
2079         logging.debug(
2080             f"del_remote():  \tremote root dir: {self.ftp_root_dir:s}")
2081 
2082         try:
2083             # Add the remote root path and go to the remote directory.
2084             remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2085             logging.debug(
2086                 f"del_remote():  \tftp cd remote dir: {remote_dir:s}")
2087             self.ftp.cwd(remote_dir)
2088         except Exception as detail:
2089             logging.error(
2090                 f"del_remote():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2091         else:
2092             try:
2093                 logging.debug(f"del_remote():  \tftp rm: {file_name:s}")
2094 
2095                 # Don't remove zero length file names.
2096                 if len(file_name) > 0:
2097                     self.ftp.delete(file_name)
2098                 else:
2099                     logging.warning(
2100                         "fdel_remote():  skipping ftp delete;  file NAME {file_name:s} had zero length")
2101             except Exception as detail:
2102                 logging.error(
2103                     f"del_remote():  \tCannot ftp rm: {str(detail):s}")
2104 
2105     def mkdir(self, relative_dir):
2106         """Create new remote directory using ftp."""
2107 
2108         logging.debug(f"mkdir():  \trelative dir path name: {relative_dir:s}")
2109         logging.debug(f"mkdir():  \tremote root dir: {self.ftp_root_dir:s}")
2110 
2111         # Parse the relative dir path into prefix dir and suffix dir.
2112         path, d = os.path.split(relative_dir)
2113         logging.debug(f"mkdir():  \tremote prefix dir: {path:s}")
2114         logging.debug(f"mkdir():  \tremote dir:  {d:s}")
2115 
2116         try:
2117             # Add the remote root path and go to the remote directory.
2118             remote_dir = self.append_root_dir(self.ftp_root_dir, path)
2119             logging.debug(f"mkdir():  \tftp cd remote dir: {remote_dir:s}")
2120             self.ftp.cwd(remote_dir)
2121         except Exception as detail:
2122             logging.error(
2123                 f"mkdir():  \tCannot ftp chrdir: {str(detail):s}  Skipping...")
2124         else:
2125             try:
2126                 logging.debug(f"mkdir():  \tftp mkd: {d:s}")
2127                 self.ftp.mkd(d)
2128             except Exception as detail:
2129                 logging.error(f"mkdir():  \tCannot ftp mkdir: {str(detail):s}")
2130 
2131     def rmdir(self, relative_dir):
2132         """Delete an empty directory using ftp."""
2133 
2134         logging.debug(
2135             f"rmdir():  \tintermediate dir path name: {relative_dir:s}")
2136         logging.debug(f"rmdir():  \tremote root dir: {self.ftp_root_dir:s}")
2137 
2138         # Parse the relative dir path into prefix dir and suffix dir.
2139         path, d = os.path.split(relative_dir)
2140         logging.debug(f"rmdir():  \tremote prefix dir: {path:s}")
2141         logging.debug(f"rmdir():  \tremote dir:  {d:s}")
2142 
2143         try:
2144             # Add the remote root path and go to the remote directory.
2145             remote_dir = self.append_root_dir(self.ftp_root_dir, path)
2146             logging.debug(f"rmdir():  \tftp cd remote dir: {remote_dir:s}")
2147             self.ftp.cwd(remote_dir)
2148         except Exception as detail:
2149             logging.error(
2150                 f"rmdir():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2151         else:
2152             try:
2153                 logging.debug(f"rmdir():  \tftp rmd: {d:s}")
2154                 self.ftp.rmd(d)
2155             except Exception as detail:
2156                 logging.error(
2157                     f"rmdir():  \tCannot ftp rmdir dir {d:s}: {str(detail):s}"
2158                     f"  Directory is probably not empty.  Do a manual delete.")
2159 
2160     def download(self, relative_file_path):
2161         """Download a binary file using ftp."""
2162 
2163         logging.debug(f"download():  \tfile name: {relative_file_path:s}")
2164 
2165         # Parse the relative file path into file name and relative directory.
2166         relative_dir, file_name = os.path.split(relative_file_path)
2167         logging.debug(f"download():  \tfile name: {file_name:s}")
2168         logging.debug(f"download():  \trelative dir: {relative_dir:s}")
2169         logging.debug(f"download():  \troot dir: {self.ftp_root_dir:s}")
2170 
2171         # Add the remote root path and go to the remote directory.
2172         remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2173         logging.debug(f"download():  \tftp cd remote dir: {remote_dir:s}")
2174 
2175         try:
2176             self.ftp.cwd(remote_dir)
2177         except Exception as detail:
2178             logging.error(
2179                 f"download():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2180         else:
2181             # Add the master root path to get the local file name.
2182             # Open local binary file to write into.
2183             local_file_name = self.append_root_dir(
2184                 self.master_root_dir, relative_file_path)
2185             logging.debug(
2186                 f"download():  \topen local file name: {local_file_name:s}")
2187             try:
2188                 f = open(local_file_name, "wb")
2189                 try:
2190                     # Calls f.write() on each block of the binary file.
2191                     # ftp.retrbinary( "RETR " + file_name, f.write )
2192                     pass
2193                 except Exception as detail:
2194                     logging.error(
2195                         f"download():  \tCannot cannot ftp retrbinary: {str(detail):s}")
2196                 f.close()
2197             except IOError as detail:
2198                 logging.error(
2199                     f"download():  \tCannot open local file {local_file_name:s} for reading:  {str(detail):s}")
2200 
2201     def upload(self, relative_file_path):
2202         """Upload  a binary file using ftp."""
2203 
2204         logging.debug(
2205             f"upload():  \trelative file path name: {relative_file_path:s}")
2206 
2207         # Parse the relative file path into file name and relative directory.
2208         relative_dir, file_name = os.path.split(relative_file_path)
2209         logging.debug(f"upload():  \tfile name: {file_name:s}")
2210         logging.debug(f"upload():  \trelative dir: {relative_dir:s}")
2211         logging.debug(f"upload():  \tremote root dir: {self.ftp_root_dir:s}")
2212 
2213         # Add the remote root path and go to the remote directory.
2214         remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2215         logging.debug(f"upload():  \tftp cd remote dir: {remote_dir:s}")
2216 
2217         try:
2218             self.ftp.cwd(remote_dir)
2219         except Exception as detail:
2220             logging.error(
2221                 f"upload():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2222         else:
2223             # Add the master root path to get the local file name.
2224             # Open local binary file to read from.
2225             local_file_name = self.append_root_dir(
2226                 self.master_root_dir, relative_file_path)
2227             logging.debug(
2228                 f"upload():  \topen local file name: {local_file_name:s}")
2229 
2230             try:
2231                 f = open(local_file_name, "rb")
2232                 try:
2233                     # f.read() is called on each block of the binary file until
2234                     # EOF.
2235                     logging.debug(f"upload():  \tftp STOR file {file_name:s}")
2236                     self.ftp.storbinary("STOR " + file_name, f)
2237                 except Exception as detail:
2238                     logging.error(
2239                         f"upload():  \tCannot ftp storbinary: {str(detail):s}")
2240                 f.close()
2241             except IOError as detail:
2242                 logging.error(
2243                     f"upload():  \tCannot open local file {local_file_name:s} for reading:  {str(detail):s}")
2244 
2245     def finish(self):
2246         """Log out of an ftp session"""
2247 
2248         logging.debug("UpdateWeb::finish()")
2249         try:
2250             self.ftp.quit()
2251         except Exception as detail:
2252             logging.error(f"Cannot ftp quit because {str(detail):s}")
2253 
2254 
if __name__ == '__main__':
    # Python executes all code in the file, so all classes and functions get
    # defined first.  Finally we come here.  If we are executing this file as
    # a Python script, the name of the current module is set to main, thus
    # we'll call the main() function.
    main()

# When using as a module, nothing runs at import time.  Start python, then
# import the module and call it:
#
#     python
#         import updateweb
#         updateweb.main(["--test"])
#
# Or if you want to debug, do this:
#
#     python
#         import pdb
#         import updateweb
#         pdb.run('updateweb.main(["--test"])')
#         b updateweb.main
#         c
#         <Now use n to step, l to list, etc>