1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81 NOTES
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97 import sys
98 import os
99 import argparse
100 import shutil
101 from pathlib import Path
102
103
104 import re
105
106
107 import ftplib
108
109
110 import time
111 import stat
112 import datetime
113
114
115 import logging
116
117
118 import unittest
119
120
121 from enum import Enum
122
123
124
125
126
127
128
class TreeWalk(Enum):
    """Order in which the web walker traverses a directory tree."""
    # Visit all sibling directories before descending (queue used as FIFO).
    BREADTH_FIRST_SEARCH = 1
    # Descend fully into each subdirectory before its siblings (queue used as LIFO).
    DEPTH_FIRST_SEARCH = 2
132
133
134
class FileType(Enum):
    """Classification of an entry found while walking a web site's tree."""
    DIRECTORY = 0
    FILE = 1
    # Presence flags used when comparing the master (local) and remote sites.
    ON_MASTER_ONLY = 2
    ON_REMOTE_ONLY = 3
    ON_BOTH_MASTER_AND_REMOTE = 4
141
142
143
class UserSettings:
    """All user-configurable settings for the web-site updater.

    Holds the command-line flags, the constants and regular expressions
    which drive cleanup/rewriting, and instance state: the master web-site
    root directory and the private account settings read from an offline
    parameter file.
    """

    # Command-line flags; set later by CommandLineSettings.
    LOGFILENAME = ""
    VERBOSE = False
    CLEANONLY = False
    UNITTEST = False

    # Raise Python's recursion limit so the recursive directory walk can
    # handle deep trees.  Executes once, when this class body runs at import.
    RECURSION_DEPTH = 5000
    sys.setrecursionlimit(RECURSION_DEPTH)

    # Indices into a file-information record: [name, type, datetime, size].
    FILE_NAME = 0
    FILE_TYPE = 1
    FILE_DATE_TIME = 2
    FILE_SIZE = 3

    # Secret offline parameter file, relative to the master web-site root.
    PARAMETERS_FILE = "/private/param.txt"

    # Line numbers (0-based) of each setting inside PARAMETERS_FILE.
    SERVER = 19
    USER = 20
    PASSWORD = 21
    FTP_ROOT = 22
    FILE_SIZE_LIMIT = 23

    # Map a three-letter month abbreviation (as seen in FTP listings) to its number.
    monthToNumber = {
        'Jan': 1,
        'Feb': 2,
        'Mar': 3,
        'Apr': 4,
        'May': 5,
        'Jun': 6,
        'Jul': 7,
        'Aug': 8,
        'Sep': 9,
        'Oct': 10,
        'Nov': 11,
        'Dec': 12}

    # Directories to skip during the web walk (regex alternatives).
    # Written as one literal so no stray whitespace from a line continuation
    # can leak into the pattern.
    DIR_TO_SKIP = "private|mathjax|.git|.github|.svn|build|XCodeOutput|Debug|Release|PyCharm|.idea|.ipynb_checkpoints|ModuleCache.noindex|SymbolCache.noindex|Primpoly-[a-z]"

    # Individual files to skip during the web walk.
    FILE_TO_SKIP = ".gitignore|.travis.yml|.svnignore|.htaccess"

    # Extension used when making a plain-text copy of a file.
    TEXT_FILE_EXT = ".txt"

    # Verbose regex matching file suffixes of temporary/generated files to delete.
    TEMP_FILE_SUFFIXES = r"""            # Use Python raw strings.
        \.                               # Match the dot in the file name.
                                         # Now begin matching the file name suffix.
                                         # (?: non-capturing match for the regex inside the parentheses,
                                         # i.e. matching string cannot be retrieved later.
                                         # Now match any of the following file extensions:
        (?: o | obj | lib | exe |        # Object files generated by C, C++, etc compilers
            pyc |                        # Object file generated by the Python compiler
            ilk | pdb | sup |            # Temp files from VC++ compiler
            idb | ncb | opt | plg |      # Temp files from VC++ compiler
            sbr | bsc | map | bce |      # Temp files from VC++ compiler
            res | aps | dep | db |       # Temp files from VC++ compiler
            jbf |                        # Paintshop Pro
            class | jar |                # Java compiler
            log |                        # WS_FTP
            fas |                        # CLISP compiler
            swp | swo |                  # Vim editor
            aux |                        # TeX auxilliary files.
            DS_Store | _\.DS_Store |     # macOS finder folder settings.
            _\.Trashes |                 # macOS recycle bin
            gdb_history)                 # GDB history
        $                                # Now we should see only the end of line.
        """

    # Any file name containing this substring is a Vim swap/backup file.
    VIM_TEMP_FILE_EXT = "~"

    # Verbose regex matching temporary directories to delete recursively.
    TEMP_DIR_SUFFIX = r"""               # Use Python raw strings.
        (?: Debug | Release |            # C++ compiler
            ipch | \.vs |                # Temp directories from VC++ compiler
            \.Trashes | \.Trash)         # macOS recycle bin
        $
        """

    # Suffix for the temporary file written while rewriting a source file.
    TEMP_FILE_EXT = ".new"

    # Verbose regex deciding which files are rewritable source files.
    SOURCE_FILE_SUFFIX = r"""            # Use Python raw strings.
        (?: makefile$                    # Any file called makefile is a source file.
            |
        (\.                              # Match the filename suffix after the .
                                         # Now match any of these suffixes:
            (?: html | htm |             # HTML hypertext
                css |                    # CSS style sheet
                c | cpp | h | hpp |      # C++ and C
                js |                     # Javascript
                py |                     # Python
                lsp |                    # LISP
                m |                      # MATLAB
                FOR | for | f |          # FORTRAN
                txt | dat |              # Data files
                sh | bashrc |            # Bash
                bash_profile |
                bash_logout)
            $)
        )
        """

    # Obfuscated old email address to replace, and its replacement.
    OLD_EMAIL_ADDRESS = r"""
        artificer\!AT\!sean[e]rikoconnor\!DOT\!freeservers\!DOT\!com
        """
    NEW_EMAIL_ADDRESS = "seanerikoconnor!AT!gmail!DOT!com"

    # [old-pattern, new-replacement] pairs applied in order to every source line.
    SUBSTRING_REPLACEMENT_LIST = \
        [
            [
                r"""
                <div\s+class="titlePageWrapper">
                """,
                r"""
                <div class="wrapper titlePage">
                """
            ],
            [
                r"""
                <div\s+class="scrollBox"\s+style="height:\s+50em\s+;">
                """,
                r"""
                <div class="scrollBoxHuge">
                """
            ]
        ]

    # Regex matching an out-of-date software version string, and its replacement.
    CURRENT_SOFTWARE_VERSION = r"""
        Primpoly
        \s+
        Version
        \s+
        ([0-9]+)    # The two part version number NNN.nnn
        \.
        ([0-9]+)
        """
    NEW_SOFTWARE_VERSION = r"""
        Primpoly Version 16.2
        """

    # Format for a two-digit year, and the copyright-line regex.
    TWO_DIGIT_YEAR_FORMAT = "%02d"
    COPYRIGHT_LINE = r"""
        Copyright                        # Copyright.
        \D+                              # Any non-digits.
        (?P<symbol> \(C\) | ©)           # Match and extract the copyright symbol.
        \D+                              # Any non-digits.
        (?P<old_year>[0-9]+)             # Match and extract the old copyright year,
                                         # then place it into variable 'old_year'
        -                                # to
        ([0-9]+)                         # New copyright year.
        """

    # Regex matching a "last updated DD Mon YY" line.
    LAST_UPDATED_LINE = r"""
        last\s+                          # Match the words "last updated"
        updated\s+
        \d+                              # Day number
        \s+                              # One or more blanks or tabs
        [A-Za-z]+                        # Month
        \s+                              # One or more blanks or tabs
        (?P<year>\d+)                    # Two digit year.  Place it into the variable 'year'
        """

    # Root used by the remote (FTP) web site.
    DEFAULT_ROOT_DIR = "/"

    # Tolerance (days) for the remote server's new-year timestamp glitch.
    DAYS_NEWER_FOR_REMOTE_NEW_YEARS_GLITCH = 40

    # How much newer (in days) a master file must be before we upload it.
    MINUTES_NEWER_FOR_MASTER_BEFORE_UPLOAD = 5.0
    DAYS_NEWER_FOR_MASTER_BEFORE_UPLOAD = (
        1.0 / 24.0) * (1.0 / 60.0) * MINUTES_NEWER_FOR_MASTER_BEFORE_UPLOAD

    # Shortest FTP listing line worth parsing.
    MIN_FTP_LINE_LENGTH = 7

    # Verbose regex parsing one line of a Unix-style FTP directory listing.
    FTP_LISTING = r"""
        [drwx-]+                         # Unix type file mode.
        \s+                              # One or more blanks or tabs.
        \d+                              # Number of links.
        \s+
        \w+                              # Owner.
        \s+
        \w+                              # Group.
        \s+
        (?P<bytes> \d+)                  # File size in bytes, placed into the variable 'bytes'.
        \s+
        (?P<mon> \w+)                    # Month modified, placed into the variable 'mon'.
        \s+
        (?P<day> \d+)                    # Day modified, placed into the variable 'day'.
        \s+
        (
            (?P<hour> \d+)               # Hour modified, placed into the variable 'hour'.
            :
            (?P<min> \d+)                # Minute modified, placed into the variable 'min'.
            |
            (?P<year> \d+)               # If hours and minutes are absent (happens when year is not the current year),
                                         # extract the year instead.
        )
        \s+
        (?P<filename> [A-Za-z0-9"'.\-_,~()=+#]+)   # Path and file name containing letters, numbers,
                                         # and funny characters.  We must escape some of
                                         # these characters with a backslash, \.
        """

    def __init__(self):
        """Set up the user settings: locate the master root and read the private settings."""
        self.private_settings = []
        self.master_root_dir = ""

        self.get_master_root_dir()
        self.get_private_settings()

    def get_private_settings(self):
        """
        Read web account private settings from a secret offline parameter file. Return an array of strings.
        e.g. self.private_settings[ 19 ] = "seanerikoconnor.freeservers.com", where the index 19 = UserSettings.SERVER
        """
        in_file_name = self.master_root_dir + self.PARAMETERS_FILE

        try:
            fin = open(in_file_name, "r")
        except IOError as detail:
            logging.error(
                f"Cannot open the private settings file {in_file_name:s}: {str(detail):s}. Aborting...")
            sys.exit()

        try:
            # The context manager guarantees the file is closed even if a
            # read raises (the original code leaked the handle in that case).
            with fin:
                for line in fin:
                    # One setting per line; strip the newline and surrounding blanks.
                    self.private_settings.append(line.strip())
        except Exception as detail:
            logging.error(
                f"File I/O error reading private settings file {in_file_name:s}: {str(detail):s}. Aborting...")
            sys.exit()

        return

    def get_master_root_dir(self):
        """Get the master website root directory on this platform."""
        # Web site root directory, relative to the user's home directory.
        local_web_dir_path = "/Desktop/Sean/WebSite"

        # Same location on macOS, Linux and Cygwin.  On any other platform
        # master_root_dir keeps its default "" value.
        if sys.platform.startswith('darwin'):
            self.master_root_dir = str(Path.home()) + local_web_dir_path
        elif sys.platform.startswith('linux') or sys.platform.startswith('cygwin'):
            self.master_root_dir = str(Path.home()) + local_web_dir_path
        return
475
476
477
478
479
480
481
def pattern_match(regular_expression, search_string):
    """Search a string with a verbose, case-insensitive regular expression.

    Returns the pair [compiled_pattern, match] where match is None when the
    pattern does not occur anywhere in the string.
    """
    compiled_pattern = re.compile(regular_expression, re.VERBOSE | re.IGNORECASE)
    return [compiled_pattern, compiled_pattern.search(search_string)]
486
487
488
489
490
491
class UnitTest(unittest.TestCase):
    """Self-tests for the settings, regular expressions, and rewriting rules.

    NOTE(review): several tests depend on this particular machine — the
    private settings file contents, one specific file's modification time,
    and the current calendar year — so they only pass in the author's own
    environment.  Confirm before relying on them elsewhere.
    """

    def setUp(self):
        # Fresh settings for every test; constructing UserSettings reads the
        # private settings file from disk.
        self.user_settings = UserSettings()
        self.user_settings.get_master_root_dir()
        self.private_settings = self.user_settings.private_settings

    def tearDown(self):
        # Drop the fixtures built in setUp.
        self.user_settings = None
        self.private_settings = None

    def test_user_settings(self):
        """The file size limit read from the private settings file should be 50000 K."""
        computed = f"File size limit = {int(self.private_settings[self.user_settings.FILE_SIZE_LIMIT]):d} K"
        actual = "File size limit = 50000 K"
        self.assertEqual(
            computed,
            actual,
            "File size limit settings are incorrect.")

    def test_copyright_updating(self):
        """An out-of-date copyright line should be rewritten to end at the current year.

        NOTE(review): the expected string is hard-coded to 2024, so this
        comparison only succeeds while the current year is 2024.
        """
        # The continuation lines start at column zero so no extra spaces
        # enter the string literals.
        old_line = "Copyright (C) 1999-2024 by Sean Erik O'Connor. All Rights Reserved.\
Copyright © 1999-2024 by Sean Erik O'Connor"
        new_line = "Copyright (C) 1999-2024 by Sean Erik O'Connor. All Rights Reserved.\
Copyright © 1999-2024 by Sean Erik O'Connor"
        [pat, match] = pattern_match(
            self.user_settings.COPYRIGHT_LINE, old_line)
        if match:
            old_year = int(match.group('old_year'))
            # Current year, UTC.
            current_year = int(time.gmtime()[0])
            if old_year < current_year:
                # Keep the copyright symbol and starting year; replace only
                # the ending year.
                new_copyright = r"Copyright \g<symbol> \g<old_year>-" + \
                    str(current_year)
                updated_line = pat.sub(new_copyright, old_line)
                self.assertEqual(
                    new_line,
                    updated_line,
                    f"newline = |{new_line:s}| updated_line = |{updated_line:s}|")
            else:
                self.fail()
        else:
            self.fail()

    def test_update_software_version(self):
        """A version string should be rewritten to NEW_SOFTWARE_VERSION."""
        old_version_line = "| Primpoly Version 16.2 - A Program for Computing Primitive Polynomials.|"
        new_version_line = "| Primpoly Version 16.2 - A Program for Computing Primitive Polynomials.|"
        [pat, match] = pattern_match(
            self.user_settings.CURRENT_SOFTWARE_VERSION, old_version_line)
        if match:
            # Strip the whitespace padding from the raw-string constant.
            new_version = self.user_settings.NEW_SOFTWARE_VERSION.lstrip().strip()
            updated_version_line = pat.sub(new_version, old_version_line)
            self.assertEqual(
                updated_version_line,
                new_version_line,
                f"updated version line = {updated_version_line:s} new line = {new_version_line:s}")
        else:
            self.fail()

    def test_extract_filename_from_ftp_listing(self):
        """FTP_LISTING should pull the file name out of a Unix-style listing line."""
        ftp_line = "-rw-r--r-- 1 1000 1000 2901 Sep 26 17:12 allclasses-frame.html"
        extracted_file_name = "allclasses-frame.html"

        [_, match] = pattern_match(self.user_settings.FTP_LISTING, ftp_line)
        if match:
            filename = match.group('filename')
            self.assertEqual(
                filename,
                extracted_file_name,
                f"ftp_line = {ftp_line:s} extracted file name = {extracted_file_name:s}")
        else:
            self.fail()

    def test_check_replace_substring(self):
        """Each pattern in SUBSTRING_REPLACEMENT_LIST should be applied in sequence."""
        old_line = "<div class=\"titlePageWrapper\">" + \
            "<div class=\"scrollBox\" style=\"height: 50em ;\"> <div class=\"scrollBoxContent\">"
        new_line = "<div class=\"wrapper titlePage\">" + \
            "<div class=\"scrollBoxHuge\"> <div class=\"scrollBoxContent\">"

        # Apply every [pattern, replacement] pair in turn to the same line.
        line = old_line
        for match_replace_pair in self.user_settings.SUBSTRING_REPLACEMENT_LIST:
            [pat, match] = pattern_match(match_replace_pair[0], line)
            if match:
                # Strip whitespace padding from the raw-string replacement.
                new_substring = match_replace_pair[1].strip().lstrip()
                sub = pat.sub(new_substring, line)
                line = sub

        rewritten_line = line
        self.assertEqual(
            new_line,
            rewritten_line,
            f"\n new_line = |{new_line:s}|\nrewritten_line = |{rewritten_line:s}|\n")

    def test_file_time_and_date(self):
        """Read a local file's modification time as a UTC datetime.

        NOTE(review): depends on one specific file on the author's disk and
        its exact modification time.
        """
        file_name = "/Electronics/Images/PowerSupply1Schematic.psd"
        full_file_name = self.user_settings.master_root_dir + file_name
        file_epoch_time = os.path.getmtime(full_file_name)
        # First six fields: year, month, day, hour, minute, second (UTC).
        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
        d = datetime.datetime(
            file_time_utc[0],
            file_time_utc[1],
            file_time_utc[2],
            file_time_utc[3],
            file_time_utc[4],
            file_time_utc[5])
        computed = f"file {file_name:s} datetime {d.ctime():s}"
        actual = "file /Electronics/Images/PowerSupply1Schematic.psd datetime Tue Jan 3 05:16:49 2023"
        self.assertEqual(computed, actual)

    def test_pattern_match_dir_to_skip(self):
        """A randomly-suffixed Primpoly-* directory name should match DIR_TO_SKIP."""
        dir_skip = "Primpoly-cswhfrwgwdikgzfdpiorbeaiennz"
        pat = re.compile(self.user_settings.DIR_TO_SKIP)
        if pat.search(dir_skip):
            self.assertTrue(True)
        else:
            self.assertTrue(False)
636
637
638
639
640
641
642
def main(raw_args=None):
    """Main program. Clean up and update my website."""

    # Print the banner and license notice.
    print("""
    updateweb Version 6.3 - A Python utility program which maintains my web site.
    Copyright (C) 2007-2024 by Sean Erik O'Connor. All Rights Reserved.

    It deletes temporary files, rewrites old copyright lines and email address
    lines in source files, then synchronizes all changes to my web sites.

    updateweb comes with ABSOLUTELY NO WARRANTY; for details see the
    GNU General Public License. This is free software, and you are welcome
    to redistribute it under certain conditions; see the GNU General Public
    License for details.
    """)

    # Load the user settings: master root directory and private account settings.
    user_settings = UserSettings()

    # The f-string continuation lines start at column zero so no extra
    # spaces enter the printed message.
    print(
        f"Running main( {raw_args} ) Python version {sys.version_info[0]:d}.\
{sys.version_info[1]:d}.{sys.version_info[2]:d} local web directory\
{user_settings.master_root_dir}\n")

    # Parse the command line and set the corresponding flags on user_settings.
    CommandLineSettings(user_settings, raw_args)

    # If unit testing was requested, run the tests and quit.
    if user_settings.UNITTEST:
        suite = unittest.TestLoader().loadTestsFromTestCase(UnitTest)
        unittest.TextTestRunner(verbosity=2).run(suite)
        sys.exit()

    # Verbose mode logs everything; otherwise only warnings and errors.
    if user_settings.VERBOSE:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.WARNING

    # Log to a different file depending on whether we touch the remote site.
    if user_settings.CLEANONLY:
        user_settings.LOGFILENAME = "/private/logMaster.txt"
    else:
        user_settings.LOGFILENAME = "/private/logRemote.txt"

    # Overwrite (not append to) the log file on every run.
    logging.basicConfig(
        level=loglevel,
        format='%(asctime)s %(levelname)-8s %(message)s',
        datefmt='%a, %d %b %Y %H:%M:%S',
        filename=user_settings.master_root_dir + user_settings.LOGFILENAME,
        filemode='w')

    logging.debug("*** Begin logging ******************************")

    # The recursive tree walk may exceed Python's recursion limit on very
    # deep trees, so the whole scan/clean/sync sequence is guarded.
    try:
        logging.debug("Scanning master (local on disk) web site")
        master = MasterWebSite(user_settings)

        print(f"Local web site directory = {user_settings.master_root_dir}")

        print("Scanning and cleaning local web site...", end='', flush=True)

        master.scan()

        # Remove temp files/dirs and rewrite stale source lines in place.
        logging.debug("Cleaning up master (local on disk) web site")
        changed = master.clean()

        # If cleanup deleted or rewrote anything the scan results are stale,
        # so throw them away and scan again.
        if changed:
            logging.debug("Detected changes due to to cleanup.")
            master.finish()
            logging.debug("Disposing of the old scan.")
            del master

            master = MasterWebSite(user_settings)
            logging.debug("*** Rescanning ****************************")
            master.scan()
        else:
            logging.debug("No changes detected. Keeping the original scan.")

        print("...done!", flush=True)

        # Relative directory names found on the master site.
        master_directory_list = master.directories

        # Just the names from the [name, type, datetime, size] records.
        master_files_list = [file_info[user_settings.FILE_NAME]
                             for file_info in master.files]

        logging.debug("*** Master Directories **********************")
        for d in master_directory_list:
            logging.debug(f"\t {d:s} (directory)")

        logging.debug("*** Master Files **********************")
        for f in master_files_list:
            logging.debug(f"\t {f:s} (file)")

        master.finish()

        # In clean-only mode we never touch the remote site.
        if user_settings.CLEANONLY:
            logging.debug("Cleanup finished. Exiting...")
            sys.exit()

        # Connect to and scan the remote (FTP) web site.
        logging.debug("Reading private settings.")
        private_settings = user_settings.private_settings

        print("Scanning remote web site...", end='', flush=True)

        logging.debug("Connecting to primary remote site.")
        remote = RemoteWebSite(user_settings,
                               private_settings[user_settings.SERVER],
                               private_settings[user_settings.USER],
                               private_settings[user_settings.PASSWORD],
                               private_settings[user_settings.FTP_ROOT])

        logging.debug("Scanning remote web site")
        remote.scan()
        remote.finish()

        print("...done!", flush=True)

        remote_directory_list = remote.directories
        remote_files_list = [file_info[user_settings.FILE_NAME]
                             for file_info in remote.files]

        logging.debug("*** Remote Directories **********************")
        for d in remote_directory_list:
            logging.debug(f"\t remote dir: {d:s}")

        logging.debug("*** Remote Files **********************")
        for f in remote_files_list:
            logging.debug(f"\t remote file: {f:s}")

        # Push the differences between master and remote up to the server.
        print("Synchronizing remote and local web sites...", end='', flush=True)

        logging.debug("Connecting to primary remote site for synchronization.")
        u = UpdateWeb(user_settings,
                      private_settings[user_settings.SERVER],
                      private_settings[user_settings.USER],
                      private_settings[user_settings.PASSWORD],
                      private_settings[user_settings.FTP_ROOT],
                      private_settings[user_settings.FILE_SIZE_LIMIT],
                      master.directories,
                      master.files,
                      remote.directories,
                      remote.files)

        logging.debug("Synchronizing remote web site")
        u.update()
        u.finish()

        print("...done!", flush=True)

        del u
        del remote
        del master

    except RecursionError as detail:
        logging.error(
            f"Walking the directory tree got too deep for Python's recursion {str(detail):s}. Aborting...")
        sys.exit()

    return
832
833
834
835
836
837
class CommandLineSettings(object):
    """Parse the command line and copy the chosen flags into the user settings."""

    def __init__(self, user_settings, raw_args=None):
        """Define the options, parse raw_args (or sys.argv), and set matching flags."""
        command_line_parser = argparse.ArgumentParser(
            description="updateweb options")

        command_line_parser.add_argument(
            "-v",
            "--verbose",
            help="Turn on verbose mode to log everything",
            action="store_true")

        command_line_parser.add_argument(
            "-c",
            "--cleanonly",
            help="Do a cleanup on the master web site only.",
            action="store_true")

        command_line_parser.add_argument("-t", "--test",
                                         help="Run unit tests.",
                                         action="store_true")

        args = command_line_parser.parse_args(raw_args)

        # Only set a flag when the matching option was given, leaving the
        # existing settings untouched otherwise.
        for option_given, setting_name in ((args.verbose, "VERBOSE"),
                                           (args.cleanonly, "CLEANONLY"),
                                           (args.test, "UNITTEST")):
            if option_given:
                setattr(user_settings, setting_name, True)
875
876
877
878
879
880
class WebSite(object):
    """
    Abstract class used for analyzing both master (local to disk) and remote (ftp server) websites.
    Contains the common web-walking functions which traverse the directory structures and files.
    Subclasses fill in the lower level functions which actually access the directories and files.
    Subclasses may also define additional functions unique to local websites.
    """

    def __init__(self, settings):
        """Set up root directories.

        settings is the shared UserSettings object; the subclass decides how
        to find and enter the root directory.
        """
        self.user_settings = settings

        # Directories still waiting to be walked.
        self.queue = []

        # All directory names found, relative to the root.
        self.directories = []

        # All files found, as [name, type, datetime, size] records relative to the root.
        self.files = []

        # Subclass hooks locate and enter the root.
        self.root_dir = self.get_root_dir()
        self.go_to_root_dir(self.root_dir)

    @staticmethod
    def get_current_year():
        """Get the current year (UTC)."""
        return int(time.gmtime()[0])

    @staticmethod
    def get_current_two_digit_year():
        """Get the last two digits of the current year."""
        return WebSite.get_current_year() % 100

    @staticmethod
    def is_file_info_type(file_info):
        """Check if we have a file information structure (list) or merely a simple file name (str)."""
        try:
            if isinstance(file_info, list):
                return True
            elif isinstance(file_info, str):
                return False
            else:
                # Anything else is a programming error; stop immediately.
                logging.error(
                    "is_file_info_type found a bad type. Aborting...")
                sys.exit()
        except TypeError as detail:
            logging.error(
                f"is_file_info_type found a bad type {str(detail):s}. Aborting...")
            sys.exit()

    def get_root_dir(self):
        """Subclass: Put code here to get the root directory"""
        return ""

    def go_to_root_dir(self, root_dir):
        """Subclass: Put code here to go to the root directory"""
        pass

    def one_level_down(self, d):
        """Subclass: Fill in with a method which returns a list of the
        directories and files immediately beneath dir"""
        return [], []

    def walk(self, d, type_of_tree_search=TreeWalk.BREADTH_FIRST_SEARCH):
        """Walk a directory in either depth first or breadth first order. BFS is the default."""
        # Everything immediately below this directory.
        subdirectories, subfiles = self.one_level_down(d)

        # Record every file, except the ones the settings say to skip.
        for f in subfiles:
            # Strip the root prefix, keeping the rest of the file info record.
            name = self.strip_root(f)
            logging.debug(
                f"Webwalking: Adding file {name[self.user_settings.FILE_NAME]:s} to list.")

            pat = re.compile(self.user_settings.FILE_TO_SKIP)
            if pat.search(name[self.user_settings.FILE_NAME]):
                logging.warning(
                    f"Webwalking: Skipping private file {name[self.user_settings.FILE_NAME]:s}")
            elif name[self.user_settings.FILE_NAME].find(self.user_settings.LOGFILENAME) >= 0:
                # Never upload or delete our own log file.
                logging.debug(
                    f"Webwalking: Skipping log file {name[self.user_settings.FILE_NAME]:s}")
            else:
                self.files.append(name)

        # Queue the subdirectories for walking, except the skipped ones.
        # Compile once (the original recompiled per directory), and use a
        # distinct loop variable so the directory d we are walking is not
        # clobbered (the original shadowed it).
        pat = re.compile(self.user_settings.DIR_TO_SKIP)
        for subdir in subdirectories:
            if pat.search(subdir):
                logging.warning(f"Webwalking: Skipping private dir {subdir:s}")
            else:
                logging.debug(f"Webwalking: Pushing dir {subdir:s} on the queue.")
                self.queue.append(subdir)

        # Visit every queued directory, recursing into each.
        while len(self.queue) > 0:
            if type_of_tree_search == TreeWalk.BREADTH_FIRST_SEARCH:
                d = self.queue.pop(0)   # FIFO: oldest directory first.
            elif type_of_tree_search == TreeWalk.DEPTH_FIRST_SEARCH:
                d = self.queue.pop()    # LIFO: newest directory first.
            else:
                d = self.queue.pop(0)   # Default to breadth first.

            name = self.strip_root(d)
            logging.debug(
                f"Webwalking: Adding relative directory {name:s} to list, full path = {d:s}.")
            self.directories.append(name)

            self.walk(d)

    def strip_root(self, file_info):
        """Return a path (or file info record), but strip off the root directory"""
        root = self.root_dir

        # Extract the path whether we were given a record or a bare string.
        if self.is_file_info_type(file_info):
            name = file_info[self.user_settings.FILE_NAME]
        else:
            name = file_info

        # Also strip the "/" separator after the root — unless the root is
        # just "/" itself, in which case there is no extra separator.
        lenroot = len(root)
        if root == self.user_settings.DEFAULT_ROOT_DIR:
            pass
        else:
            lenroot = lenroot + 1

        stripped_path = name[lenroot:]

        if self.is_file_info_type(file_info):
            # Rebuild the record around the stripped path.
            return [stripped_path,
                    file_info[self.user_settings.FILE_TYPE],
                    file_info[self.user_settings.FILE_DATE_TIME],
                    file_info[self.user_settings.FILE_SIZE]]
        else:
            return stripped_path

    def append_root_dir(self, root_dir, name):
        """Append the root directory to a path"""
        # Avoid a double slash when the root is just "/".
        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
            return root_dir + name
        else:
            return root_dir + "/" + name

    def scan(self):
        """Scan the directory tree recursively from the root"""
        logging.debug(
            f"Webwalking: Beginning recursive directory scan from root directory {self.root_dir:s}")
        self.walk(self.root_dir)

    def modtime(self, f):
        """Subclass: Get file modification time"""
        pass

    def finish(self):
        """Quit web site walking"""
        # NOTE(review): this message says "master" even when a remote
        # subclass inherits it — confirm whether subclasses override finish().
        logging.debug("Finished webwalking the master.")
        pass

    def remove_dir(self, dir_name):
        """Subclass: Remove a directory"""
        pass

    def remove_file(self, file_name):
        """Subclass: Remove a file"""
        pass

    def clean(self):
        """Scan through all directories and files in the master on disk website and clean them up.

        Returns True when anything changed (temp dirs/files removed or
        source files rewritten) so the caller knows to rescan.
        """
        num_changes = 0

        logging.debug("Cleaning up the master web page.")

        if self.directories is None or self.files is None:
            logging.error("Web site has no directories or files. Aborting...")
            sys.exit()

        # Remove temporary directories recursively.
        for d in self.directories:
            if self.is_temp_dir(d):
                name = self.append_root_dir(self.get_root_dir(), d)
                try:
                    # Bug fix: log the directory actually being removed
                    # (the original logged self.root_dir here).
                    logging.debug(
                        f"Removing temp dir {name:s} recursively")
                    shutil.rmtree(name)
                    num_changes += 1
                except OSError as detail:
                    logging.error(
                        f"Cannot remove temp dir {name:s}: {str(detail):s}")

        for f in self.files:
            name = self.append_root_dir(
                self.get_root_dir(), f[self.user_settings.FILE_NAME])

            # Remove temporary files.
            if self.is_temp_file(f):
                try:
                    logging.debug(f"Removing temp file {name:s}")
                    os.remove(name)
                    num_changes += 1
                except OSError as detail:
                    # Bug fix: this error message previously said "temp dir".
                    logging.error(
                        f"Cannot remove temp file {name:s}: {str(detail):s}")

            # Rewrite source files in place (copyright, email, version, ...).
            if self.is_source_file(f):
                changed = self.rewrite_source_file(name)
                if changed:
                    num_changes += 1
                    # Bug fix: log the rewritten file, not the root directory.
                    logging.debug(f"Rewrote hypertext file {name:s}")

        # Tell the caller whether a rescan is needed.
        return num_changes > 0

    def is_temp_file(self, file_info):
        """Identify a file name as a temporary file"""
        file_name = file_info[self.user_settings.FILE_NAME]

        # Either a recognized temp-file suffix, or a Vim ~ backup anywhere
        # in the name.
        [_, match] = pattern_match(
            self.user_settings.TEMP_FILE_SUFFIXES, file_name)

        if match or file_name.find(self.user_settings.VIM_TEMP_FILE_EXT) >= 0:
            return True

        return False

    def is_temp_dir(self, dir_name):
        """Identify a name as a temporary directory."""
        # Returns the (truthy) match object or None; callers use it as a boolean.
        p = re.compile(self.user_settings.TEMP_DIR_SUFFIX, re.VERBOSE)
        return p.search(dir_name)

    def is_source_file(self, file_info):
        """ Check if the file name is a hypertext file."""
        file_name = file_info[self.user_settings.FILE_NAME]
        p = re.compile(self.user_settings.SOURCE_FILE_SUFFIX, re.VERBOSE)
        return p.search(file_name)

    def copy_to_text_file(self, file_name):
        """Make a copy of a file with a .txt extension"""
        pass

    def clean_up_temp_file(self, temp_file_name, file_name, changed):
        """Remove the original file, rename the temporary file name to the original name.
        If there are no changes, just remove the temporary file.
        """
        pass

    def process_lines_of_file(
            self,
            in_file_name,
            out_file_name,
            process_line_function_list=None):
        """Process each line of a file with a list of functions. Create a new temporary file.
        The default list is None which means make an exact copy.
        """
        pass

    def rewrite_substring(self, line):
        """Rewrite a line containing a pattern of your choice"""
        # Apply every [pattern, replacement] pair in order; later pairs see
        # the result of earlier substitutions.
        for match_replace_pair in self.user_settings.SUBSTRING_REPLACEMENT_LIST:
            [pat, match] = pattern_match(match_replace_pair[0], line)

            if match:
                # Strip the whitespace padding from the raw-string constant.
                new_substring = match_replace_pair[1].strip().lstrip()
                sub = pat.sub(new_substring, line)
                logging.debug(
                    f"\ntransform old line = \n{line:s}\ninto new line =\n{sub:s}\nusing new substring =\n{new_substring:s}\n")
                line = sub

        return line

    def rewrite_email_address_line(self, line):
        """Rewrite lines containing old email addresses."""
        [pat, match] = pattern_match(
            self.user_settings.OLD_EMAIL_ADDRESS, line)

        if match:
            new_address = self.user_settings.NEW_EMAIL_ADDRESS
            sub = pat.sub(new_address, line)
            line = sub

        return line

    def rewrite_version_line(self, line):
        """Rewrite lines containing the current version of software."""
        [pat, match] = pattern_match(
            self.user_settings.CURRENT_SOFTWARE_VERSION, line)

        if match:
            # Strip the whitespace padding from the raw-string constant.
            new_version = self.user_settings.NEW_SOFTWARE_VERSION.lstrip().strip()
            sub = pat.sub(new_version, line)
            line = sub

        return line

    def rewrite_copyright_line(self, line):
        """Rewrite copyright lines if they are out of date."""
        # Match "Copyright ... (C)|© ... <old_year>-<end_year>".
        [pat, match] = pattern_match(self.user_settings.COPYRIGHT_LINE, line)

        if match:
            old_year = int(match.group('old_year'))
            # Rewrite when the starting year is behind the current year,
            # keeping the symbol and starting year and replacing the end year.
            if old_year < WebSite.get_current_year():
                new_copyright = r"Copyright \g<symbol> \g<old_year>-" + \
                    str(WebSite.get_current_year())
                sub = pat.sub(new_copyright, line)
                line = sub
        return line

    def rewrite_last_update_line(self, line):
        """Rewrite the Last Updated line if the year is out of date."""
        # IGNORECASE because pages write "Last updated" in mixed case.
        p = re.compile(
            self.user_settings.LAST_UPDATED_LINE,
            re.VERBOSE | re.IGNORECASE)
        m = p.search(line)

        if m:
            last_update_year = int(m.group('year'))

            # Two-digit year pivot: 91-99 => 19xx, 00-90 => 20xx.
            if last_update_year > 90:
                last_update_year += 1900
            else:
                last_update_year += 2000

            # If stale, reset the date to New Year's Day of the current year.
            if last_update_year < WebSite.get_current_year():
                two_digit_year = self.user_settings.TWO_DIGIT_YEAR_FORMAT % self.get_current_two_digit_year()
                sub = p.sub('last updated 01 Jan ' + two_digit_year, line)
                line = sub

        return line

    def rewrite_source_file(self, file_name):
        """Rewrite copyright lines, last updated lines, etc.

        Returns True when the file's contents changed.
        """
        changed = False

        temp_file_name = file_name + self.user_settings.TEMP_FILE_EXT

        # Run every rewriting function over every line, writing a temp file.
        if self.process_lines_of_file(file_name, temp_file_name,
                                      [self.rewrite_copyright_line,
                                       self.rewrite_last_update_line,
                                       self.rewrite_email_address_line,
                                       self.rewrite_substring,
                                       self.rewrite_version_line]):
            changed = True

        # Swap the temp file in for the original only if something changed.
        self.clean_up_temp_file(temp_file_name, file_name, changed)

        return changed
1300
1301
1302
1303
1304
1305
1306 class MasterWebSite(WebSite):
1307 """Walk the master web directory on local disk down from the root.
1308 Clean up temporary files and do other cleanup work."""
1309
    def __init__(self, settings):
        """Go to web page root and list all files and directories."""
        # The base class locates the root, enters it, and prepares the
        # directory/file lists.
        WebSite.__init__(self, settings)

        # NOTE(review): the base __init__ already set self.root_dir and
        # go_to_root_dir() normalized it via os.getcwd(); this reassignment
        # restores the raw settings path — confirm that is intended.
        self.root_dir = self.get_root_dir()
        logging.debug(
            f"MasterWebSite.__init__(): \tRoot directory: {self.root_dir:s}")
1319
1320 def get_root_dir(self):
1321 """Get the name of the root directory"""
1322 return self.user_settings.master_root_dir
1323
    def go_to_root_dir(self, root_dir):
        """Go to the root directory"""
        # Change the process working directory to the web-site root.
        logging.debug(
            f"MasterWebSite.go_to_root_dir(): \tchdir to root directory: {root_dir:s}")
        os.chdir(root_dir)

        # Re-read the current directory so self.root_dir holds the
        # OS-normalized absolute path.
        self.root_dir = os.getcwd()
        logging.debug(
            f"MasterWebSite.go_to_root_dir(): \tgetcwd root directory: {self.root_dir:s}")
1336
1337 def one_level_down(self, d):
1338 """List all files and subdirectories in the current directory, dir. For files, collect file info
1339 such as time, date and size."""
1340
1341 directories = []
1342 files = []
1343
1344
1345 os.chdir(d)
1346
1347
1348 dir_list = os.listdir(d)
1349
1350 if dir_list:
1351 for line in dir_list:
1352 logging.debug(
1353 f"MasterWebSite.one_level_down(): \tlistdir( {d:s} ) = {line:s}")
1354
1355
1356 name = self.append_root_dir(d, line)
1357 logging.debug(
1358 f"MasterWebSite.one_level_down(): \tmaster dir/file (full path): {name:s}")
1359
1360
1361 if os.path.isdir(name):
1362 directories.append(name)
1363 elif os.path.isfile(name):
1364
1365
1366
1367
1368
1369
1370 file_info = [name,
1371 FileType.FILE,
1372 self.get_file_date_time(name),
1373 self.get_file_size(name)]
1374 files.append(file_info)
1375
1376
1377 if directories:
1378 directories.sort()
1379 if files:
1380 files.sort()
1381
1382 return directories, files
1383
1384 @staticmethod
1385 def get_file_date_time(file_name):
1386 """Get a local file time and date in UTC."""
1387
1388 file_epoch_time = os.path.getmtime(file_name)
1389 file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1390
1391 d = datetime.datetime(file_time_utc[0], file_time_utc[1],
1392 file_time_utc[2], file_time_utc[3],
1393 file_time_utc[4], file_time_utc[5])
1394 return d
1395
1396 @staticmethod
1397 def get_file_size(file_name):
1398 """Get file size in bytes."""
1399 return os.path.getsize(file_name)
1400
1401 def copy_to_text_file(self, file_name):
1402 """Make a copy of a file with a .txt extension"""
1403
1404
1405 copy_file_name = file_name + self.user_settings.TEXT_FILE_EXT
1406 try:
1407 os.remove(copy_file_name)
1408 except OSError as detail:
1409 logging.error(
1410 f"Cannot remove old text file copy {copy_file_name:s}: {str(detail):s}")
1411
1412
1413 self.process_lines_of_file(file_name, copy_file_name)
1414
1415
1416
1417
1418
1419 file_stat = os.stat(file_name)
1420 os.utime(copy_file_name,
1421 (file_stat[stat.ST_ATIME],
1422 file_stat[stat.ST_MTIME]))
1423 logging.debug(
1424 f"Reset file time to original time for copy {copy_file_name:s}")
1425
1426 def clean_up_temp_file(self, temp_file_name, file_name, changed):
1427 """Remove the original file, rename the temporary file name to the original name.
1428 If there are no changes, just remove the temporary file.
1429 """
1430
1431 if changed:
1432
1433 try:
1434 os.remove(file_name)
1435 logging.debug(
1436 f"Changes were made. Remove original file {file_name:s}")
1437 except OSError as detail:
1438 logging.error(
1439 f"Cannot remove old file {file_name:s}: {str(detail):s}. Need to remove it manually.")
1440
1441
1442 try:
1443 os.rename(temp_file_name, file_name)
1444 logging.debug(
1445 f"Rename temp file {temp_file_name:s} to original file {file_name:s}")
1446 except OSError as detail:
1447 logging.error(
1448 f"Cannot rename temporary file {temp_file_name:s} to old file name {file_name:s}: {str(detail):s}."
1449 f"Need to rename manually")
1450 else:
1451
1452 try:
1453 os.remove(temp_file_name)
1454 logging.debug(
1455 f"No changes were made. Remove temporary file {temp_file_name:s}")
1456 except OSError as detail:
1457 logging.error(
1458 f"Cannot remove temporary file {temp_file_name:s}: {str(detail):s}. Need to remove it manually.")
1459 return
1460
1461 def process_lines_of_file(
1462 self,
1463 in_file_name,
1464 out_file_name,
1465 process_line_function_list=None):
1466 """Process each line of a file with a list of functions. Create a new temporary file.
1467 The default list is None which means make an exact copy.
1468 """
1469
1470 fin = None
1471 fout = None
1472
1473
1474 changed = False
1475
1476 try:
1477 fin = open(in_file_name, "r")
1478 except IOError as detail:
1479 logging.error(
1480 f"process_lines_of_file(): \tCannot open file {in_file_name:s} for reading: {str(detail):s}")
1481
1482 try:
1483 fout = open(out_file_name, "w")
1484 except IOError as detail:
1485 logging.error(
1486 f"process_lines_of_file(): \tCannot open file {out_file_name:s} for writing: {str(detail):s}")
1487
1488
1489 try:
1490 line = fin.readline()
1491
1492 while line:
1493 original_line = line
1494 if process_line_function_list is None:
1495
1496 pass
1497 else:
1498
1499 for processLineFunction in process_line_function_list:
1500 line = processLineFunction(line)
1501
1502 if original_line != line:
1503 logging.debug(
1504 f"Rewrote the line >>>{original_line:s}<<< to >>>{line:s}<<<")
1505 changed = True
1506
1507 fout.write(line)
1508
1509 line = fin.readline()
1510
1511 fin.close()
1512 fout.close()
1513 except IOError as detail:
1514 logging.error(
1515 f"File I/O error during reading/writing file {in_file_name:s} in process_lines_of_file: {str(detail):s}"
1516 f" Aborting...")
1517 sys.exit()
1518
1519 if changed:
1520 logging.debug(
1521 f"process_lines_of_file(): \tRewrote original file {in_file_name:s}."
1522 f"Changes are in temporary copy {out_file_name:s}")
1523
1524
1525 return changed
1526
1527
1528
1529
1530
1531
class RemoteWebSite(WebSite):
    """Walk the remote web directory on a web server down from the root, using FTP."""

    def __init__(self, settings, server, user, password, ftproot):
        """Connect to the FTP server, then let the base class walk the site.

        settings -- UserSettings object holding parsing constants.
        server   -- FTP server host name.
        user, password -- FTP login credentials.
        ftproot  -- root directory of the web site on the server."""

        # Requested root; go_to_root_dir() replaces it with the server's
        # canonical spelling once we have changed into it.
        self.root_dir = ftproot
        logging.debug(
            f"Requesting remote web site ftp root dir {self.root_dir:s}")

        try:
            # Open the connection and log in; any failure is fatal.
            self.ftp = ftplib.FTP(server)
            self.ftp.login(user, password)
        except Exception as detail:
            logging.error(
                f"Remote web site cannot login to ftp server: {str(detail):s} Aborting...")
            sys.exit()
        else:
            logging.debug("Remote web site ftp login succeeded.")

        logging.debug(
            f"Remote web site ftp welcome message {self.ftp.getwelcome():s}")

        # The base class drives the actual directory tree walk.
        WebSite.__init__(self, settings)

    def go_to_root_dir(self, root_dir):
        """Change to the root directory on the server and record the canonical
        path the server reports back."""

        try:
            self.ftp.cwd(root_dir)
            logging.debug(
                f"ftp root directory (requested) = {self.root_dir:s}")

            # Read back the name the server actually uses for this directory.
            self.root_dir = self.ftp.pwd()
            logging.debug(
                f"ftp root directory (read back from server): {self.root_dir:s}")

        except Exception as detail:
            logging.error(
                f"go_to_root_dir(): \tCannot ftp cwd or pwd root dir {root_dir:s} {str(detail):s} Aborting...")
            sys.exit()

    def get_root_dir(self):
        """Return the root directory name on the server."""

        return self.root_dir

    def finish(self):
        """Quit web site walking: log out of the FTP session."""

        logging.debug("RemoteWebSite::finish().")
        try:
            self.ftp.quit()
        except Exception as detail:
            logging.error(f"Cannot ftp quit: {str(detail):s}")

    def one_level_down(self, d):
        """List files and directories in subdirectory d using FTP.

        Returns (directories, files): sorted full-path directory names, and
        sorted file info lists [name, type, datetime, size]."""

        directories = []
        files = []

        try:
            logging.debug(f"RemoteWebSite.one_level_down(): \tftp cwd: {d:s}")
            self.ftp.cwd(d)
            dir_list = []
            # Collect the raw LIST output lines for parsing below.
            self.ftp.retrlines('LIST', dir_list.append)
        except Exception as detail:
            logging.error(
                f"one_level_down(): \tCannot ftp cwd or ftp LIST dir {d:s}: {str(detail):s} Aborting...")
            sys.exit()

        for line in dir_list:
            logging.debug(
                f"RemoteWebSite.one_level_down(): \tftp LIST: {line:s}")

            # Skip lines too short to be a real listing entry.
            if len(line) >= self.user_settings.MIN_FTP_LINE_LENGTH:
                file_info = self.get_ftp_file_info(line)

                if file_info[self.user_settings.FILE_NAME] == "":
                    logging.error(
                        "RemoteWebSite.one_level_down(): \tFTP LIST file name is NULL:")

                logging.debug(
                    f"RemoteWebSite.one_level_down(): \tftp parsed file info: {file_info[self.user_settings.FILE_NAME]:s}")

                if file_info[self.user_settings.FILE_TYPE] == FileType.DIRECTORY:
                    dirname = self.append_root_dir(
                        d, file_info[self.user_settings.FILE_NAME])
                    logging.debug(
                        f"RemoteWebSite.one_level_down(): \tftp dir (full path): {dirname:s}")
                    directories.append(dirname)
                else:
                    # Convert the bare file name into a full path.
                    file_info[self.user_settings.FILE_NAME] = self.append_root_dir(
                        d, file_info[self.user_settings.FILE_NAME])
                    logging.debug(
                        f"RemoteWebSite.one_level_down(): \tftp file (full path): {file_info[self.user_settings.FILE_NAME]:s}")
                    files.append(file_info)
            else:
                logging.error(
                    f"RemoteWebSite.one_level_down(): \tFTP LIST line is too short: {line:s}")

        directories.sort()
        files.sort()

        return directories, files

    def modtime(self, f):
        """Get the modification time of file f via the FTP MDTM command.
        Returns the raw timestamp string from the server, or 0 if the server
        cannot supply it (note the mixed str/int return, kept for callers)."""
        modtime = 0

        try:
            response = self.ftp.sendcmd('MDTM ' + f)
            # A successful reply looks like '213 YYYYMMDDhhmmss'.
            if response[:3] == '213':
                modtime = response[4:]
        except ftplib.error_perm:
            modtime = 0

        return modtime

    def get_ftp_file_info(self, line):
        """Parse one ftp LIST line and return [file name, file type, datetime, size].

        FTP uses UTC for its listings; the conversion to local time is done by the OS.

        We can have problems on New Year's Eve. For example, the master file date/time is
            Mon Jan  1 06:23:12 2018
        But the remote file date/time from FTP listing doesn't show a year even though we know it was written to the server in 2017.
            Mon Dec 31 03:02:00
        So we default the remote file year to current year 2018 and get
            Mon Dec 31 03:02:00 2018
        Now we think that the remote file is newer by 363.860278 days.
        """

        # A leading 'd' in the permissions field marks a directory.
        if line[0] == 'd':
            dir_or_file = FileType.DIRECTORY
        else:
            dir_or_file = FileType.FILE

        pattern = re.compile(self.user_settings.FTP_LISTING, re.VERBOSE)

        # Default values used when the listing line cannot be parsed.
        filesize = 0
        filename = ""
        hour = 0
        minute = 0
        seconds = 0
        month = 1
        day = 1
        # Bug fix: 'year' used to be assigned only inside the match branch,
        # so an unparsable line raised NameError at the datetime() call below.
        year = WebSite.get_current_year()

        match = pattern.search(line)

        logging.debug(f"ftp file listing {line}")

        if match:
            filesize = int(match.group('bytes'))
            month = self.user_settings.monthToNumber[match.group('mon')]
            day = int(match.group('day'))

            if match.group('year'):
                year = int(match.group('year'))
                logging.debug(f"ftp has year = {year} but is probably missing hour and minute")
            else:
                # Missing year: assume the current year (see docstring caveat).
                year = WebSite.get_current_year()
                logging.debug(f"ftp is missing the year; use the current year = {year}")

            if match.group('hour') and match.group('min'):
                hour = int(match.group('hour'))
                minute = int(match.group('min'))
                logging.debug(f"ftp has hour = {hour} and minute = {minute} so is probably missing the year")

            filename = match.group('filename')

        d = datetime.datetime(year, month, day, hour, minute, seconds)

        return [filename, dir_or_file, d, filesize]
1743
1744
class UpdateWeb(object):
    """Given previously scanned master and remote directories, update the remote website."""

    def __init__(
            self,
            settings,
            server,
            user,
            password,
            ftproot,
            file_size_limit,
            master_directory_list,
            master_file_info,
            remote_directory_list,
            remote_file_info):
        """Connect to remote site. Accept previously scanned master and remote files and directories.

        settings -- UserSettings object.
        server, user, password -- FTP login credentials.
        ftproot -- root directory of the web site on the server.
        file_size_limit -- maximum uploadable file size in KB.
        master_*/remote_* -- directory and file listings from the site walks."""

        self.user_settings = settings

        # Derived views of the file listings; filled in by file_info().
        self.master_files_list = []
        self.remote_files_list = []
        self.master_file_to_size = {}
        self.master_file_to_date_time = {}
        self.remote_file_to_date_time = {}
        # Differences between the two sites; filled in by changes().
        self.master_only_dirs = []
        self.master_only_files = []
        self.remote_only_dirs = []
        self.remote_only_files = []
        self.common_files = []

        # Connect and log in to the FTP server; failure is fatal.
        try:
            self.ftp = ftplib.FTP(server)
            self.ftp.login(user, password)
        except Exception as detail:
            logging.error(
                f"Cannot login to ftp server: {str(detail):s} Aborting...")
            sys.exit()
        else:
            logging.debug("ftp login succeeded.")

        logging.debug(
            f"ftp server welcome message: {self.ftp.getwelcome():s}")

        # Local (master) root directory on disk.
        self.master_root_dir = self.user_settings.master_root_dir
        logging.debug(
            f"Master (local to disk) root directory: {self.master_root_dir:s}")

        # Remote root directory as requested by the caller.
        self.ftp_root_dir = ftproot
        logging.debug(
            f"ftp root directory (requested) = {self.ftp_root_dir:s}")

        # Convert the limit from KB (as given in settings) to bytes.
        self.file_size_limit = int(file_size_limit) * 1024

        try:
            # Go to the root and read back the server's canonical name for it.
            self.ftp.cwd(self.ftp_root_dir)
            self.ftp_root_dir = self.ftp.pwd()
            logging.debug(
                f"ftp root directory (read back from server): {self.ftp_root_dir:s}")
        except Exception as detail:
            logging.error(
                f"UpdateWeb(): \tCannot ftp cwd or ftp LIST dir {self.ftp_root_dir:s} {str(detail):s} Aborting...")
            # Bug fix: the message said "Aborting..." but execution previously
            # continued with an unusable root directory; abort like
            # RemoteWebSite does in the same situation.
            sys.exit()

        self.master_directory_list = master_directory_list
        self.remote_directory_list = remote_directory_list
        self.master_file_info = master_file_info
        self.remote_file_info = remote_file_info

    def append_root_dir(self, root_dir, name):
        """Append a file or directory name to a root directory path."""

        # Avoid a double slash when the root is already just "/".
        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
            return root_dir + name
        else:
            return root_dir + "/" + name

    def file_info(self):
        """Create lists of file names from the file information. Also create dictionaries which map file names onto
        dates, times, and sizes."""

        # Bare lists of file names for master and remote.
        self.master_files_list = [
            file_info[self.user_settings.FILE_NAME] for file_info in self.master_file_info]
        self.remote_files_list = [
            file_info[self.user_settings.FILE_NAME] for file_info in self.remote_file_info]

        # Map file name -> modification date/time.
        self.master_file_to_date_time = {
            file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME]
            for file_info in self.master_file_info}
        self.remote_file_to_date_time = {
            file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME]
            for file_info in self.remote_file_info}

        # Map master file name -> size in bytes (used for the upload limit).
        self.master_file_to_size = {
            file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_SIZE]
            for file_info in self.master_file_info}

    def update(self):
        """Scan through the master website, cleaning it up.
        Go to remote website on my servers and synchronize all files."""

        # Build the name lists and lookup maps.
        self.file_info()

        # Classify everything as master-only / remote-only / common.
        self.changes()

        # Push the differences to the server.
        self.synchronize()

    def changes(self):
        """Find the set of different directories and files on master and remote."""

        # Start by assuming every master directory exists only on master.
        dir_to_type = {
            d: FileType.ON_MASTER_ONLY for d in self.master_directory_list}

        # Directories also present remotely become common; the rest of the
        # remote directories are remote-only.
        for d in self.remote_directory_list:
            if d in dir_to_type:
                dir_to_type[d] = FileType.ON_BOTH_MASTER_AND_REMOTE
            else:
                dir_to_type[d] = FileType.ON_REMOTE_ONLY

        # Same classification for files.
        file_to_type = {
            f: FileType.ON_MASTER_ONLY for f in self.master_files_list}

        for f in self.remote_files_list:
            if f in file_to_type:
                file_to_type[f] = FileType.ON_BOTH_MASTER_AND_REMOTE
            else:
                file_to_type[f] = FileType.ON_REMOTE_ONLY

        logging.debug("Raw dictionary dump of directories")
        for k, v in dir_to_type.items():
            logging.debug(f"\t dir: {str(k):s} type: {str(v):s}")

        logging.debug("Raw dictionary dump of files")
        for k, v in file_to_type.items():
            logging.debug(f"\t file: {str(k):s} type: {str(v):s}")

        # Split the classifications into the lists synchronize() consumes,
        # preserving the original listing order.
        self.master_only_dirs = [
            d for d in self.master_directory_list if dir_to_type[d] == FileType.ON_MASTER_ONLY]

        self.remote_only_dirs = [
            d for d in self.remote_directory_list if dir_to_type[d] == FileType.ON_REMOTE_ONLY]

        self.master_only_files = [
            f for f in self.master_files_list if file_to_type[f] == FileType.ON_MASTER_ONLY]

        self.remote_only_files = [
            f for f in self.remote_files_list if file_to_type[f] == FileType.ON_REMOTE_ONLY]

        self.common_files = [
            f for f in self.master_files_list if file_to_type[f] == FileType.ON_BOTH_MASTER_AND_REMOTE]

        logging.debug(
            "*** Directories only on master ******************************")
        for d in self.master_only_dirs:
            logging.debug(f"\t {d:s}")

        logging.debug(
            "*** Directories only on remote ******************************")
        for d in self.remote_only_dirs:
            logging.debug(f"\t {d:s}")

        logging.debug(
            "*** Files only on master ******************************")
        for f in self.master_only_files:
            logging.debug(f"\t {f:s}")

        logging.debug(
            "*** Files only on remote ******************************")
        for f in self.remote_only_files:
            logging.debug(f"\t {f:s}")

        logging.debug("*** Common files ******************************")
        for f in self.common_files:
            logging.debug(
                f"\tname {f:s} master time {self.master_file_to_date_time[f].ctime():s} remote time {self.remote_file_to_date_time[f].ctime():s}")

    def synchronize(self):
        """Synchronize files and subdirectories in the remote directory with the master directory."""

        # For common files, decide whether the master copy must be uploaded.
        for f in self.common_files:
            master_file_time = self.master_file_to_date_time[f]
            remote_file_time = self.remote_file_to_date_time[f]

            # Absolute time difference in fractional days.  The timedelta is
            # computed once (the old code subtracted the datetimes twice).
            delta = remote_file_time - master_file_time
            days_different = abs(
                delta.days + delta.seconds / (60.0 * 60.0 * 24.0))

            upload_to_host = False

            logging.debug(f"Common file: {f:s}.")

            if remote_file_time > master_file_time:
                # Remote looks much newer: almost certainly the New Year's
                # missing-year glitch (see RemoteWebSite.get_ftp_file_info).
                if days_different >= self.user_settings.DAYS_NEWER_FOR_REMOTE_NEW_YEARS_GLITCH:
                    logging.error(
                        f"Remote file {f:s} is newer by {days_different:f} days. Probably New Year's glitch. Upload file to be safe.")
                    logging.error(
                        f"\tmaster time {master_file_time.ctime():s} remote time {remote_file_time.ctime():s}")

                    # Touch the master copy so the next run sees it as current.
                    full_file_name = self.append_root_dir(
                        self.master_root_dir, f)
                    if os.path.exists(full_file_name):
                        os.utime(full_file_name, None)
                        logging.error(
                            f"Touching master file {full_file_name:s} to make it the current time")

                    upload_to_host = True
                else:
                    # Slightly newer: probably server clock drift; wait.
                    logging.debug(
                        f"Remote file {f:s} is newer by {days_different:f} days."
                        f"Probably time inaccuracy on the server. Wait -- don't upload yet.")
                    logging.debug(
                        f"\tmaster time {master_file_time.ctime():s} remote time {remote_file_time.ctime():s}")
                    upload_to_host = False

            elif master_file_time > remote_file_time:
                # Master is newer; upload only once past the threshold.
                if days_different >= self.user_settings.DAYS_NEWER_FOR_MASTER_BEFORE_UPLOAD:
                    logging.warning(
                        f"Master file {f:s} is newer by {days_different:f} days. Preparing for upload.")
                    logging.warning(
                        f"\tmaster time {master_file_time.ctime():s} remote time {remote_file_time.ctime():s}")
                    upload_to_host = True
                else:
                    logging.debug(
                        f"Master file {f:s} is slightly newer by {days_different:f} days. Wait -- don't upload yet.")
                    logging.debug(
                        f"\tmaster time {master_file_time.ctime():s} remote time {remote_file_time.ctime():s}")
                    upload_to_host = False

            # Never upload a file the server cannot accept.
            size = self.master_file_to_size[f]
            if size >= self.file_size_limit:
                logging.error(
                    f"upload(): Skipping upload of file {f:s} of size {size:d}; too large for server, limit is {self.file_size_limit:d} bytes")
                upload_to_host = False

            if upload_to_host:
                print(f"Uploading changed file {f:s}...", end='', flush=True)
                self.upload(f)

        # Remove remote-only directories (they no longer exist on master).
        for d in self.remote_only_dirs:
            logging.debug(f"Remote only dir. Attempting to delete it: {d:s}")
            print(f"Deleting remote directory {d:s}...", end='', flush=True)
            self.rmdir(d)

        # Create master-only directories remotely before uploading files
        # that may live inside them.
        for d in self.master_only_dirs:
            logging.debug(f"Master only dir. Creating dir {d:s} on remote.")
            print(
                f"Creating new remote directory {d:s}...",
                end='',
                flush=True)
            self.mkdir(d)

        # Upload master-only files, respecting the size limit.
        for f in self.master_only_files:
            logging.debug(f"Master only file. Uploading {f:s} to remote.")

            size = self.master_file_to_size[f]
            if size >= self.file_size_limit:
                logging.error(
                    f"upload(): Skipping upload of file {f:s} of size {size:d};"
                    f" too large for server, limit is {self.file_size_limit:d} bytes")
            else:
                print(f"Uploading new file {f:s}...", end='', flush=True)
                self.upload(f)

        # Delete remote-only files (they no longer exist on master).
        for f in self.remote_only_files:
            logging.debug(f"Remote only file. Deleting remote file {f:s}.")
            print(f"Deleting remote file {f:s}...", end='', flush=True)
            self.del_remote(f)

    def del_remote(self, relative_file_path):
        """Delete a file using ftp."""

        logging.debug(
            f"del_remote(): \trelative file path name: {relative_file_path:s}")

        # Split into the containing directory and the bare file name.
        relative_dir, file_name = os.path.split(relative_file_path)
        logging.debug(f"del_remote(): \tfile name: {file_name:s}")
        logging.debug(f"del_remote(): \trelative dir: {relative_dir:s}")
        logging.debug(
            f"del_remote(): \tremote root dir: {self.ftp_root_dir:s}")

        try:
            # Change into the file's remote directory first.
            remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
            logging.debug(
                f"del_remote(): \tftp cd remote dir: {remote_dir:s}")
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(
                f"del_remote(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            try:
                logging.debug(f"del_remote(): \tftp rm: {file_name:s}")

                # Guard against deleting with an empty name.
                if len(file_name) > 0:
                    self.ftp.delete(file_name)
                else:
                    # Bug fix: this message was missing the f prefix (the
                    # placeholder was never interpolated) and had a typo.
                    logging.warning(
                        f"del_remote(): skipping ftp delete; file NAME {file_name:s} had zero length")
            except Exception as detail:
                logging.error(
                    f"del_remote(): \tCannot ftp rm: {str(detail):s}")

    def mkdir(self, relative_dir):
        """Create new remote directory using ftp."""

        logging.debug(f"mkdir(): \trelative dir path name: {relative_dir:s}")
        logging.debug(f"mkdir(): \tremote root dir: {self.ftp_root_dir:s}")

        # Split into the parent path and the new directory name.
        path, d = os.path.split(relative_dir)
        logging.debug(f"mkdir(): \tremote prefix dir: {path:s}")
        logging.debug(f"mkdir(): \tremote dir: {d:s}")

        try:
            # Change into the parent directory first.
            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
            logging.debug(f"mkdir(): \tftp cd remote dir: {remote_dir:s}")
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(
                f"mkdir(): \tCannot ftp chrdir: {str(detail):s} Skipping...")
        else:
            try:
                logging.debug(f"mkdir(): \tftp mkd: {d:s}")
                self.ftp.mkd(d)
            except Exception as detail:
                logging.error(f"mkdir(): \tCannot ftp mkdir: {str(detail):s}")

    def rmdir(self, relative_dir):
        """Delete an empty directory using ftp."""

        logging.debug(
            f"rmdir(): \tintermediate dir path name: {relative_dir:s}")
        logging.debug(f"rmdir(): \tremote root dir: {self.ftp_root_dir:s}")

        # Split into the parent path and the directory to remove.
        path, d = os.path.split(relative_dir)
        logging.debug(f"rmdir(): \tremote prefix dir: {path:s}")
        logging.debug(f"rmdir(): \tremote dir: {d:s}")

        try:
            # Change into the parent directory first.
            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
            logging.debug(f"rmdir(): \tftp cd remote dir: {remote_dir:s}")
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(
                f"rmdir(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            try:
                logging.debug(f"rmdir(): \tftp rmd: {d:s}")
                self.ftp.rmd(d)
            except Exception as detail:
                logging.error(
                    f"rmdir(): \tCannot ftp rmdir dir {d:s}: {str(detail):s}"
                    f" Directory is probably not empty. Do a manual delete.")

    def download(self, relative_file_path):
        """Download a binary file using ftp."""

        logging.debug(f"download(): \tfile name: {relative_file_path:s}")

        # Split into the containing directory and the bare file name.
        relative_dir, file_name = os.path.split(relative_file_path)
        logging.debug(f"download(): \tfile name: {file_name:s}")
        logging.debug(f"download(): \trelative dir: {relative_dir:s}")
        logging.debug(f"download(): \troot dir: {self.ftp_root_dir:s}")

        # Change into the file's remote directory first.
        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
        logging.debug(f"download(): \tftp cd remote dir: {remote_dir:s}")

        try:
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(
                f"download(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            # Write into the matching location under the master root.
            local_file_name = self.append_root_dir(
                self.master_root_dir, relative_file_path)
            logging.debug(
                f"download(): \topen local file name: {local_file_name:s}")
            try:
                # The with-statement closes the file even if the transfer fails.
                with open(local_file_name, "wb") as f:
                    try:
                        # Bug fix: the transfer itself was missing (a bare
                        # 'pass'); fetch the remote file in binary mode.
                        logging.debug(
                            f"download(): \tftp RETR file {file_name:s}")
                        self.ftp.retrbinary("RETR " + file_name, f.write)
                    except Exception as detail:
                        logging.error(
                            f"download(): \tCannot ftp retrbinary: {str(detail):s}")
            except IOError as detail:
                # Bug fix: the message incorrectly said "for reading".
                logging.error(
                    f"download(): \tCannot open local file {local_file_name:s} for writing: {str(detail):s}")

    def upload(self, relative_file_path):
        """Upload a binary file using ftp."""

        logging.debug(
            f"upload(): \trelative file path name: {relative_file_path:s}")

        # Split into the containing directory and the bare file name.
        relative_dir, file_name = os.path.split(relative_file_path)
        logging.debug(f"upload(): \tfile name: {file_name:s}")
        logging.debug(f"upload(): \trelative dir: {relative_dir:s}")
        logging.debug(f"upload(): \tremote root dir: {self.ftp_root_dir:s}")

        # Change into the file's remote directory first.
        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
        logging.debug(f"upload(): \tftp cd remote dir: {remote_dir:s}")

        try:
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(
                f"upload(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            # Read from the matching location under the master root.
            local_file_name = self.append_root_dir(
                self.master_root_dir, relative_file_path)
            logging.debug(
                f"upload(): \topen local file name: {local_file_name:s}")

            try:
                # The with-statement closes the file even if the transfer fails.
                with open(local_file_name, "rb") as f:
                    try:
                        logging.debug(f"upload(): \tftp STOR file {file_name:s}")
                        self.ftp.storbinary("STOR " + file_name, f)
                    except Exception as detail:
                        logging.error(
                            f"upload(): \tCannot ftp storbinary: {str(detail):s}")
            except IOError as detail:
                logging.error(
                    f"upload(): \tCannot open local file {local_file_name:s} for reading: {str(detail):s}")

    def finish(self):
        """Log out of an ftp session"""

        logging.debug("UpdateWeb::finish()")
        try:
            self.ftp.quit()
        except Exception as detail:
            logging.error(f"Cannot ftp quit because {str(detail):s}")
2253
2254
if __name__ == '__main__':
    """Python executes all code in the file, so all classes and functions get defined first. Finally we come here.
    If we are executing this file as a Python script, the name of the current module is set to main,
    thus we'll call the main() function."""

    # Run as a script: hand control to main() (defined earlier in this file).
    main()

else:
    """When using as a module, start python, then import the module and call it:
    python
    import updateweb
    updateweb.main(["--test"])

    Or if you want to debug, do this:

    python
    import pdb
    import updateweb
    pdb.run('updateweb.main(["--test"])')
    b updateweb.main
    c
    <Now use n to step, l to list, etc>
    """

    # Imported as a module: everything is defined but nothing runs.
    pass