#!/usr/bin/python
"""
FileStats.py
P. TenHoopen, 11/21/08

Read the statistics of files from a specified directory and its
subdirectories.

Display the number of files, a count of files per extension, the largest
file and its size, the total size and the average size.
"""

# Import Modules
import os

# Global Variables
extensions = {}      # dictionary to hold extensions and counts
largestFileSz = 0    # size of largest file
largestFile = ""     # name of largest file
numFiles = 0         # number of files found
sumFileSizes = 0.0   # total size of files, used to calc average file size


# Functions
def getExtension(fileName):
    """Return the extension of a bare file name.

    The extension is the text after the LAST period, so "archive.tar.gz"
    yields "gz".  A name without any period yields the string "none".
    A name that ends in a period yields the empty string.
    """
    if "." not in fileName:
        # no period delimiter so no extension can be determined
        return "none"
    # rpartition splits on the last period; element 2 is the text after it
    return fileName.rpartition(".")[2]


def recordExtension(thisExtension):
    """Count one more file with the given extension.

    Updates the module-level 'extensions' dictionary: a new extension
    starts at 1, an existing one is incremented.  Always returns 1.
    """
    extensions[thisExtension] = extensions.get(thisExtension, 0) + 1
    return 1


def collectFileInfo(fileName):
    """Add one file's size to the running totals.

    Updates the module-level 'sumFileSizes', and 'largestFileSz' /
    'largestFile' when this file is the largest seen so far.

    Returns 1 when the file was examined.  Returns 0 (after printing a
    short message) when os.stat fails, e.g. for a dangling symbolic link
    or a file that cannot be accessed; the totals are left untouched.
    """
    # use global variables defined above
    global largestFileSz
    global largestFile
    global sumFileSizes

    try:
        size = os.stat(fileName).st_size
    except OSError as err:
        # one unreadable entry should not abort the whole analysis
        print("Skipping '%s': %s" % (fileName, err))
        return 0

    # keep size of all files
    sumFileSizes += size

    # is this the largest file found so far?
    if size > largestFileSz:
        largestFileSz = size
        largestFile = fileName

    return 1


def displayStats():
    """Print the collected statistics.  Always returns 1."""
    # single-argument print(...) behaves the same on Python 2 and Python 3
    print("\n\nResults:")
    print("\nThere were %d files found." % numFiles)

    print("\nCount \t File Ext")
    print("----- \t --------")
    for extension in extensions:
        print("%d \t %s" % (extensions[extension], extension))

    # with no files counted there is nothing to divide by; report 0.00
    if numFiles:
        averageSize = sumFileSizes / numFiles
    else:
        averageSize = 0.0

    print("\n")
    print("Largest file: %s at %d bytes" % (largestFile, largestFileSz))
    print("Total size of files is %d bytes" % (sumFileSizes))
    print("Average size of files is %.2f bytes" % averageSize)
    print("\n")

    return 1


def main():
    """Prompt for a directory, analyze every file below it, show results.

    An empty answer at the prompt means the current directory (".").
    Files whose statistics cannot be read are reported and left out of
    the file count and the extension table, so that the average size is
    computed over exactly the files that were measured.
    Always returns 1.
    """
    # use global variables defined above
    global numFiles

    # directory to search, default to current directory
    topDir = "."

    # raw_input only exists on Python 2; input is its Python 3 equivalent
    try:
        readLine = raw_input
    except NameError:
        readLine = input

    # get top directory to analyze from user
    userDir = readLine("Input the directory to analyze: ")
    if userDir:
        # weak error checking: anything the user typed is used as-is
        topDir = userDir

    print("\nAnalyzing files in '%s'..." % topDir)

    # os.walk yields (dirpath, dirnames, filenames) for each directory
    for root, _dirs, files in os.walk(topDir):
        # examine each file found
        for eachFile in files:
            # get file name and path
            fileName = os.path.join(root, eachFile)

            # examine file stats; skip entries that cannot be read
            if not collectFileInfo(fileName):
                continue

            numFiles += 1

            # keep track of extensions that are found
            recordExtension(getExtension(eachFile))

    # display the results of the analysis
    displayStats()

    return 1


# This calls the 'main' function when this script is executed.
if __name__ == '__main__':
    main()