edit · print · PDF

Please note that all SIEpedia articles address specific issues or questions raised by IAC users, so they do not attempt to be rigorous or exhaustive, and may or may not be useful or applicable in different or more general contexts.

Script to find a text sequence in files of a given extension, in a tree of directories

R Azzollini

#! /usr/bin/env python

def _list_tree(pathstr):
    """Return all paths under the non-hidden entries of directory `pathstr`.

    `pathstr` is either '' (current directory) or a directory name ending
    in '/'. Top-level names starting with '.' are skipped, as a shell
    `pathstr*` glob would do. Each directory is listed before its contents,
    and symbolic links to directories are listed but not descended into.
    An unreadable or missing directory contributes no entries.
    """
    import os

    try:
        names = os.listdir(pathstr if pathstr else '.')
    except OSError:
        return []

    # Stack of entries still to visit; reversed so pop() yields sorted order.
    pending = [pathstr + name for name in sorted(names, reverse=True)
               if not name.startswith('.')]
    found = []
    while pending:
        entry = pending.pop()
        found.append(entry)
        if os.path.isdir(entry) and not os.path.islink(entry):
            try:
                children = sorted(os.listdir(entry), reverse=True)
            except OSError:
                continue
            pending.extend(os.path.join(entry, child) for child in children)
    return found


def _grep_file(filename, sequence, grep_args):
    """Run `grep grep_args -e sequence` with `filename` on stdin.

    Returns the output lines (without trailing newlines) as a list of
    strings. A file that cannot be opened yields an empty list. No shell is
    involved, so neither the file name nor the sequence is re-interpreted.
    """
    import subprocess

    try:
        src = open(filename, 'rb')
    except (IOError, OSError):
        return []
    with src:
        proc = subprocess.Popen(['grep'] + grep_args + ['-e', sequence],
                                stdin=src, stdout=subprocess.PIPE)
        raw = proc.communicate()[0]

    # Python 3 hands back bytes; decode leniently so odd encodings in the
    # searched files cannot abort the whole scan.
    if not isinstance(raw, str):
        raw = raw.decode('utf-8', 'replace')

    lines = raw.split('\n')
    if lines and lines[-1] == '':
        lines.pop()
    return lines


def find_sequence(sequence=None,filetag=None,IsExt=True,path=None,grepopt=''):
    """Find a text sequence in files of a given extension, in a tree of
    directories.

    Parameters:
      sequence -- text handed to grep as the pattern. If None, the selected
                  paths are only listed, not searched.
      filetag  -- file-name filter. If None, every path is selected. An
                  empty string also selects every path.
      IsExt    -- if true, `filetag` must be the end of the path (an
                  extension); otherwise it may appear anywhere in the path.
      path     -- directory to examine; None or '' means the current
                  directory.
      grepopt  -- extra grep options as one string (e.g. '-i'); it is split
                  with shell-like quoting rules before being passed on.

    Results are printed to standard output; the function returns None.
    Written to run unchanged on Python 2.6+ and Python 3.
    """
    import os
    import shlex

    print('\nExecuting find_sequence...')
    print('Another amazing aplication by Ruyman Azzollini\n')
    print('\nSequence : %s' % sequence)
    if IsExt:
        fetchopt = 'Extension'
    else:
        fetchopt = 'Tag'
    print('%s : %s' % (fetchopt, filetag))

    pathstr = path or ''
    if pathstr and not pathstr.endswith('/'):
        pathstr += '/'

    paths = _list_tree(pathstr)

    if not paths:
        print('No Files to Check in current directory')
        # Return instead of sys.exit() so importing callers are not killed.
        return

    if grepopt:
        grep_args = shlex.split(grepopt)
    else:
        grep_args = []

    for candidate in paths:

        if filetag is None:
            hastag = True
        elif IsExt:
            hastag = candidate.endswith(filetag)
        else:
            hastag = filetag in candidate

        if not hastag:
            continue

        if sequence is None:
            print(candidate)
            continue

        # Only regular files can be searched; directories are skipped.
        if not os.path.isfile(candidate):
            continue

        hits = _grep_file(candidate, sequence, grep_args)
        if hits:
            print('\n%s\n' % candidate)
            for line in hits:
                print(line)
            print('\n')

########################################################################


if __name__ == "__main__":

    # Command-line entry point: parse the options and hand them to
    # find_sequence().
    from optparse import OptionParser

    cli = OptionParser()
    cli.add_option("-e", "--extension", dest="extension", default=None,
                   help="Extension of files to fetch. Optional.")
    cli.add_option("-t", "--tag", dest="tag", default=None,
                   help="Sequence in file names to fetch. Optional.")
    cli.add_option("-s", "--sequence", dest="sequence", default=None,
                   help="Sequence to find inside files. Optional.")
    cli.add_option("-p", "--path", dest="path", default=None,
                   help="Directory to examine. Optional.")
    cli.add_option("-g", "--grep", dest="grepopt", default='',
                   help="Options to pass to the grep command. \nExample: -g '-i'\nOptional.")
    opts, _positional = cli.parse_args()

    # Missing or empty option values collapse to None ('' for grep options).
    wanted_text = opts.sequence or None
    name_filter = opts.extension or None
    root_dir = opts.path or None
    grep_flags = opts.grepopt or ''

    # A tag, when given, replaces the extension filter and switches the
    # match from "ends with" to "contains".
    match_suffix = not opts.tag
    if opts.tag:
        name_filter = opts.tag

    find_sequence(wanted_text, name_filter, match_suffix, root_dir, grep_flags)

Section: HOWTOs

edit · print · PDF
Page last modified on April 07, 2008, at 10:03 PM