# Document Searching Program
#
# Reads a text file that holds many documents, builds one word dictionary per
# document, and then lets the user search for words or read a document by its
# number.

import string

# File opened first; the user is asked for another name only if this one fails.
DEFAULT_FILE = "ap_docs.txt"

# Line prefix that starts a new document in the input file.
# NOTE(review): the tag literal is missing from the source this was rebuilt
# from (it compared a 14-character line prefix against an empty string, which
# can never match). "<NEW DOCUMENT>" is inferred from that 14-character slice
# and from the leftover 'DOCUMENT>' token -- confirm against ap_docs.txt.
DOC_TAG = "<NEW DOCUMENT>"

# Separator printed between sections of output.
RULE = '-' * 25

# Index filled in by main(). Position 0 is always an empty placeholder so that
# position n holds document number n (documents are numbered from 1).
dict_list = []    # one {word: document number} dictionary per document
list_list = []    # one string of document text per document


def append_dict(dict_list, dictionary):
    """Append one document's word dictionary to the given list."""
    dict_list.append(dictionary)


def count_list(new_list):
    """Append one document's text to the module-level list_list."""
    list_list.append(new_list)


def count_dict(word, dictionary, i):
    """Record that word occurs in document i.

    Repeated words simply overwrite the same entry, so each word appears
    once per dictionary no matter how often it occurs in the document.
    """
    dictionary[word] = i


def _normalize(word):
    """Return word lower-cased with surrounding blanks and punctuation removed."""
    return word.lower().strip().strip(string.punctuation)


def build_index(lines, tag=DOC_TAG):
    """Split lines into documents and index the words of each one.

    lines -- any iterable of text lines (an open file works).
    tag   -- prefix of the line that starts a new document.

    Returns (dictionaries, texts). Both lists have an empty placeholder at
    position 0, followed by one entry per document, so position n belongs to
    document number n. Text on a tag line, and any text before the first tag,
    belongs to no document and is ignored. A '--' token is left out of both
    the text and the dictionary.
    """
    dictionaries = []
    texts = []
    current_dict = {}
    current_words = []
    doc_number = 0
    for line in lines:
        if line.startswith(tag):
            # Close what was collected so far. The first tag closes the empty
            # placeholder that occupies position 0.
            append_dict(dictionaries, current_dict)
            texts.append(' '.join(current_words))
            current_dict = {}
            current_words = []
            doc_number += 1
            continue
        if doc_number == 0:
            continue
        for word in line.split():
            if word == '--':
                continue
            current_words.append(word)      # text keeps the original spelling
            key = _normalize(word)
            if key:                         # skip tokens that were only punctuation
                count_dict(key, current_dict, doc_number)
    # The last document has no closing tag, so flush it here.
    append_dict(dictionaries, current_dict)
    texts.append(' '.join(current_words))
    return dictionaries, texts


def find_documents(search_words, dictionaries):
    """Return the numbers of the documents that contain every search word.

    search_words -- one or more words separated by blanks; case and
                    surrounding punctuation are ignored.
    dictionaries -- list of per-document word dictionaries, as returned by
                    build_index.

    Returns a list of positions in dictionaries, in ascending order. The list
    is empty when nothing matches or when the query holds no usable word.
    """
    terms = [term for term in map(_normalize, search_words.split()) if term]
    if not terms:
        return []
    return [number for number, words in enumerate(dictionaries)
            if all(term in words for term in terms)]


def _open_documents(file_name):
    """Open file_name for reading, asking for another name until one opens."""
    while True:
        try:
            return open(file_name, 'r')
        except IOError:
            print("Bad file name, try again")
            # Ask for a different name; retrying the same one would loop forever.
            file_name = input("Enter file name: ")


def _search_documents():
    """Menu option 1: ask for search words and print the matching documents."""
    search_words = input("Enter search words:")
    matches = find_documents(search_words, dict_list)
    if matches:
        print("Documents fitting search", ' '.join(str(n) for n in matches))
    else:
        print('No relevant documents were found')
    print(RULE)


def _read_document():
    """Menu option 2: ask for a document number and print that document."""
    answer = input("Enter document number:")
    last = len(list_list) - 1           # position 0 is the placeholder
    try:
        z = int(answer)
    except ValueError:
        print("Document number must be a whole number")
        print(RULE)
        return
    if z < 1 or z > last:
        print("No such document; valid numbers are 1 to", last)
        print(RULE)
        return
    print("Document #", z)
    print(RULE)
    print(list_list[z])
    print(RULE)


def main():
    """Build the index from the document file and run the menu until the user quits."""
    print("This program opens the file 'ap_docs.txt' and reads through the entire text organizing it into count dictionaries.")
    print("This 'ap_docs.text' is filled with multiple documents that have a bunch of different kinds of topics to choose from.")
    print("Since count dictionaries were made with this program it is now possible to search for individual words and to reveal")
    print("what document number those words are found in. In addition, you can now open individual documents themselves by")
    print("typing in the document number that you want to take a look at. Try out the program for a bit to see what I mean.")

    # The with-block closes the file as soon as indexing is done.
    with _open_documents(DEFAULT_FILE) as file:
        dictionaries, texts = build_index(file)

    # Publish the index through the module-level lists, emptied first so that
    # calling main() again does not pile up duplicates.
    del dict_list[:]
    del list_list[:]
    for dictionary in dictionaries:
        append_dict(dict_list, dictionary)
    for text in texts:
        count_list(text)

    while True:
        print("What would you like to do? ")
        print("1. Search for Documents")
        print("2. Read Document")
        print("3. Quit Program")
        number = input(">")
        if number == '1':
            _search_documents()
        elif number == '2':
            _read_document()
        elif number == '3':
            break
        else:
            print("Please enter 1, 2 or 3")
            print(RULE)


if __name__ == "__main__":
    main()