# Presidential Debate + Word Clouds
#
# Reads a debate transcript, counts the words spoken by each participant
# (ignoring the common words listed in 'stopWords.txt'), prints the most
# frequent words of Obama and Romney and writes one HTML "word cloud" page
# per candidate ('Obama.html' and 'Romney.html').

import html
import string

TOP_N = 40                        # how many words go into each table / cloud
WORDS_PER_ROW = 4                 # count:word pairs printed on one table row
WORD_COLUMN_WIDTH = 12            # padding that keeps the table columns aligned
STOP_WORDS_FILE = "stopWords.txt"

# A line starting with one of these prefixes switches the current speaker.
_SPEAKER_PREFIXES = (
    ("PRESIDENT", "obama"),
    ("MR. ROMNEY", "romney"),
    ("MR. LEHRER", "lehrer"),
)

# Tokens that belong to speaker labels (or are bare dashes), never counted.
_SKIPPED_TOKENS = frozenset(
    ["MR.", "ROMNEY:", "LEHRER:", "PRESIDENT", "OBAMA:", "--"]
)


def make_HTML_box(body):
    '''Wrap already-formatted HTML in a bordered, centred box.

    Required -- body (string), HTML for the words placed inside the box
    Return -- a string that specifies an HTML box containing the body
    '''
    box_str = """<div style="
    width: 560px;
    background-color: rgb(250,250,250);
    border: 1px grey solid;
    text-align: center">{:s}</div>
"""
    return box_str.format(body)


def make_HTML_word(word, cnt, high, low):
    '''Make a word with a font size to be placed in the box.

    The font size is interpolated linearly between low and high according
    to where cnt lies between the lowest and highest count.

    Required -- word (string) to be formatted
             -- cnt (int) count of occurrences of word
             -- high (int) highest word count in the document
             -- low (int) lowest word count in the document
    Return -- a string formatted for HTML that is scaled with respect to cnt
    '''
    if high == low:
        # Every word has the same count: nothing to interpolate, and the
        # general formula would divide by zero.
        ratio = 0.0
    else:
        ratio = (cnt - low) / float(high - low)
    # Round instead of truncating so float error cannot shrink the size by one.
    font_size = int(round(high * ratio + (1 - ratio) * low))
    word_str = '<span style="font-size:{:d}px;">{:s}</span>'
    # The word comes from the input file, so escape it before embedding it.
    return word_str.format(font_size, html.escape(word))


def print_HTML_file(body, title):
    '''Create a standard html page (file) with title, header etc. and add
    the body (an html box) to that page. File created is title+'.html'.

    Required -- body (string), a string that specifies an HTML box
             -- title (string), page title and base name of the file
    Return -- nothing
    '''
    safe_title = html.escape(title)
    page = (
        "<html>\n<head>\n<title>{title}</title>\n</head>\n\n"
        "<body>\n<h1>{title}</h1>\n{body}\n<hr>\n</body>\n</html>\n"
    ).format(title=safe_title, body=body)
    with open(title + '.html', 'w') as fd:
        fd.write(page)


def _add_word(word, counts, stop_words):
    '''Increment counts[word] unless word is empty or a stop word.'''
    if not word or word in stop_words:
        return
    counts[word] = counts.get(word, 0) + 1


def lehrer_dict(word, lehrer, stop_words):
    '''Count word in Lehrer's dictionary (kept for compatibility).'''
    _add_word(word, lehrer, stop_words)


def obama_dict(word, obama, stop_words):
    '''Count word in Obama's dictionary (kept for compatibility).'''
    _add_word(word, obama, stop_words)


def romney_dict(word, romney, stop_words):
    '''Count word in Romney's dictionary (kept for compatibility).'''
    _add_word(word, romney, stop_words)


def load_stop_words(lines):
    '''Build the set of words that must never be counted.

    Required -- lines (iterable of strings), one stop word per line
    Return -- a set of lowercase stop words; 'obama' is always included so
              that the speaker label is not counted as a spoken word
    '''
    stop_words = {"obama"}
    for line in lines:
        entry = line.strip().lower()
        if entry:
            stop_words.add(entry)
    return stop_words


def _speaker_of(line, current):
    '''Return the speaker a stripped line belongs to.

    A line that opens with a speaker label switches the speaker; any other
    line continues the paragraph of the current speaker.
    '''
    for prefix, speaker in _SPEAKER_PREFIXES:
        if line.startswith(prefix):
            return speaker
    return current


def count_debate_words(lines, stop_words):
    '''Count the words spoken by each participant of the debate.

    Required -- lines (iterable of strings), the transcript line by line
             -- stop_words (container of strings), words to ignore
    Return -- a dictionary with the keys 'obama', 'romney' and 'lehrer',
              each mapping to a {word: count} dictionary
    '''
    counts = {"obama": {}, "romney": {}, "lehrer": {}}
    # Text before the first label is attributed to the moderator.
    speaker = "lehrer"
    for raw_line in lines:
        line = raw_line.strip()
        speaker = _speaker_of(line, speaker)
        for token in line.split():
            if token in _SKIPPED_TOKENS:
                continue
            word = token.lower().strip(string.punctuation)
            _add_word(word, counts[speaker], stop_words)
    return counts


def top_words(counts, limit=TOP_N):
    '''Return the most frequent words as (count, word) tuples.

    Required -- counts (dictionary) mapping word to count
             -- limit (int) maximum number of tuples returned
    Return -- a list of at most limit tuples, highest count first; equal
              counts are ordered by word in reverse alphabetical order
    '''
    ranked = sorted(((cnt, word) for word, cnt in counts.items()), reverse=True)
    return ranked[:limit]


def format_count_rows(pairs, per_row=WORDS_PER_ROW):
    '''Lay out (count, word) tuples as aligned 'count:word' table rows.

    Required -- pairs (list of (count, word) tuples)
             -- per_row (int) number of pairs on each row
    Return -- a list of strings, one per row, without trailing blanks
    '''
    cell = "{:3d}:{:<" + str(WORD_COLUMN_WIDTH) + "s}"
    rows = []
    for start in range(0, len(pairs), per_row):
        cells = [cell.format(cnt, word) for cnt, word in pairs[start:start + per_row]]
        rows.append("".join(cells).rstrip())
    return rows


def _print_word_table(name, pairs):
    '''Print the frequency table of one candidate.'''
    print('+' * 20)
    print("{:s} : words in frequency order as count:word pairs".format(name))
    for row in format_count_rows(pairs):
        print(row)


def build_word_cloud(pairs):
    '''Build the HTML box holding the word cloud of one candidate.

    Required -- pairs (list of (count, word) tuples), highest count first
    Return -- a string with the HTML box; words appear in alphabetical
              order, each one sized by its count
    '''
    if not pairs:
        return make_HTML_box('')
    high_count = pairs[0][0]
    low_count = pairs[-1][0]
    alphabetical = sorted((word, cnt) for cnt, word in pairs)
    body = "".join(
        " " + make_HTML_word(word, cnt, high_count, low_count)
        for word, cnt in alphabetical
    )
    return make_HTML_box(body)


def _open_debate_file():
    '''Ask for a file name until one can be opened; return the open file.'''
    while True:
        file_name = input("Open what file: ")
        try:
            return open(file_name, 'r')
        except IOError:
            print("Bad file name, try again ")


def main():
    '''Run the whole program: prompt, count, print tables, write clouds.'''
    print("This program reads through the presidential debate and sorts/counts all the words that each president")
    print("said during the debate. The top 40 words each president said are then put into an html 'word cloud'")
    print("In order to eliminate common words such as the word 'a' the document 'stopWords.txt' file, which has all these")
    print("common words in it, was opened and compared to the presidents top 40 words said during the debate. If any")
    print("of these words matched with the words in the 'stopWords.txt' then they were deleted from the top 40 list")

    with _open_debate_file() as debate_file:
        with open(STOP_WORDS_FILE, 'r') as stop_word_file:
            stop_words = load_stop_words(stop_word_file)
        counts = count_debate_words(debate_file, stop_words)

    pairs_o = top_words(counts["obama"])
    pairs_r = top_words(counts["romney"])

    _print_word_table("Obama", pairs_o)
    _print_word_table("Romney", pairs_r)
    print('+' * 20)

    print_HTML_file(build_word_cloud(pairs_o), "Obama")
    print_HTML_file(build_word_cloud(pairs_r), "Romney")


if __name__ == "__main__":
    main()