# A class reading a list of stop words and a list of the 1000 most
# frequent words, storing them in a list and a dictionary
# respectively. It also reads a text file and creates a content graph
# using co-occurrence in the same sentence for word connections. It
# outputs the graph to a file as well, that can be used to visualize
# it in the Graphia app.

# Author: Dana Vrajitoru and ...
# Class: C463/B551/I400 Artificial Intelligence, Fall 2025

from word_graph import *
from porter_stemmer import *


class Make_Graph:

    # Reads the list of words to be excluded from indexing and stores
    # them in a simple list.
    #
    # filename: path of a text file with one stop word per line.
    # Returns the list of non-empty, whitespace-stripped lines in file
    # order, or None (after printing a message) if the file cannot be
    # opened.
    @staticmethod
    def read_stop_words(filename):
        # Only the open() call is guarded, so errors raised while
        # reading are not silently reported as "unable to read".
        try:
            fin = open(filename, 'r')
        except OSError:
            print("Unable to read the stop words file.")
            return None

        # The with-block closes the file even if reading fails.
        with fin:
            stripped = (line.strip() for line in fin)  # one word per line
            return [word for word in stripped if word]

    # Reads the file containing the 1000 most frequent English words,
    # stems them, computes a score based on the frequency, and stores
    # it in a dictionary which is returned.
    #
    # filename: path of a text file of whitespace-separated words,
    #           possibly several per line.
    # stemmer:  object providing stem_word(str) -> str.
    # Returns a dictionary mapping each stemmed, lower-cased word to
    # (1000 - rank) / 1000, where rank is the 0-based position of the
    # word in the file; or None (after printing a message) if the file
    # cannot be opened.
    @staticmethod
    def read_freq_words(filename, stemmer):
        try:
            fin = open(filename, 'r')
        except OSError:
            print("Unable to read the frequent word file.")
            return None

        freq_dict = {}
        rank = 0
        with fin:
            for line in fin:
                # split() with no argument drops empty fields and also
                # copes with tabs or repeated spaces between words.
                for word in line.split():  # multiple words per line
                    stem = stemmer.stem_word(word.lower())
                    # previous score for a similar word may be replaced
                    freq_dict[stem] = (1000 - rank) / 1000
                    # NOTE(review): the rank is advanced once per word
                    # read; confirm this matches the intended scoring.
                    rank += 1
        return freq_dict

    # To be implemented by the student
    # Reads a file containing a document to be indexed and uses the
    # list of stop words and the dictionary of frequent words. Creates
    # a content graph based on how frequent a word is in the document
    # and connecting the words that appear in the same sentence
    # together. Returns the created graph.
    #
    # filename: path of the document to index.
    # stopw:    stop words, as returned by read_stop_words.
    # freqw:    frequent-word scores, as returned by read_freq_words.
    # stemmer:  stemmer object, as passed to read_freq_words.
    # In its current skeleton form this only constructs a Word_Graph
    # and returns it without reading the file.
    @staticmethod
    def read_index_file(filename, stopw, freqw, stemmer):
        wg = Word_Graph()  # to store the info you read
        # more code needed here
        return wg


# main code of the module
if __name__ == '__main__':
    stemmer = PorterStemmer()
    stop_words = Make_Graph.read_stop_words("stop_words.txt")
    # print(stop_words)
    freq_words = Make_Graph.read_freq_words("frequent_words.txt", stemmer)
    # print(freq_words)
    graph = Make_Graph.read_index_file("three_piggies.txt", stop_words,
                                       freq_words, stemmer)
    graph.print_top(5)
    graph.output_file("three_piggies_graph.txt")