# A class reading a list of stop words, a list of the 1000 most
# frequent words and storing them in lists. It also reads a text file
# and creates a content graph using co-occurrence in the same sentence
# for word connections. It outputs the graph to a file as well, that
# can be used to visualize it in the Graphia app.
# Author: Dana Vrajitoru and ...
# Class: C463/B551/I400 Artificial Intelligence, Fall 2025

from word_graph import *
from porter_stemmer import *

class Make_Graph:
    # Groups the helper functions that load the word lists and build
    # the content graph. None of them use instance state, so they are
    # declared static and work both as Make_Graph.f(...) and on an
    # instance.

    # Reads the list of words to be excluded from indexing and stores
    # them in a simple list.
    #   filename: path of a text file holding one word per line.
    # Returns the list of non-empty, whitespace-stripped words in file
    # order, or None (after printing a message) if the file cannot be
    # opened.
    @staticmethod
    def read_stop_words(filename):
        # Only the open call is guarded, so errors unrelated to opening
        # the file are not silently hidden.
        try:
            fin = open(filename, 'r')
        except OSError:
            print("Unable to read the stop words file.")
            return None
        # The with statement closes the file even if reading fails.
        with fin:
            stripped = (line.strip() for line in fin)   # one word per line
            return [word for word in stripped if word]

    # Reads the file containing the 1000 most frequent English words,
    # stems them, computes a score based on the frequency, and stores
    # it in a dictionary which is returned.
    #   filename: path of a text file with space-separated words,
    #             possibly several per line.
    #   stemmer:  object providing stem_word(word) -> stem.
    # Returns a dictionary mapping each stem to (1000 - i) / 1000, where
    # i is the 0-based position of the word in the file, or None (after
    # printing a message) if the file cannot be opened.
    @staticmethod
    def read_freq_words(filename, stemmer):
        try:
            fin = open(filename, 'r')
        except OSError:
            print("Unable to read the frequent word file.")
            return None
        freq_dict = {}
        rank = 0   # position of the current word among all words read
        with fin:
            for line in fin:
                # multiple words per line; consecutive spaces produce
                # empty strings, which are skipped and not counted
                for word in line.strip().split(' '):
                    if not word:
                        continue
                    stem = stemmer.stem_word(word.lower())
                    # previous score for a similar word may be replaced
                    freq_dict[stem] = (1000 - rank) / 1000
                    rank += 1
        return freq_dict

    # To be implemented by the student
    # Reads a file containing a document to be indexed and uses the
    # list of stop words and the dictionary of frequent words. Creates
    # a content graph based on how frequent a word is in the document
    # and connecting the words that appear in the same sentence
    # together. Returns the created graph.
    #   filename: path of the document to index.
    #   stopw:    stop word list, as returned by read_stop_words.
    #   freqw:    stem -> score dictionary, as returned by
    #             read_freq_words.
    #   stemmer:  stemmer object (presumably the same one used for
    #             freqw, so that stems match -- confirm when
    #             implementing).
    # NOTE: currently a stub; none of the parameters are used and an
    # empty Word_Graph is returned.
    @staticmethod
    def read_index_file(filename, stopw, freqw, stemmer):
        wg = Word_Graph() # to store the info you read
        # more code needed here

        return wg

# Script entry point: loads the two word lists, builds the graph for
# the sample document, prints its top 5 entries and writes it to a file.
if __name__ == '__main__':
    porter = PorterStemmer()

    # Word lists used while indexing (either may be None if its file
    # could not be read).
    excluded = Make_Graph.read_stop_words("stop_words.txt")
    # print(excluded)
    common = Make_Graph.read_freq_words("frequent_words.txt", porter)
    # print(common)

    story_graph = Make_Graph.read_index_file(
        "three_piggies.txt", excluded, common, porter
    )
    story_graph.print_top(5)
    story_graph.output_file("three_piggies_graph.txt")