diff --git a/README.md b/README.md
index 6246d10..2a8788a 100644
--- a/README.md
+++ b/README.md
@@ -52,5 +52,10 @@ Programming projects that I have written at university
 - [Project 1](/cmpe362/project1) `Matlab`
 - [Project 2](/cmpe362/project2) `Matlab`
 - [Project 3](/cmpe362/project3) `Matlab`
+
+### [CmpE493 - Introduction to Information Retrieval](/cmpe493)
+- [Project 1](/cmpe493/project1) `Python`
+- [Project 2](/cmpe493/project2) `Python`
+- [Project 3](/cmpe493/project3) `Python`
 
 ### [CmpE352 & CmpE451 - Project Development in Software Engineering](https://github.com/bounswe/bounswe2017group11)
diff --git a/cmpe493/README.md b/cmpe493/README.md
new file mode 100644
index 0000000..9de438b
--- /dev/null
+++ b/cmpe493/README.md
@@ -0,0 +1,4 @@
+# CmpE493 - Introduction to Information Retrieval
+| Term | Instructor |
+| --- | --- |
+| Spring 2018 | Arzucan Özgür |
\ No newline at end of file
diff --git a/cmpe493/project1/README.md b/cmpe493/project1/README.md
new file mode 100644
index 0000000..84d9acc
--- /dev/null
+++ b/cmpe493/project1/README.md
@@ -0,0 +1,13 @@
+The entry point of my application is app.py
+
+You should use Python 3.
+
+Run the app with the "python3 app.py" command.
+
+The first time you run it, it builds the inverted index.
+
+The default data directory is "reuters21578".
+
+The stop word list must be in the current directory and named "stopwords.txt".
+
+Use the help command for further information.
\ No newline at end of file
diff --git a/cmpe493/project1/Report.pdf b/cmpe493/project1/Report.pdf
new file mode 100644
index 0000000..3b0417d
Binary files /dev/null and b/cmpe493/project1/Report.pdf differ
diff --git a/cmpe493/project1/app.py b/cmpe493/project1/app.py
new file mode 100644
index 0000000..ef28704
--- /dev/null
+++ b/cmpe493/project1/app.py
@@ -0,0 +1,115 @@
+import os
+import re
+from indexer import Indexer
+from query import Query
+from query import QueryType
+
+def make_bold(string):
+    """
+    Makes the given string bold in the terminal using ANSI escape codes
+    """
+    return "\033[1m{}\033[0m".format(string)
+
+def command_help():
+    """
+    Prints the help page of the application to the terminal
+    """
+    print("\n\n\t\t\t\t\t" + make_bold("--- REUTERS SEARCH ENGINE ---"))
+    print(make_bold("COMMANDS"))
+    print("\t" + make_bold("exit") + "\t\t Exits from the program")
+    print("\t" + make_bold("help") + "\t\t Lists available commands")
+    print("\t" + make_bold("index [FOLDER]") + "\t Builds the positional inverted index from the given folder. 
Default: reuters21578") + print("\t" + make_bold("clear") + "\t\t Clear console screen") + print("\t" + make_bold("remove") + "\t\t Removes inverted index files") + print("\t" + make_bold("postings WORD") + "\t Returns postings of word") + print("\n\t" + "** There is no special command for query processing.") + print("\t" + "Inputs that aren't special command interpreted as query") + print("\n\t" + make_bold("[QUERY_TYPE] YOUR_QUERY") + "\t Processes query based on given type.") + print("\t\t\t\t If no type is given, it predicts query type") + print("\t" + make_bold("Query Types")) + print("\t\t" + make_bold(QueryType.CONJUNCTIVE) + " -> Conjunctive Query") + print("\t\t" + make_bold(QueryType.PHRASE) + " -> Phrase Query") + print("\t\t" + make_bold(QueryType.PROXIMITY) + " -> Proximity Query") + print("\n\n\n") + +def command_index(directory): + """ + Indexes data that from given directory again + """ + global dictionary + global index + Indexer.remove_index() + # Set default data directory + if directory is None: + directory = 'reuters21578' + print('Indexing ' + directory + ' folder...') + Indexer.create_index(directory=directory) + dictionary, index = Indexer.get_index() + print('Index created') + +def command_remove(): + """ + Removes current index files + """ + Indexer.remove_index() + global dictionary + global index + dictionary = {} + index = {} + print('Index removed') + +def command_postings(word, dictionary, index): + """ + Returns postings of given word + """ + postings = Indexer.get_postings(word, dictionary, index) + print(postings) + +def command_exit(): + """ + Exits from application + """ + print("Goodbye...") + exit(1) + +def command_clear(): + """ + Clears terminal screen + """ + os.system("clear") + +#################################### +########## APP START HERE ########## +#################################### + +# If the index isn't created create it +if not Indexer.is_indexed(): + command_index(None) +else: + print('Data is already indexed') + +dictionary, index = Indexer.get_index() + +print("Type " + make_bold("help") + " for any documentation") +while True: + # Get command from user and processes it + command = input("query> ") + postings_command = re.match(r'^postings\s(\w+)', command) + index_command = re.match(r'^index\s?(\w+)?', command) + if command == "exit": + command_exit() + elif index_command: + command_index(index_command.group(1)) + elif command == "help": + command_help() + elif command == "clear": + command_clear() + elif command == "remove": + command_remove() + elif postings_command: + command_postings(postings_command.group(1), dictionary, index) + else: + query = Query(command) + result = query.run(dictionary, index) + print(make_bold(str(len(result)) + ' documents are founded')) + print(sorted(result)) diff --git a/cmpe493/project1/cmpe493-assignment1-specification.pdf b/cmpe493/project1/cmpe493-assignment1-specification.pdf new file mode 100644 index 0000000..97f42e0 Binary files /dev/null and b/cmpe493/project1/cmpe493-assignment1-specification.pdf differ diff --git a/cmpe493/project1/indexer.py b/cmpe493/project1/indexer.py new file mode 100644 index 0000000..b546e02 --- /dev/null +++ b/cmpe493/project1/indexer.py @@ -0,0 +1,142 @@ +import os, re, pickle +from tokenizer import Tokenizer + +class Indexer: + """ + Handles inverted index operations + """ + DICTIONARY_NAME = 'dictionary.txt' # Name of the dictionary file + INDEX_NAME = 'inverted_index.txt' # Name of the inverted index file + POSTING_ID = 1 # Starting ID for posting lists + + 
@classmethod + def read_files(self, directory=None): + """ + Returns read documents from data directory + """ + # If no directory is given, set it to current directory + directory = os.getcwd() if directory is None else directory + filenames = os.listdir(directory) + # Get all file with .sgm extension + filenames = [filename for filename in filenames if filename.endswith(".sgm")] + filenames.sort() + documents = [] + # Extract documents from each file + for filename in filenames: + raw_data = open(os.path.join(directory, filename), "r", encoding="latin-1").read() + documents += self.extract_documents(raw_data) + return documents + + @classmethod + def extract_documents(self, raw_data): + """ + Extracts documents from raw string + """ + # Some news don't have body or title + # return re.findall(r'\d+)\">.*?(?P<title>.*?).*?(?P.*?).*?', raw_data, re.DOTALL) + documents = [] + # Seperate each document + raw_documents = raw_data.split('') + # Extract information from each raw document string + for raw_document in raw_documents: + doc_id = re.match(r'.+?NEWID=\"(?P\d+)\">.+?', raw_document, re.DOTALL) + doc_title = re.match(r'.+?(?P<title>.+?).+?', raw_document, re.DOTALL) + doc_body = re.match(r'.+?(?P.+?).+?', raw_document, re.DOTALL) + + # If raw corpus has ID, it's a document, add it to list + if doc_id: + doc_id = int(doc_id.group('id')) + # If it's not have title or body, put empty string instead of them + doc_title = doc_title.group('title') if doc_title else '' + doc_body = doc_body.group('body') if doc_body else '' + documents.append({'id': doc_id, 'title': doc_title, 'body':doc_body}) + return documents + + @classmethod + def create_index(self, directory=None): + """ + Creates index from data that in given directory + """ + # Read files and get documents + documents = self.read_files(directory) + # Initialize directory and inverted index + dictionary = {} + inverted_index = {} + # Load stop words from file + stop_words = Tokenizer.stop_words() + + for document in documents: + doc_id = document['id'] + # Concatenate title and body, then tokenize this combination + tokens = Tokenizer.tokenize(document['title'] + ' ' + document['body']) + # Iterate all tokens and if it's not a stop word, add it to index with it's position + for position, token in enumerate(tokens): + if not token in stop_words: + # Get ID of positional indexes of the token + postings_id = dictionary.get(token, self.get_posting_id()) + # Get positional indexes of token as dictionary + postings = inverted_index.get(postings_id, {}) + # Get positions of the token in the document as list + positions = postings.get(doc_id, []) + # Add this position to positional index + positions.append(position) + # Put positions list of the this document back to token's document's list + postings[doc_id] = positions + # Put updated positional indexes of the token back to inverted index + inverted_index[postings_id] = postings + # Update ID of the token in dictionary + dictionary[token] = postings_id + # Save created index to file + self.save_index(dictionary, inverted_index) + + @classmethod + def get_posting_id(self): + """ + Returns globally incremented ID for next postings list + """ + self.POSTING_ID += 1 + return self.POSTING_ID - 1 + + @classmethod + def get_postings(self, token, dictionary, index): + """ + Returns documents and positions of given token after normalization + """ + stem = Tokenizer.normalize_and_stem(token) + posting_id = dictionary.get(stem) + return index.get(posting_id, {}) + + @classmethod + def save_index(self, 
directory, index): + """ + Save dictionary and inverted index to file + """ + pickle.dump(directory, open(self.DICTIONARY_NAME, 'wb')) + pickle.dump(index, open(self.INDEX_NAME, 'wb')) + + @classmethod + def get_index(self): + """ + Load dictionary and inverted index from file + Returns: + dictionary, index + """ + return pickle.load(open(self.DICTIONARY_NAME, 'rb')), pickle.load(open(self.INDEX_NAME, 'rb')) + + @classmethod + def remove_index(self): + """ + Removes old inverted index files + """ + try: + os.remove(self.DICTIONARY_NAME) + os.remove(self.INDEX_NAME) + except OSError: + pass + + @classmethod + def is_indexed(self): + """ + Checks if index is exist + """ + return os.path.isfile(self.DICTIONARY_NAME) and os.path.isfile(self.INDEX_NAME) \ No newline at end of file diff --git a/cmpe493/project1/query.py b/cmpe493/project1/query.py new file mode 100644 index 0000000..63f5412 --- /dev/null +++ b/cmpe493/project1/query.py @@ -0,0 +1,216 @@ +import re +from enum import Enum +from tokenizer import Tokenizer + +class Query: + """ + Handles query operations + """ + + def __init__(self, query): + """ + Constructs an new query with given one + """ + self.type, self.query = Query.extract(query) + + @staticmethod + def extract(query): + """ + Returns query's text and it's type + If no type is given, it guesses type of the query + Returns: + type, query + """ + type = re.match(r'^\d+', query) + if type: + # Get type of the query from text and return with query + return int(type.group()), re.sub(r'^\d+\s', '', query) + else: + # Guess type of the query + # If it has AND it's conjunctive + # If it has /NUMBER it's proximity + # Otherwise, it's phrase + if 'AND' in query: + return QueryType.CONJUNCTIVE, query + elif re.match(r'.*?/\d+.*?', query): + return QueryType.PROXIMITY, query + else: + return QueryType.PHRASE, query + + def run(self, dictionary, index): + """ + Runs query depends on it's type + Phrase queries are same with proximity queries. 
+ Just replace spaces with /0 + """ + result = [] + if self.type == QueryType.CONJUNCTIVE: + result = self.run_conjunctive(dictionary, index) + elif self.type == QueryType.PHRASE: + self.query = self.query.replace(' ', ' /0 ') + result = self.run_proximity(dictionary, index) + elif self.type == QueryType.PROXIMITY: + result = self.run_proximity(dictionary, index) + else: + print("Unknown query type") + return result + + def run_conjunctive(self, dictionary, index): + """ + Runs conjunctive query + """ + # Tokenize and normalize query + tokens = self.query.split(' AND ') + stems = [Tokenizer.normalize_and_stem(token) for token in tokens] + # Get inverted indexies of all stems + postings_list = [] + for stem in stems: + posting_id = dictionary.get(stem) + postings = index.get(posting_id, {}) + postings_list.append(postings) + # Intersect given lists + return self.intersect_list(postings_list) + + def run_proximity(self, dictionary, index): + """ + Runs proximity query + """ + result = [] + # Tokenize and normalize query + tokens = re.split(r'\s\/\d+\s', self.query) + stems = [Tokenizer.normalize_and_stem(token) for token in tokens] + # Get proximities + proximities = [int(proximity) for proximity in re.findall(r'\d+', self.query)] + # Get inverted indexies of all stems + postings_list = [] + for stem in stems: + posting_id = dictionary.get(stem) + postings = index.get(posting_id, {}) + postings_list.append(postings) + # Intersect positionally given lists + return self.positional_intersect_list(postings_list, proximities) + + def intersect(self, ps1, ps2): + """ + Intersects two given lists + Algorithm is based on Figure 1.6 from book + """ + answer = [] + p1 = next(ps1) + p2 = next(ps2) + while True: + try: + if p1 == p2: + # If document's IDs are same add to answers + answer.append(p1) + p1 = next(ps1) + p2 = next(ps2) + elif p1 < p2: + p1 = next(ps1) + else: + p2 = next(ps2) + except StopIteration: + break + return answer + + def intersect_list(self, postings_list): + """ + Intersects multiple lists + Algorithm is based on Figure 1.7 from book + """ + # Sort them by their frequencies + postings_list.sort(key=lambda postings: len(postings)) + + # Intersect lists 2 by 2 + result = postings_list.pop(0) + while result and postings_list: + try: + postings = postings_list.pop(0) + result = self.intersect(iter(result), iter(postings)) + except IndexError: + break + if isinstance(result, dict): + result = list(result.keys()) + return result + + def positional_intersect_list(self, postings_list, proximities): + """ + Intersects positionally multiple lists + Intersect next list with previous answer + """ + start_postings = postings_list.pop(0) + result = start_postings.keys() + while postings_list and proximities: + try: + next_postings = postings_list.pop(0) + k = proximities.pop(0) + result, start_postings = self.positional_intersect(start_postings, next_postings, k) + if not result: + break + except IndexError: + break + return result + + def positional_intersect(self, ps1, ps2, k): + """ + Intersects two given lists by proximity + Algorithm is based on Figure 2.12 from book + When you said next to last element, + Python throws and exception instead of just return None + So my method has too many try:except blocks + """ + + k = k + 1 + answer = set() + postings = {} + ps1_iter = iter(ps1) + ps2_iter = iter(ps2) + try: + p1 = next(ps1_iter) + p2 = next(ps2_iter) + except StopIteration: + return answer, postings + while True: + try: + if p1 == p2: + l = [] + p1_positions = iter(ps1[p1]) + 
p2_positions = iter(ps2[p2]) + pp1 = next(p1_positions) + pp2 = next(p2_positions) + while True: + try: + while True: + try: + if pp2 - pp1 <= k and pp2 - pp1 > 0: + l.append(pp2) + elif pp2 > pp1: + break + pp2 = next(p2_positions) + except StopIteration: + break + for ps in l: + answer.add(p1) + positions = postings.get(p1, []) + positions.append(ps) + postings[p1] = positions + pp1 = next(p1_positions) + except StopIteration: + break + p1 = next(ps1_iter) + p2 = next(ps2_iter) + elif p1 < p2: + p1 = next(ps1_iter) + else: + p2 = next(ps2_iter) + except StopIteration: + break + return answer, postings + +class QueryType: + """ + Enumerates type of the query + """ + CONJUNCTIVE = 1 + PHRASE = 2 + PROXIMITY = 3 diff --git a/cmpe493/project1/report.py b/cmpe493/project1/report.py new file mode 100644 index 0000000..a6849d7 --- /dev/null +++ b/cmpe493/project1/report.py @@ -0,0 +1,79 @@ +import os +import re +from indexer import Indexer +from tokenizer import Tokenizer +import operator + +documents = Indexer.read_files("original_data") + +countA = 0 +countB = 0 + +setC = set() +setD = set() + +counterE = {} +counterF = {} + +for document in documents: + countA += len(document['title'].split()) + countA += len(document['body'].split()) + + countB += len(Tokenizer.remove_stop_words(document['title']).split()) + countB += len(Tokenizer.remove_stop_words(document['body']).split()) + + setC |= set(document['title'].split()) + setC |= set(document['body'].split()) + + setD |= set([ + Tokenizer.stem(token) for token in Tokenizer.remove_stop_words( Tokenizer.make_lower_case(document['title']) ).split() + ]) + + setD |= set([ + Tokenizer.stem(token) for token in Tokenizer.remove_stop_words(Tokenizer.make_lower_case(document['body'])).split() + ]) + + for term in document['title'].split(): + count = counterE.get(term, 0) + count += 1 + counterE[term] = count + + for term in document['body'].split(): + count = counterE.get(term, 0) + count += 1 + counterE[term] = count + + for term in [Tokenizer.stem(token) for token in Tokenizer.remove_stop_words(Tokenizer.make_lower_case(document['title'])).split()]: + count = counterF.get(term, 0) + count += 1 + counterF[term] = count + + for term in [Tokenizer.stem(token) for token in Tokenizer.remove_stop_words(Tokenizer.make_lower_case(document['body'])).split()]: + count = counterF.get(term, 0) + count += 1 + counterF[term] = count + +''' + PRINT RESULTS +''' +print("\n(a) How many tokens does the corpus contain before stopword removal and stemming?") +print(countA) + +print("\n(b) How many tokens does the corpus contain after stopword removal and stemming?") +print(countB) + +print("\n(c) How many terms(unique tokens) are there before stopword removal, stemming, and case - folding?") +print(len(setC)) + +print("\n(d) How many terms(unique tokens) are there after stopword removal, stemming, and casefolding?") +print(len(setD)) + +print("\n(e) List the top 20 most frequent terms before stopword removal, stemming, and casefolding?") +tops = list(sorted(counterE.items(), key=operator.itemgetter(1), reverse=True)) +for i in range(0, 20): + print(tops[i]) + +print("\n(f) List the top 20 most frequent terms after stopword removal, stemming, and case - folding?") +tops = list(sorted(counterF.items(), key=operator.itemgetter(1), reverse=True)) +for i in range(0, 20): + print(tops[i]) diff --git a/cmpe493/project1/stemmer.py b/cmpe493/project1/stemmer.py new file mode 100644 index 0000000..aed41e4 --- /dev/null +++ b/cmpe493/project1/stemmer.py @@ -0,0 +1,407 @@ 
+#!/usr/bin/env python + +"""Porter Stemming Algorithm +This is the Porter stemming algorithm, ported to Python from the +version coded up in ANSI C by the author. It may be be regarded +as canonical, in that it follows the algorithm presented in + +Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14, +no. 3, pp 130-137, + +only differing from it at the points maked --DEPARTURE-- below. + +See also http://www.tartarus.org/~martin/PorterStemmer + +The algorithm as described in the paper could be exactly replicated +by adjusting the points of DEPARTURE, but this is barely necessary, +because (a) the points of DEPARTURE are definitely improvements, and +(b) no encoding of the Porter stemmer I have seen is anything like +as exact as this version, even with the points of DEPARTURE! + +Vivake Gupta (v@nano.com) + +Release 1: January 2001 + +Further adjustments by Santiago Bruno (bananabruno@gmail.com) +to allow word input not restricted to one word per line, leading +to: + +release 2: July 2008 +""" + +import sys + + +class PorterStemmer: + + def __init__(self): + """The main part of the stemming algorithm starts here. + b is a buffer holding a word to be stemmed. The letters are in b[k0], + b[k0+1] ... ending at b[k]. In fact k0 = 0 in this demo program. k is + readjusted downwards as the stemming progresses. Zero termination is + not in fact used in the algorithm. + + Note that only lower case sequences are stemmed. Forcing to lower case + should be done before stem(...) is called. + """ + + self.b = "" # buffer for word to be stemmed + self.k = 0 + self.k0 = 0 + self.j = 0 # j is a general offset into the string + + def cons(self, i): + """cons(i) is TRUE <=> b[i] is a consonant.""" + if self.b[i] == 'a' or self.b[i] == 'e' or self.b[i] == 'i' or self.b[i] == 'o' or self.b[i] == 'u': + return 0 + if self.b[i] == 'y': + if i == self.k0: + return 1 + else: + return (not self.cons(i - 1)) + return 1 + + def m(self): + """m() measures the number of consonant sequences between k0 and j. + if c is a consonant sequence and v a vowel sequence, and <..> + indicates arbitrary presence, + + gives 0 + vc gives 1 + vcvc gives 2 + vcvcvc gives 3 + .... + """ + n = 0 + i = self.k0 + while 1: + if i > self.j: + return n + if not self.cons(i): + break + i = i + 1 + i = i + 1 + while 1: + while 1: + if i > self.j: + return n + if self.cons(i): + break + i = i + 1 + i = i + 1 + n = n + 1 + while 1: + if i > self.j: + return n + if not self.cons(i): + break + i = i + 1 + i = i + 1 + + def vowelinstem(self): + """vowelinstem() is TRUE <=> k0,...j contains a vowel""" + for i in range(self.k0, self.j + 1): + if not self.cons(i): + return 1 + return 0 + + def doublec(self, j): + """doublec(j) is TRUE <=> j,(j-1) contain a double consonant.""" + if j < (self.k0 + 1): + return 0 + if (self.b[j] != self.b[j - 1]): + return 0 + return self.cons(j) + + def cvc(self, i): + """cvc(i) is TRUE <=> i-2,i-1,i has the form consonant - vowel - consonant + and also if the second c is not w,x or y. this is used when trying to + restore an e at the end of a short e.g. + + cav(e), lov(e), hop(e), crim(e), but + snow, box, tray. 
+ """ + if i < (self.k0 + 2) or not self.cons(i) or self.cons(i - 1) or not self.cons(i - 2): + return 0 + ch = self.b[i] + if ch == 'w' or ch == 'x' or ch == 'y': + return 0 + return 1 + + def ends(self, s): + """ends(s) is TRUE <=> k0,...k ends with the string s.""" + length = len(s) + if s[length - 1] != self.b[self.k]: # tiny speed-up + return 0 + if length > (self.k - self.k0 + 1): + return 0 + if self.b[self.k - length + 1:self.k + 1] != s: + return 0 + self.j = self.k - length + return 1 + + def setto(self, s): + """setto(s) sets (j+1),...k to the characters in the string s, readjusting k.""" + length = len(s) + self.b = self.b[:self.j + 1] + s + self.b[self.j + length + 1:] + self.k = self.j + length + + def r(self, s): + """r(s) is used further down.""" + if self.m() > 0: + self.setto(s) + + def step1ab(self): + """step1ab() gets rid of plurals and -ed or -ing. e.g. + + caresses -> caress + ponies -> poni + ties -> ti + caress -> caress + cats -> cat + + feed -> feed + agreed -> agree + disabled -> disable + + matting -> mat + mating -> mate + meeting -> meet + milling -> mill + messing -> mess + + meetings -> meet + """ + if self.b[self.k] == 's': + if self.ends("sses"): + self.k = self.k - 2 + elif self.ends("ies"): + self.setto("i") + elif self.b[self.k - 1] != 's': + self.k = self.k - 1 + if self.ends("eed"): + if self.m() > 0: + self.k = self.k - 1 + elif (self.ends("ed") or self.ends("ing")) and self.vowelinstem(): + self.k = self.j + if self.ends("at"): + self.setto("ate") + elif self.ends("bl"): + self.setto("ble") + elif self.ends("iz"): + self.setto("ize") + elif self.doublec(self.k): + self.k = self.k - 1 + ch = self.b[self.k] + if ch == 'l' or ch == 's' or ch == 'z': + self.k = self.k + 1 + elif (self.m() == 1 and self.cvc(self.k)): + self.setto("e") + + def step1c(self): + """step1c() turns terminal y to i when there is another vowel in the stem.""" + if (self.ends("y") and self.vowelinstem()): + self.b = self.b[:self.k] + 'i' + self.b[self.k + 1:] + + def step2(self): + """step2() maps double suffices to single ones. + so -ization ( = -ize plus -ation) maps to -ize etc. note that the + string before the suffix must give m() > 0. 
+ """ + if self.b[self.k - 1] == 'a': + if self.ends("ational"): + self.r("ate") + elif self.ends("tional"): + self.r("tion") + elif self.b[self.k - 1] == 'c': + if self.ends("enci"): + self.r("ence") + elif self.ends("anci"): + self.r("ance") + elif self.b[self.k - 1] == 'e': + if self.ends("izer"): + self.r("ize") + elif self.b[self.k - 1] == 'l': + if self.ends("bli"): + self.r("ble") # --DEPARTURE-- + # To match the published algorithm, replace this phrase with + # if self.ends("abli"): self.r("able") + elif self.ends("alli"): + self.r("al") + elif self.ends("entli"): + self.r("ent") + elif self.ends("eli"): + self.r("e") + elif self.ends("ousli"): + self.r("ous") + elif self.b[self.k - 1] == 'o': + if self.ends("ization"): + self.r("ize") + elif self.ends("ation"): + self.r("ate") + elif self.ends("ator"): + self.r("ate") + elif self.b[self.k - 1] == 's': + if self.ends("alism"): + self.r("al") + elif self.ends("iveness"): + self.r("ive") + elif self.ends("fulness"): + self.r("ful") + elif self.ends("ousness"): + self.r("ous") + elif self.b[self.k - 1] == 't': + if self.ends("aliti"): + self.r("al") + elif self.ends("iviti"): + self.r("ive") + elif self.ends("biliti"): + self.r("ble") + elif self.b[self.k - 1] == 'g': # --DEPARTURE-- + if self.ends("logi"): + self.r("log") + # To match the published algorithm, delete this phrase + + def step3(self): + """step3() dels with -ic-, -full, -ness etc. similar strategy to step2.""" + if self.b[self.k] == 'e': + if self.ends("icate"): + self.r("ic") + elif self.ends("ative"): + self.r("") + elif self.ends("alize"): + self.r("al") + elif self.b[self.k] == 'i': + if self.ends("iciti"): + self.r("ic") + elif self.b[self.k] == 'l': + if self.ends("ical"): + self.r("ic") + elif self.ends("ful"): + self.r("") + elif self.b[self.k] == 's': + if self.ends("ness"): + self.r("") + + def step4(self): + """step4() takes off -ant, -ence etc., in context vcvc.""" + if self.b[self.k - 1] == 'a': + if self.ends("al"): + pass + else: + return + elif self.b[self.k - 1] == 'c': + if self.ends("ance"): + pass + elif self.ends("ence"): + pass + else: + return + elif self.b[self.k - 1] == 'e': + if self.ends("er"): + pass + else: + return + elif self.b[self.k - 1] == 'i': + if self.ends("ic"): + pass + else: + return + elif self.b[self.k - 1] == 'l': + if self.ends("able"): + pass + elif self.ends("ible"): + pass + else: + return + elif self.b[self.k - 1] == 'n': + if self.ends("ant"): + pass + elif self.ends("ement"): + pass + elif self.ends("ment"): + pass + elif self.ends("ent"): + pass + else: + return + elif self.b[self.k - 1] == 'o': + if self.ends("ion") and (self.b[self.j] == 's' or self.b[self.j] == 't'): + pass + elif self.ends("ou"): + pass + # takes care of -ous + else: + return + elif self.b[self.k - 1] == 's': + if self.ends("ism"): + pass + else: + return + elif self.b[self.k - 1] == 't': + if self.ends("ate"): + pass + elif self.ends("iti"): + pass + else: + return + elif self.b[self.k - 1] == 'u': + if self.ends("ous"): + pass + else: + return + elif self.b[self.k - 1] == 'v': + if self.ends("ive"): + pass + else: + return + elif self.b[self.k - 1] == 'z': + if self.ends("ize"): + pass + else: + return + else: + return + if self.m() > 1: + self.k = self.j + + def step5(self): + """step5() removes a final -e if m() > 1, and changes -ll to -l if + m() > 1. 
+ """ + self.j = self.k + if self.b[self.k] == 'e': + a = self.m() + if a > 1 or (a == 1 and not self.cvc(self.k - 1)): + self.k = self.k - 1 + if self.b[self.k] == 'l' and self.doublec(self.k) and self.m() > 1: + self.k = self.k - 1 + + def stem(self, p, i, j): + """In stem(p,i,j), p is a char pointer, and the string to be stemmed + is from p[i] to p[j] inclusive. Typically i is zero and j is the + offset to the last character of a string, (p[j+1] == '\0'). The + stemmer adjusts the characters p[i] ... p[j] and returns the new + end-point of the string, k. Stemming never increases word length, so + i <= k <= j. To turn the stemmer into a module, declare 'stem' as + extern, and delete the remainder of this file. + """ + # copy the parameters into statics + self.b = p + self.k = j + self.k0 = i + if self.k <= self.k0 + 1: + return self.b # --DEPARTURE-- + + # With this line, strings of length 1 or 2 don't go through the + # stemming process, although no mention is made of this in the + # published algorithm. Remove the line to match the published + # algorithm. + + self.step1ab() + self.step1c() + self.step2() + self.step3() + self.step4() + self.step5() + return self.b[self.k0:self.k + 1] \ No newline at end of file diff --git a/cmpe493/project1/stopwords.txt b/cmpe493/project1/stopwords.txt new file mode 100644 index 0000000..1b5e1f4 --- /dev/null +++ b/cmpe493/project1/stopwords.txt @@ -0,0 +1,54 @@ +a +all +an +and +any +are +as +be +been +but +by +few +for +have +he +her +here +him +his +how +i +in +is +it +its +many +me +my +none +of +on +or +our +she +some +the +their +them +there +they +that +this +us +was +what +when +where +which +who +why +will +with +you +your diff --git a/cmpe493/project1/tokenizer.py b/cmpe493/project1/tokenizer.py new file mode 100644 index 0000000..5aa41db --- /dev/null +++ b/cmpe493/project1/tokenizer.py @@ -0,0 +1,102 @@ +import re +from string import punctuation, digits +from stemmer import PorterStemmer + +class Tokenizer: + """ + Handles tokenize operations + """ + + # Global stemmer instance of Tokenizer class + stemmer = PorterStemmer() + + @staticmethod + def tokenize(text): + """ + Tokenizes given text after normalizing and stemming + """ + # Normalize given text + text = Tokenizer.normalize(text) + # Split text from whitespaces + words = text.split() + # Stem all tokens and return list + return list(map(Tokenizer.stem, words)) + + @staticmethod + def make_lower_case(text): + """ + Makes given text lower case + """ + return text.lower() + + @staticmethod + def remove_stop_words(text): + """ + Removes stop words from given text + """ + # Get stop word list from file + stop_words = Tokenizer.stop_words() + # Replace stop words with empty string + remove_list_regex = re.compile(r'\b|\b'.join(map(re.escape, stop_words))) + return remove_list_regex.sub('', text) + + @staticmethod + def stop_words(): + """ + Returns stop word list from text file + """ + return open('stopwords.txt').read().split() + + @staticmethod + def remove_extra_whitespaces(text): + """ + Removes extra whitespaces from given text such as multiple adjencent space + """ + return re.sub(r'\s+', ' ', text).strip() + + @staticmethod + def remove_punctuation(text): + """ + Removes punctuations from given text + """ + # Replace punctuation with space instead of remove it for hand-to-mouth, six-week-old, euro-certificate + return text.translate(str.maketrans(punctuation, ' ' * len(punctuation))) + + @staticmethod + def remove_digits(text): + """ + Removes digits from given text + """ + return 
text.translate(str.maketrans('', '', digits)) + + @staticmethod + def normalize(text): + """ + Normalizes given text + Steps: + 1. Make lowercase + 2. Remove punctuation + 3. Remove digit + 4. Remove extra whitespace + """ + text = Tokenizer.make_lower_case(text) + # text = Tokenizer.remove_stop_words(text) + text = Tokenizer.remove_punctuation(text) + text = Tokenizer.remove_digits(text) + text = Tokenizer.remove_extra_whitespaces(text) + return text + + @staticmethod + def normalize_and_stem(text): + """ + Normalizes and stems given text + """ + text = Tokenizer.normalize(text) + return Tokenizer.stem(text) + + @staticmethod + def stem(word): + """ + Stems given word with Porter Stemmer + """ + return Tokenizer.stemmer.stem(word, 0, len(word) - 1) diff --git a/cmpe493/project2/CmpE493_HW2_Report.pdf b/cmpe493/project2/CmpE493_HW2_Report.pdf new file mode 100644 index 0000000..ced9aa5 Binary files /dev/null and b/cmpe493/project2/CmpE493_HW2_Report.pdf differ diff --git a/cmpe493/project2/README.md b/cmpe493/project2/README.md new file mode 100644 index 0000000..66e458a --- /dev/null +++ b/cmpe493/project2/README.md @@ -0,0 +1,9 @@ +Entry point of my application is app.py + +You should use python 3.6 + +Run my app with "python3 app.py" command. + +Stop word data have to be in current directory and named as "stopwords.txt" + +It takes about 2 minutes diff --git a/cmpe493/project2/app.py b/cmpe493/project2/app.py new file mode 100644 index 0000000..b4632e4 --- /dev/null +++ b/cmpe493/project2/app.py @@ -0,0 +1,161 @@ + +from reader import Reader +from topic import Topic + + +# TRANING +# traning_docs, test_docs = Reader.read_files('test') +traning_docs, test_docs = Reader.read_files('reuters21578') + +number_of_docs = len(traning_docs) + +# Find vocabulary length +vocabulary = [] +for doc in traning_docs: + vocabulary += doc.words +vocabulary = set(vocabulary) +vocabulary_length = len(vocabulary) + +topics = ["earn", "acq", "money-fx", "grain", "crude"] + +knowledge = {} + +# Train topic with all lexicons +for topic in topics: + t = Topic(name=topic, documents=[doc for doc in traning_docs if doc.topic == topic], total_n_docs=number_of_docs, vocabulary_length=vocabulary_length) + t.train_all_features() + knowledge[topic] = t + +# Select features +feature_vocabulary = [] +for topic in knowledge.values(): + topic.select_features(knowledge.values(), 50) + feature_vocabulary += topic.features + +# Find feature vocabulary length +feature_vocabulary = set(feature_vocabulary) +feature_vocabulary_length = len(feature_vocabulary) + +for topic in knowledge.values(): + topic.train_mutual(feature_vocabulary) + +# TESTING +print("Testing documents") +for doc in test_docs: + doc.apply_bayes_with_all_features(knowledge.values(), vocabulary) + doc.apply_bayes_with_mutual(knowledge.values(), feature_vocabulary) + +# Calculating performance +measures = {} +measures['contingency'] = { "all": { "tp":0, "fp":0, "fn":0, "tn":0 }, "mutual": { "tp":0, "fp":0, "fn":0, "tn":0 } } +measures['macro_total'] = { "all": { "precision":0, "recall":0, "f":0 }, "mutual": { "precision":0, "recall":0, "f":0 } } + +for topic in topics: + measures[topic] = { "all": { "tp":0, "fp":0, "fn":0, "tn":0 }, "mutual": { "tp":0, "fp":0, "fn":0, "tn":0 } } + for doc in test_docs: + if topic == doc.topic: + # Truth YES + + # All Lexicon + if doc.topic == doc.guess_all: + # Classifier YES + measures[topic]["all"]["tp"] += 1 + measures['contingency']["all"]["tp"] += 1 + else: + # Classifier NO + measures[topic]["all"]["fn"] += 1 + 
measures['contingency']["all"]["fn"] += 1 + + # Mutual Information + if doc.topic == doc.guess_mutual: + # Classifier YES + measures[topic]["mutual"]["tp"] += 1 + measures['contingency']["mutual"]["tp"] += 1 + else: + # Classifier NO + # Classifier NO + measures[topic]["mutual"]["fn"] += 1 + measures['contingency']["mutual"]["fn"] += 1 + else: + # Truth NO + # All Lexicon + if doc.topic == doc.guess_all: + # Classifier YES + measures[topic]["all"]["tn"] += 1 + measures['contingency']["all"]["tn"] += 1 + else: + # Classifier NO + measures[topic]["all"]["fp"] += 1 + measures['contingency']["all"]["fp"] += 1 + + # Mutual Information + if doc.topic == doc.guess_mutual: + # Classifier YES + measures[topic]["mutual"]["tn"] += 1 + measures['contingency']["mutual"]["tn"] += 1 + else: + # Classifier NO + measures[topic]["mutual"]["fp"] += 1 + measures['contingency']["mutual"]["fp"] += 1 + +print("\n\t\t\t\t\tDOCUMENT COUNTS") +print("Train") +total = 0 +for key, topic in knowledge.items(): + print(topic.name + ": " + str(len(topic.documents))) + total += len(topic.documents) +print("Total: " + str(total)) + +print("\n\nTest") +total = {} +for doc in test_docs: + count = total.get(doc.topic, 0) + count += 1 + total[doc.topic] = count +for topic, count in total.items(): + print(topic + ": " + str(count)) +print("Total: " + str(sum(total.values()))) + +print("\n\t\t\t\t\tSELECTED FEATURES") +for key, topic in knowledge.items(): + print(topic.name) + print(topic.features) + print("") + +print("\n\t\t\t\t\tPERFORMANCE VALUES") +# Calculate precision, recall and f-measure +for topic, measure in measures.items(): + if topic != "macro_total": + for classifier, values in measure.items(): + precision = measures[topic][classifier]['tp'] / (measures[topic][classifier]['tp'] + measures[topic][classifier]['fp']) + recall = measures[topic][classifier]['tp'] / (measures[topic][classifier]['tp'] + measures[topic][classifier]['fn']) + f = (2 * precision * recall) / (precision + recall) + + measures[topic][classifier]['precision'] = precision + measures[topic][classifier]['recall'] = recall + measures[topic][classifier]['f'] = f + + if topic != 'contingency': + print("\n" + topic + " - " + classifier) + print("\tPrecision: " + str(precision)) + print("\tRecall: " + str(recall)) + print("\tF-measure: " + str(f)) + measures['macro_total'][classifier]['precision'] += precision + measures['macro_total'][classifier]['recall'] += recall + measures['macro_total'][classifier]['f'] += f + +print("\nTraning with All Lexicon") +print("\tMacro-Averaged Precision: " + str(measures['macro_total']['all']['precision'] / len(topics))) +print("\tMicro-Averaged Precision: " + str(measures['contingency']['all']['precision'])) +print("\n\tMacro-Averaged Recall: " + str(measures['macro_total']['all']['recall'] / len(topics))) +print("\tMicro-Averaged Recall: " + str(measures['contingency']['all']['recall'])) +print("\n\tMacro-Averaged F-measure: " + str(measures['macro_total']['all']['f'] / len(topics))) +print("\tMicro-Averaged F-measure: " + str(measures['contingency']['all']['f'])) + +print("\n\nTraning with Selected Features by Mutual Information") +print("\tMacro-Averaged Precision: " + str(measures['macro_total']['mutual']['precision'] / len(topics))) +print("\tMicro-Averaged Precision: " + str(measures['contingency']['mutual']['precision'])) +print("\n\tMacro-Averaged Recall: " + str(measures['macro_total']['mutual']['recall'] / len(topics))) +print("\tMicro-Averaged Recall: " + str(measures['contingency']['mutual']['recall'])) 
+print("\n\tMacro-Averaged F-measure: " + str(measures['macro_total']['mutual']['f'] / len(topics))) +print("\tMicro-Averaged F-measure: " + str(measures['contingency']['mutual']['f'])) \ No newline at end of file diff --git a/cmpe493/project2/cmpe493-assignment2-specification.pdf b/cmpe493/project2/cmpe493-assignment2-specification.pdf new file mode 100644 index 0000000..306a675 Binary files /dev/null and b/cmpe493/project2/cmpe493-assignment2-specification.pdf differ diff --git a/cmpe493/project2/document.py b/cmpe493/project2/document.py new file mode 100644 index 0000000..8aa698d --- /dev/null +++ b/cmpe493/project2/document.py @@ -0,0 +1,37 @@ +import operator + +class Document: + """ + Handles documents + """ + + def __init__(self, id, words, topic): + self.id = id + self.words = words + self.topic = topic + + def apply_bayes_with_all_features(self, topics, vocabulary): + """ + Guesses topic by naive bayes that trained by all lexicon + """ + scores = {} + for topic in topics: + score = topic.prior + for word in self.words: + if word in vocabulary: + score += topic.get_word_prob_all(word) + scores[topic.name] = score + self.guess_all = max(scores.items(), key=operator.itemgetter(1))[0] + + def apply_bayes_with_mutual(self, topics, vocabulary): + """ + Guesses topic by naive bayes that trained by selected features + """ + scores = {} + for topic in topics: + score = topic.prior + for word in self.words: + if word in vocabulary: + score += topic.get_word_prob_mutual(word) + scores[topic.name] = score + self.guess_mutual = max(scores.items(), key=operator.itemgetter(1))[0] \ No newline at end of file diff --git a/cmpe493/project2/reader.py b/cmpe493/project2/reader.py new file mode 100644 index 0000000..805ea5b --- /dev/null +++ b/cmpe493/project2/reader.py @@ -0,0 +1,69 @@ +import os, re, pickle +from tokenizer import Tokenizer +from document import Document + +class Reader: + """ + Handles reading operations + """ + + topics = ["earn", "acq", "money-fx", "grain", "crude"] + + @classmethod + def read_files(self, directory=None): + """ + Returns read documents from data directory + """ + print("Reading files") + # If no directory is given, set it to current directory + directory = os.getcwd() if directory is None else directory + filenames = os.listdir(directory) + # Get all file with .sgm extension + filenames = [filename for filename in filenames if filename.endswith(".sgm")] + filenames.sort() + traning_docs = [] + test_docs = [] + # Extract documents from each file + print("Extracting documents") + for filename in filenames: + raw_data = open(os.path.join(directory, filename), "r", encoding="latin-1").read() + traning, test = self.extract_documents(raw_data) + traning_docs += traning + test_docs += test + return traning_docs, test_docs + + @classmethod + def extract_documents(self, raw_data): + """ + Extracts documents from raw string + """ + traning_docs = [] + test_docs = [] + # Seperate each document + raw_documents = raw_data.split('') + # Extract information from each raw document string + for raw_document in raw_documents: + doc_id = re.match(r'.+?NEWID=\"(?P\d+)\">.+?', raw_document, re.DOTALL) + doc_title = re.match(r'.+?(?P<title>.+?).+?', raw_document, re.DOTALL) + doc_body = re.match(r'.+?(?P.+?).+?', raw_document, re.DOTALL) + doc_topics = re.match(r'.+?(?P.+?).+?', raw_document, re.DOTALL) + if doc_topics: + doc_topics = re.findall(r'.*?(?P.+?).*?', doc_topics.group('topics'), re.DOTALL) + doc_type = re.findall(r'LEWISSPLIT=\"(?P\w+?)\"', raw_document) + doc_type = 
doc_type[0] if len(doc_type) == 1 else None + + # If raw corpus has ID, it's a document, add it to list + if doc_id and doc_topics and doc_type: + intersect = list(set(self.topics) & set(doc_topics)) + if len(intersect) == 1: + doc_id = int(doc_id.group('id')) + # If it's not have title or body, put empty string instead of them + doc_title = doc_title.group('title') if doc_title else '' + doc_body = doc_body.group('body') if doc_body else '' + doc_class = intersect[0] + doc = Document(id=doc_id, words=Tokenizer.tokenize(doc_title + " " + doc_body), topic=doc_class) + if doc_type == "TRAIN": + traning_docs.append(doc) + elif doc_type == "TEST": + test_docs.append(doc) + return traning_docs, test_docs \ No newline at end of file diff --git a/cmpe493/project2/stopwords.txt b/cmpe493/project2/stopwords.txt new file mode 100644 index 0000000..1b5e1f4 --- /dev/null +++ b/cmpe493/project2/stopwords.txt @@ -0,0 +1,54 @@ +a +all +an +and +any +are +as +be +been +but +by +few +for +have +he +her +here +him +his +how +i +in +is +it +its +many +me +my +none +of +on +or +our +she +some +the +their +them +there +they +that +this +us +was +what +when +where +which +who +why +will +with +you +your diff --git a/cmpe493/project2/tokenizer.py b/cmpe493/project2/tokenizer.py new file mode 100644 index 0000000..821f8e9 --- /dev/null +++ b/cmpe493/project2/tokenizer.py @@ -0,0 +1,80 @@ +import re +from string import punctuation, digits + +class Tokenizer: + """ + Handles tokenize operations + """ + + + @staticmethod + def tokenize(text): + """ + Tokenizes given text after normalizing + """ + # Normalize given text + text = Tokenizer.normalize(text) + # Split text from whitespaces + return text.split() + + @staticmethod + def make_lower_case(text): + """ + Makes given text lower case + """ + return text.lower() + + @staticmethod + def remove_stop_words(text): + """ + Removes stop words from given text + """ + # Get stop word list from file + stop_words = Tokenizer.stop_words() + # Replace stop words with empty string + remove_list_regex = re.compile(r'\b|\b'.join(map(re.escape, stop_words))) + return remove_list_regex.sub('', text) + + @staticmethod + def stop_words(): + """ + Returns stop word list from text file + """ + return open('stopwords.txt').read().split() + + @staticmethod + def remove_extra_whitespaces(text): + """ + Removes extra whitespaces from given text such as multiple adjencent space + """ + return re.sub(r'\s+', ' ', text).strip() + + @staticmethod + def remove_punctuation(text): + """ + Removes punctuations from given text + """ + # Replace punctuation with space instead of remove it for hand-to-mouth, six-week-old, euro-certificate + return text.translate(str.maketrans(punctuation, ' ' * len(punctuation))) + + @staticmethod + def remove_digits(text): + """ + Removes digits from given text + """ + return text.translate(str.maketrans('', '', digits)) + + @staticmethod + def normalize(text): + """ + Normalizes given text + Steps: + 1. Make lowercase + 2. Remove punctuation + 3. Remove digit + 4. 
Remove extra whitespace + """ + text = Tokenizer.make_lower_case(text) + text = Tokenizer.remove_stop_words(text) + text = Tokenizer.remove_punctuation(text) + return text \ No newline at end of file diff --git a/cmpe493/project2/topic.py b/cmpe493/project2/topic.py new file mode 100644 index 0000000..a97f687 --- /dev/null +++ b/cmpe493/project2/topic.py @@ -0,0 +1,95 @@ +from collections import Counter +import math +import operator + +class Topic: + + alpha = 1 + + def __init__(self, name, documents, total_n_docs, vocabulary_length): + self.name = name + self.documents = documents + self.text = [] + for doc in self.documents: + self.text += doc.words + self.total_n_docs = total_n_docs + self.vocabulary_length = vocabulary_length + + def train_all_features(self): + """ + Trains Naive bayes with all lexicon + """ + print("Traning \"" + self.name + "\" class with all features") + # Calculate P(c_j) + self.prior = math.log2(len(self.documents) / self.total_n_docs) + + self.words_prob_all = {} + self.words_doc_count = {} + text_length = len(self.text) + counter = Counter(self.text) + + for word in counter.keys(): + occurence = counter.get(word, 0) + # Calculate P(w | c_j) for each word + self.words_prob_all[word] = math.log2((occurence + self.alpha) / (text_length + self.alpha * self.vocabulary_length)) + + # Calculate document occurence count for each word + self.words_doc_count[word] = len([doc for doc in self.documents if word in doc.words]) + + def get_word_prob_all(self, word): + """ + Get conditional probability for word. If dictionary doesn't it, return smoothed value + """ + return self.words_prob_all.get(word, math.log2(self.alpha / (len(self.text) + self.alpha * self.vocabulary_length))) + + def train_mutual(self, feature_vocabulary): + """ + Trains Naive bayes with selected features by mutual information + """ + print("Traning \"" + self.name + "\" class with mutual information") + self.feature_vocabulary_length = len(feature_vocabulary) + self.words_prob_mutual = {} + self.feature_text = [word for word in self.text if word in feature_vocabulary] + text_length = len(self.feature_text) + counter = Counter(self.feature_text) + for word in counter.keys(): + occurence = counter.get(word, 0) + # Calculate P(w | c_j) for each word based on selected words + self.words_prob_mutual[word] = math.log2((occurence + self.alpha) / (text_length + self.alpha * self.feature_vocabulary_length)) + + def get_word_prob_mutual(self, word): + """ + Get conditional probability that calculated with selected features for word. 
If the dictionary does not contain it, return the smoothed value
+        """
+        return self.words_prob_mutual.get(word, math.log2(self.alpha / (len(self.feature_text) + self.alpha * self.feature_vocabulary_length)))
+
+    def get_words_doc_count(self, word, contain = True):
+        """
+        Get the number of documents containing the word; if contain is False, return the number of documents that do not contain it
+        """
+        if contain:
+            return self.words_doc_count.get(word, 0)
+        else:
+            return len(self.documents) - self.words_doc_count.get(word, 0)
+
+    def select_features(self, topics, count):
+        """
+        Selects features via mutual information
+        """
+        print("Selecting features for the " + self.name + " class")
+        # Calculate the mutual information utility of each word
+        words_utility = {}
+        for word in set(self.text):
+            n11 = self.get_words_doc_count(word)
+            n01 = self.get_words_doc_count(word, False)
+            n10 = 0
+            n00 = 0
+            for topic in topics:
+                if self.name != topic.name:
+                    n10 += topic.get_words_doc_count(word)
+                    n00 += topic.get_words_doc_count(word, False)
+            n = n11 + n01 + n10 + n00
+            words_utility[word] = ((n11 / n) * math.log2((n * n11 + 1) / ((n11 + n10) * (n11 + n01)))) + ((n01 / n) * math.log2((n * n01 + 1) / ((n01 + n00) * (n01 + n11)))) + ((n10 / n) * math.log2((n * n10 + 1) / ((n10 + n11) * (n10 + n00)))) + ((n00 / n) * math.log2((n * n00 + 1) / ((n00 + n01) * (n00 + n10))))
+        # Keep the count highest-scoring words as features (50 in this assignment)
+        self.features = [x[0] for x in sorted(words_utility.items(), key=operator.itemgetter(1), reverse=True)[:count]]
+
diff --git a/cmpe493/project3/CmpE493_HW3_Report.pdf b/cmpe493/project3/CmpE493_HW3_Report.pdf
new file mode 100644
index 0000000..bc1b172
Binary files /dev/null and b/cmpe493/project3/CmpE493_HW3_Report.pdf differ
diff --git a/cmpe493/project3/README.md b/cmpe493/project3/README.md
new file mode 100644
index 0000000..509275b
--- /dev/null
+++ b/cmpe493/project3/README.md
@@ -0,0 +1,29 @@
+The entry point of my application is app.py
+
+You should use Python 3.6.5
+
+Run the app with the "python3 app.py COMMAND DATA_DIRECTORY [FILE]" command.
+
+The file name is optional.
+If no file name is given, the command runs for all files in the data directory.
+
+## Available Commands ##
+    lex     - Print LexRank scores of the sentences
+    summary - Print the generated summary of the news article
+    gold    - Print the gold summary of the news article
+    rouge   - Print the average ROUGE scores
+
+## Example ##
+
+python3 app.py lex Dataset 1.txt
+    Prints LexRank scores of the sentences in 1.txt
+
+-----
+
+python3 app.py lex Dataset
+    Prints LexRank scores of the sentences in all files, line by line
+
+-----
+
+python3 app.py rouge Dataset
+    Prints the average ROUGE scores over all files
\ No newline at end of file
diff --git a/cmpe493/project3/app.py b/cmpe493/project3/app.py
new file mode 100644
index 0000000..f03dfe9
--- /dev/null
+++ b/cmpe493/project3/app.py
@@ -0,0 +1,217 @@
+import os, sys
+import math
+import numpy as np
+from rouge import Rouge
+
+# CONSTANTS
+COSINE_SIMILARITY_THRESHOLD = 0.10
+TELEPORTATION_RATE = 0.15
+ERROR_TOLERANCE = 0.00001
+
+def read_news(directory):
+    """
+    Reads news from files. 
Returns news and summaries + """ + + filenames = os.listdir(directory) + # Get all file with .txt extension + filenames = [filename for filename in filenames if filename.endswith(".txt")] + filenames.sort() + news = {} + summaries = {} + # Extract news from each file + for filename in filenames: + raw_data = open(os.path.join(directory, filename), "r", encoding="utf-8").read() + raw_news = raw_data.split('\n\n') + news[filename] = raw_news[0].strip().split('\n') + summaries[filename] = raw_news[1].strip() + return news, summaries + +def calculate_idf(news): + """ + Calculates idf for given news + """ + + df = {} + idf = {} + N = len(news) + # Count document frequency for each term + for text in news.values(): + tokens = " ".join(text).split() + terms = set(tokens) + for term in terms: + df[term] = df.get(term, 0) + 1 + # Calculate idf for each term + for term, freq in df.items(): + idf[term] = math.log10(N / freq) + return idf + +def calculate_tf_idf(sentence, idf, terms): + """ + Calculates tf idf vector for given sentence + """ + + counts = {} + # Count term frequencies + for token in sentence.split(): + counts[token] = counts.get(token, 0) + 1 + + tf_idf = [] + for term in terms: + # Calculate tf + tf = 1 + math.log10(counts.get(term, 0.1)) + # Add tf idf to result + tf_idf.append(tf * idf.get(term)) + return tf_idf + +def unit_vector(vec): + """ + Returns unit vector that points same direction with given vector + """ + + return vec / np.linalg.norm(vec) + +def cosine_similarity(vec1, vec2): + """ + Calculated cosine similarity between given two vectors + """ + + return np.dot(unit_vector(vec1), unit_vector(vec2)) + +def print_matrix(m): + """ + Prints matrix prettier + """ + for row in m: + text = "" + for column in row: + text += "{:^10.5f}".format(column) + print(text) + +def power_iteration(m): + """ + Returns eagen vector of given matrix + """ + x = [1/len(m)] * len(m) + while True: + x_new = np.matmul(x, m) + for i in range(len(m)): + if abs(x_new[i] - x[i]) >= ERROR_TOLERANCE: + break + return x_new + x = x_new + +def calculate_lex_rank(sentences, idf): + """ + Calculates lex rank of given sentences + """ + + lex_ranks = [] + tf_idf = [] + terms = list(set(" ".join(sentences).split())) + dim = len(sentences) + # Calculate tf idfs of sentences + for sentence in sentences: + tf_idf.append(calculate_tf_idf(sentence, idf, terms)) + + # Build adjencency matrix with 0 and 1 + adj_mat = [] + for x in range(dim): + adj_mat.append([]) + for y in range(dim): + cos_sim = cosine_similarity(tf_idf[x], tf_idf[y]) + adj_mat[x].append(1 if (cos_sim >= COSINE_SIMILARITY_THRESHOLD) else 0) + + # Convert adjencency matrix to probability matrix with teleportation rate + for x in range(dim): + N = sum(adj_mat[x]) + for y in range(dim): + adj_mat[x][y] = ((adj_mat[x][y] / N) * (1 - TELEPORTATION_RATE)) + (TELEPORTATION_RATE / dim) + + return list(power_iteration(adj_mat)) + +def summarize(sentences, idf): + """ + Generates summary for given sentences + """ + + maxest = [] + lex_rank = calculate_lex_rank(sentences, idf) + + # Find three sentences indexes that have highest lex rank + for i in range(3): + max_lex = max(lex_rank) + max_index = lex_rank.index(max_lex) + maxest.append(max_index) + lex_rank[max_index] = 0 + + # Sort indexes. 
Don't change topic flow + maxest.sort() + summary = [] + for index in maxest: + summary.append(sentences[index]) + + return "\n".join(summary) + +############################### +####### APP ENTRY POINT ####### +############################### + +# Check arguments +if len(sys.argv) < 2: + print("You have to give command name") + print("python3 app.py [COMMAND] [DATA_DIRECTORY] [FILE_NAME]") + exit(1) +elif len(sys.argv) < 3: + print("You have to give directory name") + print("python3 app.py [COMMAND] [DATA_DIRECTORY] [FILE_NAME]") + exit(1) + +command = sys.argv[1] +directory = sys.argv[2] + +# Read data set +news, summaries = read_news(directory) + +# if no file is given process all of them +files = [sys.argv[3]] if len(sys.argv) == 4 else news.keys() + +# Calculate IDFs +idf = calculate_idf(news) + +# Run command +if command == "lex": + for file in files: + lex_rank = calculate_lex_rank(news[file], idf) + print(" ".join(["{:.3f}".format(rank) for rank in lex_rank])) +elif command == "summary": + for file in files: + print(summarize(news[file], idf)) +elif command == "gold": + for file in files: + print(summaries[file]) +elif command == "rouge": + rouge = Rouge() + total = {} + total["rouge-1"] = {"f":0, "r":0, "p":0} + total["rouge-2"] = {"f":0, "r":0, "p":0} + total["rouge-l"] = {"f":0, "r":0, "p":0} + for file in files: + generated_summary = summarize(news[file], idf) + gold_summary = summaries[file] + scores = rouge.get_scores(gold_summary, generated_summary) + for type in ["rouge-1", "rouge-2", "rouge-l"]: + for stat in ["p", "r", "f"]: + total[type][stat] += scores[0][type][stat] + + # Divide sum to lenghts + for type in ["rouge-1", "rouge-2", "rouge-l"]: + for stat in ["p", "r", "f"]: + total[type][stat] /= len(files) + + print("Average Rouge Scores") + for type in ["rouge-1", "rouge-2", "rouge-l"]: + print(type) + for stat in ["p", "r", "f"]: + print("\t" + stat + ": " + str(total[type][stat])) + print("\n") \ No newline at end of file diff --git a/cmpe493/project3/cmpe493-assignment3-specification.pdf b/cmpe493/project3/cmpe493-assignment3-specification.pdf new file mode 100644 index 0000000..e1679c7 Binary files /dev/null and b/cmpe493/project3/cmpe493-assignment3-specification.pdf differ
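Note: the LexRank computation added in cmpe493/project3/app.py (cosine-similarity adjacency matrix, teleportation rate, power iteration) is easier to follow outside the diff, so here is a minimal, self-contained sketch of the same idea. It is illustrative only: the constants mirror the ones in app.py, but the `lexrank` helper and the toy sentence vectors are invented for this example and are not part of the project code.

```python
# Minimal LexRank sketch (illustration only, not project code).
# Assumes sentences are already turned into tf-idf vectors; the vectors below are toy data.
import numpy as np

TELEPORTATION_RATE = 0.15
SIMILARITY_THRESHOLD = 0.10
ERROR_TOLERANCE = 1e-5

def lexrank(vectors):
    # Cosine similarity between every pair of sentence vectors
    unit = vectors / np.linalg.norm(vectors, axis=1, keepdims=True)
    sim = unit @ unit.T
    # Binary adjacency matrix: an edge wherever similarity clears the threshold
    adj = (sim >= SIMILARITY_THRESHOLD).astype(float)
    # Row-normalize and mix in the teleportation term so every row stays a probability distribution
    n = len(vectors)
    prob = adj / adj.sum(axis=1, keepdims=True)
    prob = (1 - TELEPORTATION_RATE) * prob + TELEPORTATION_RATE / n
    # Power iteration: repeatedly multiply until the score vector stops changing
    x = np.full(n, 1 / n)
    while True:
        x_new = x @ prob
        if np.max(np.abs(x_new - x)) < ERROR_TOLERANCE:
            return x_new
        x = x_new

toy_vectors = np.array([[1.0, 0.2, 0.0],
                        [0.9, 0.3, 0.1],
                        [0.0, 0.1, 1.0]])
print(lexrank(toy_vectors))  # higher score = more central sentence
```

The summary step in app.py then simply takes the three highest-scoring sentences and prints them in their original document order.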