Welcome to WordPress. This is your first post. Edit or delete it, then start writing!

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Download the stop word list and the tokenizer data used by word_tokenize.
# NLTK 3.9 and later load the 'punkt_tab' resource instead of the pickled
# 'punkt' models, so both are fetched to keep the script working across
# NLTK versions.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('punkt_tab')

# Load the English stop words into a set so membership tests are fast
stop_words = set(stopwords.words('english'))

def remove_common_words(text):
    """Return *text* with stop words removed.

    The text is tokenized with ``word_tokenize``; every token whose
    lower-cased form appears in the module-level ``stop_words`` set is
    dropped. The remaining tokens are joined with single spaces, so the
    original whitespace of *text* is not preserved.
    """
    kept_tokens = []
    for token in word_tokenize(text):
        # The lookup uses the lower-cased token, so the match does not
        # depend on how the token was capitalized in the input.
        if token.lower() in stop_words:
            continue
        kept_tokens.append(token)

    return ' '.join(kept_tokens)

if __name__ == "__main__":
    # Read the whole document into memory. The encoding is stated explicitly
    # so decoding does not depend on the platform's default locale encoding.
    with open('document.txt', 'r', encoding='utf-8') as file:
        document = file.read()

    # Remove the stop words and print the cleaned text to standard output
    cleaned_document = remove_common_words(document)
    print(cleaned_document)
One thought on “Hello world!”

Leave a Reply

Your email address will not be published. Required fields are marked *