Welcome to WordPress. This is your first post. Edit or delete it, then start writing!
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Fetch the NLTK data this script depends on.
nltk.download('stopwords')  # stop-word lists read by stopwords.words() below
nltk.download('punkt')      # Punkt tokenizer models used by older NLTK releases
# Newer NLTK releases (3.9 and later) make word_tokenize load the 'punkt_tab'
# resource instead of 'punkt'; without it tokenizing raises LookupError.
nltk.download('punkt_tab')
# Build the English stop-word set once so each membership test is a set lookup.
stop_words = set(stopwords.words('english'))
def remove_common_words(text: str) -> str:
    """Return *text* with stop words removed.

    The text is split into tokens with ``word_tokenize``. A token is dropped
    when its lowercase form is in the module-level ``stop_words`` set, so the
    match is case-insensitive. The remaining tokens keep their original
    casing and order.

    Args:
        text: The string to filter.

    Returns:
        The kept tokens joined by single spaces. The original spacing and
        line breaks of *text* are not preserved.
    """
    # Tokenize the text
    word_tokens = word_tokenize(text)
    # Filter out the stop words (compare lowercased so "The" matches "the")
    filtered_text = [
        word for word in word_tokens if word.lower() not in stop_words
    ]
    return ' '.join(filtered_text)
if __name__ == "__main__":
    # Read the whole document into memory. The encoding is given explicitly
    # so the result does not depend on the platform's default locale encoding.
    with open('document.txt', 'r', encoding='utf-8') as file:
        document = file.read()
    # Remove common words and print
    cleaned_document = remove_common_words(document)
    print(cleaned_document)
Hi, this is a comment.
To get started with moderating, editing, and deleting comments, please visit the Comments screen in the dashboard.
Commenter avatars come from Gravatar.