Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Chatbot/ad_nltk.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
68 lines (58 sloc)
2.65 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NLTK Project (2018) Natural Language Toolkit [online] available from | |
# <https://www.nltk.org>[10 November 2018] | |
import nltk | |
# Text Mining Online (2014) Dive Into NLTK, Part II: Sentence Tokenize and Word Tokenize [online] available from | |
# <https://textminingonline.com/dive-into-nltk-part-ii-sentence-tokenize-and-word-tokenize>[10 November 2018] | |
from nltk.tokenize import word_tokenize | |
# GeekforGeeks (n. d.) How to get synonyms/antonyms from NLTK WordNet in Python? [online] available from | |
# <https://www.geeksforgeeks.org/get-synonymsantonyms-nltk-wordnet-python/>[12 November 2018] | |
from nltk.corpus import wordnet | |
# Text Mining Online (2014) Dive Into NLTK, Part IV: Stemming and Lemmatization [online] available from | |
# <https://textminingonline.com/dive-into-nltk-part-iv-stemming-and-lemmatization>[10 November 2018] | |
from nltk.stem import WordNetLemmatizer | |
def find_word_synonyms(word):
    """Return a list of synonyms for *word*.

    Looks up every WordNet synset for *word* and collects the name of the
    first lemma of each synset, so the list holds one representative
    synonym per sense of the word.
    """
    return [synset.lemmas()[0].name() for synset in wordnet.synsets(word)]
######## Functions below are not used by any other module. ########
def lemmatize_texts(text):
    """Tokenize *text* and lemmatize (find the root word of) each token.

    Each token is lemmatized as a verb (``pos='v'``), e.g. "running" -> "run".
    Returns the lemmatized tokens as a list.
    """
    # Bug fix: the original bound the class itself (``WordNetLemmatizer``)
    # rather than an instance, so ``lemmatize`` was called unbound with the
    # token doubling as ``self`` — the "additional argument" the original
    # comment could not explain. Instantiating the lemmatizer removes the
    # need for the duplicated argument.
    lemmatizer = WordNetLemmatizer()
    lemmatized_tokens = []
    for token in word_tokenize(text):
        lemmatized_tokens.append(lemmatizer.lemmatize(token, pos='v'))
    return lemmatized_tokens
def find_token_synonyms(tokens):
    """Return a list of synonyms for the given tokens.

    *tokens* must be an iterable of words (e.g. the output of
    ``word_tokenize``). For every token, the name of the first lemma of
    each of its WordNet synsets is collected — one representative synonym
    per word sense, concatenated across all tokens.
    """
    # Removed the unused ``token_antonyms`` local left over from a
    # copy/paste of the antonym variant of this function.
    token_synonyms = []
    for token in tokens:
        for synset in wordnet.synsets(token):
            token_synonyms.append(synset.lemmas()[0].name())
    return token_synonyms
def find_antonyms(tokens):
    """Return a list of antonyms for the given tokens.

    *tokens* must be an iterable of words (e.g. the output of
    ``word_tokenize``). For every token, the first lemma of each WordNet
    synset is inspected; when that lemma has antonyms, the name of its
    first antonym is collected. Lemmas without antonyms are skipped.
    """
    # Removed the unused ``token_synonyms`` local, and call ``antonyms()``
    # once per lemma instead of twice.
    token_antonyms = []
    for token in tokens:
        for synset in wordnet.synsets(token):
            antonyms = synset.lemmas()[0].antonyms()
            if antonyms:  # ignore lemmas that only have synonyms
                token_antonyms.append(antonyms[0].name())
    return token_antonyms