Skip to content
Permalink
2f85e7af45
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
68 lines (58 sloc) 2.65 KB
# NLTK Project (2018) Natural Language Toolkit [online] available from
# <https://www.nltk.org>[10 November 2018]
import nltk
# Text Mining Online (2014) Dive Into NLTK, Part II: Sentence Tokenize and Word Tokenize [online] available from
# <https://textminingonline.com/dive-into-nltk-part-ii-sentence-tokenize-and-word-tokenize>[10 November 2018]
from nltk.tokenize import word_tokenize
# GeekforGeeks (n. d.) How to get synonyms/antonyms from NLTK WordNet in Python? [online] available from
# <https://www.geeksforgeeks.org/get-synonymsantonyms-nltk-wordnet-python/>[12 November 2018]
from nltk.corpus import wordnet
# Text Mining Online (2014) Dive Into NLTK, Part IV: Stemming and Lemmatization [online] available from
# <https://textminingonline.com/dive-into-nltk-part-iv-stemming-and-lemmatization>[10 November 2018]
from nltk.stem import WordNetLemmatizer
def find_word_synonyms(word):
    """Return a list of synonym names for *word*.

    Looks *word* up in WordNet and takes the first lemma of each
    matching synset as that synset's representative synonym.
    """
    # One entry per synset: the name of its first lemma.
    return [synset.lemmas()[0].name() for synset in wordnet.synsets(word)]
######## Functions below are not used by other modules. ########
def lemmatize_texts(text):  # to lemmatize a word is to find its root word
    """Tokenize *text* and lemmatize each token as a verb.

    Parameters
    ----------
    text : str
        Raw text, split into tokens with ``word_tokenize``.

    Returns
    -------
    list of str
        The tokens reduced to their root forms (``pos='v'``).
    """
    # Bug fix: the original assigned the WordNetLemmatizer *class*
    # (no parentheses), so ``lemmatize`` was an unbound call and the
    # author had to pass ``token`` twice to fill the ``self`` slot
    # ("reasons for error still unknown"). Instantiating the class
    # makes the normal single-argument call work.
    lemmatizer = WordNetLemmatizer()
    lemmatized_tokens = []
    for token in word_tokenize(text):
        lemmatized_tokens.append(lemmatizer.lemmatize(token, pos='v'))
    return lemmatized_tokens
def find_token_synonyms(tokens):
    """Return synonym names for a list of tokenized words.

    Parameters
    ----------
    tokens : list of str
        Tokenized words to look up in WordNet.

    Returns
    -------
    list of str
        The first lemma name of every synset of every token
        (duplicates possible; empty when nothing matches).
    """
    # Removed the unused ``token_antonyms`` local left over from a
    # copy-paste of ``find_antonyms``.
    token_synonyms = []
    for token in tokens:
        for synset in wordnet.synsets(token):
            token_synonyms.append(synset.lemmas()[0].name())
    return token_synonyms
def find_antonyms(tokens):
    """Return antonym names for a list of tokenized words.

    Parameters
    ----------
    tokens : list of str
        Tokenized words to look up in WordNet.

    Returns
    -------
    list of str
        For each synset of each token, the first antonym of its first
        lemma, when that lemma has any antonyms at all
        (duplicates possible; empty when nothing matches).
    """
    # Removed the unused ``token_synonyms`` local left over from a
    # copy-paste of ``find_token_synonyms``; renamed ``synonym`` to
    # ``lemma`` since it is a Lemma object, not a synonym string.
    token_antonyms = []
    for token in tokens:
        for synset in wordnet.synsets(token):
            lemma = synset.lemmas()[0]
            if lemma.antonyms():  # keep only lemmas that have antonyms
                token_antonyms.append(lemma.antonyms()[0].name())
    return token_antonyms