import json

from nltk.stem import WordNetLemmatizer

# The lemmatizer needs the WordNet corpus; if it is missing, run
# nltk.download('wordnet') once before using this module.
lemmatizer = WordNetLemmatizer()

"""
Note: The following parts of speech occur in the dataset:

{None,
 'adjective',
 'adverb',
 'conjunction',
 'definite article',
 'noun',
 'obj',
 'pl',
 'preposition',
 'pronoun',
 'verb'}
"""


class WordsApi:
    def __init__(self, path, path_extra):
        """Load the main and supplementary JSON dumps into a single dict.

        Entries in `path_extra` overwrite entries with the same key
        from `path`.
        """
        self.path = path
        self.path_extra = path_extra
        with open(path, encoding='utf-8') as f:
            self.data = json.load(f)
        with open(path_extra, encoding='utf-8') as f:
            self.data.update(json.load(f))

    def lookup(self, word, pos='', offset=0, limit=10):
        """Return up to `limit` definition records for `word`, skipping the
        first `offset` matches.

        If `pos` is non-empty, only senses with that exact part of speech
        are returned. The surface form is checked first; if the WordNet
        lemma differs (e.g. "dogs" -> "dog"), its entry is checked as a
        fallback. Duplicate definition strings are filtered out.
        """
        word = word.lower()
        lemma = lemmatizer.lemmatize(word)
        definitions = []
        definitions_seen = set()
        # Check the surface form first, then the lemma (if different),
        # de-duplicating identical definition strings across both entries.
        terms = [word] if word == lemma else [word, lemma]
        for term in terms:
            for d in self.data.get(term, {}).get('definitions', []):
                if pos and d.get('partOfSpeech') != pos:
                    continue
                definition = d.get('definition', '')
                if definition in definitions_seen:
                    continue
                definitions_seen.add(definition)
                definitions.append({
                    'lemma': term,
                    'definition': definition,
                    'partOfSpeech': d.get('partOfSpeech', '')
                })
        return definitions[offset: offset + limit]
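

if __name__ == '__main__':
    # Minimal usage sketch with an illustrative two-entry dataset written to
    # temporary files; the real dumps follow the same
    # {word: {"definitions": [...]}} shape assumed above.
    import os
    import tempfile

    sample = {
        'dog': {'definitions': [
            {'definition': 'a domesticated canid', 'partOfSpeech': 'noun'},
            {'definition': 'pursue relentlessly', 'partOfSpeech': 'verb'},
        ]}
    }
    extra = {
        'cat': {'definitions': [
            {'definition': 'a small domesticated felid',
             'partOfSpeech': 'noun'},
        ]}
    }
    with tempfile.TemporaryDirectory() as tmp:
        main_path = os.path.join(tmp, 'words.json')
        extra_path = os.path.join(tmp, 'words_extra.json')
        with open(main_path, 'w', encoding='utf-8') as f:
            json.dump(sample, f)
        with open(extra_path, 'w', encoding='utf-8') as f:
            json.dump(extra, f)

        api = WordsApi(main_path, extra_path)
        # "dogs" lemmatizes to "dog", so the fallback entry is found.
        print(api.lookup('dogs'))
        # Restrict results to verb senses only.
        print(api.lookup('dog', pos='verb'))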
