diff --git a/textproc/py-wordnet/Makefile b/textproc/py-wordnet/Makefile index daa8fc6b94ae..3fa937b7cef0 100644 --- a/textproc/py-wordnet/Makefile +++ b/textproc/py-wordnet/Makefile @@ -14,7 +14,7 @@ DISTFILES= pywordnet-${PORTVERSION}.tar.gz:pywn \ MAINTAINER= ports@FreeBSD.org COMMENT= Python Interface to WordNet -USES= python:3.6+ +USES= dos2unix python:3.6+ USE_PYTHON= distutils autoplist WRKSRC= ${WRKDIR}/py${PORTNAME}-${PORTVERSION} @@ -29,6 +29,8 @@ WN_DICTFILES= adj.exc adv.exc cntlist cntlist.rev data.adj data.adv \ verb.exc post-patch: +# concordance.py uses CR line terminators which cannot be fixed by USES=dos2unix + @${CP} ${FILESDIR}/concordance.py ${WRKSRC}/concordance.py @${SED} -e 's|/usr/local/wordnet2.0|${PREFIX}/${WORDNETDATA}|g' \ -e 's|os.path.join(WNHOME.*))|WNHOME)|g' ${WRKSRC}/wordnet.py \ > ${WRKSRC}/wordnet.py.tmp && ${CAT} ${WRKSRC}/wordnet.py.tmp \ diff --git a/textproc/py-wordnet/files/concordance.py b/textproc/py-wordnet/files/concordance.py new file mode 100644 index 000000000000..89caef0036ab --- /dev/null +++ b/textproc/py-wordnet/files/concordance.py @@ -0,0 +1,128 @@ +# some accessing of the semantic concordance data for wordnet 1.6 +# by Des Berry, berry@ais.it + +import string, os +from wordnet import binarySearchFile + +# Sample entries in the 'taglist' file +# ordinary%1:18:01:: 1 br-a01:78,1;86,1;88,4 +# ordered%5:00:00:organized:01 2 br-j23:6,14;13,32;66,12 +# where the general form is: +# lemma%ss_type:lex_filenum:lex_id:head_word:head_id sense_number +# [location_list] +# location_list: filename:sent_num,word_num[;sent_num,word_num...] 
+
+ss_type = ("NOUN", "VERB", "ADJECTIVE", "ADVERB", "ADJECTIVE SATELLITE")
+
+# given a sentence number and the text (str) of a semantic concordance file,
+# return the words of that sentence as one string ("" when it is not found)
+def find_sentence(snum, msg):
+    # opening sentence tag; an empty format string here raised TypeError
+    marker = "<s snum=%s>" % snum
+    s = msg.find(marker)
+    if s < 0:
+        return ""
+    s = s + len(marker)
+    sentence = ""
+    tag = ""
+    while s < len(msg): # stop at end of text instead of indexing past it
+        if msg[s] == '\n':
+            s = s + 1
+        n = msg.find('<', s)
+        if n < 0:
+            break
+        if n - s != 0:
+            if tag == "w" and msg[s] != "'" and len(sentence) > 0: # word form
+                sentence = sentence + " "
+            sentence = sentence + msg[s:n]
+        e = msg.find('>', n)
+        if e < 0:
+            break
+        tag = msg[n+1]
+        # a closing "</s...>" tag ends the sentence
+        if tag == "/" and msg[n+2] == 's':
+            break
+        s = e + 1
+    return sentence
+
+# given a taglist sense (one line of the tagfile) and where to find the tagfile (root)
+# return a tuple of
+# symset type ('1' .. '5')
+# sense (numeric character string)
+# list of sentences (constructed from the taglist)
+def tagsentence(tag, root):
+    s = tag.find('%')
+    sentence = []
+    type = tag[s+1]
+    c = s
+    for i in range(0,4):
+        c = tag.find(':', c + 1)
+    c = tag.find(' ', c + 1)
+    d = tag.find(' ', c + 1) # end of the sense number (may be several digits)
+    sense = tag[c+1:d]
+    c = d + 1
+    while 1:
+        d = tag.find(' ', c) # file separator
+        if d < 0:
+            loclist = tag[c:]
+        else:
+            loclist = tag[c:d]
+            c = d + 1
+
+        e = loclist.find(':')
+        filename = loclist[:e]
+        # text mode: the searches use str patterns; latin-1 decodes any byte
+        with open(root + filename, "r", encoding="latin-1") as fh:
+            msg = fh.read()
+
+        while 1:
+            e = e + 1
+            f = loclist.find(';', e)
+            if f < 0:
+                sent_word = loclist[e:]
+            else:
+                sent_word = loclist[e:f]
+                e = f
+
+            g = sent_word.find(',')
+            sent = sent_word[:g]
+            sentence.append(find_sentence(sent, msg))
+
+            if f < 0:
+                break
+
+        if d < 0:
+            break
+    return (type, sense, sentence)
+
+# given a word to search for and where to find the files (root)
+# displays the information
+# This could be changed to display in different ways!
+def sentences(word, root):
+    key = word + "%"
+    keylen = len(key)
+    # text mode so lines compare against the str key; closed on exit
+    with open(root + "taglist", "r", encoding="latin-1") as file:
+        binarySearchFile(file, key + " ", {}, 10)
+        print("Word '%s'" % word)
+        while 1:
+            line = file.readline()
+            if line[:keylen] != key:
+                break
+            type, sense, sentence = tagsentence(line, root + "tagfiles/")
+            print(ss_type[int(type) - 1], sense)
+            for sent in sentence:
+                print(sent)
+
+
+def _test(word, corpus, base):
+    print(corpus)
+    sentences(word, base + corpus + "/")
+
+if __name__ == '__main__':
+    base = "C:/win16/dict/semcor/"
+    word = "ordinary"
+    _test(word, "brown1", base)
+    _test(word, "brown2", base)
+    _test(word, "brownv", base)
+
diff --git a/textproc/py-wordnet/files/patch-2to3 b/textproc/py-wordnet/files/patch-2to3 new file mode 100644 index 000000000000..aba2f4cf6d93 --- /dev/null +++ b/textproc/py-wordnet/files/patch-2to3 @@ -0,0 +1,1594 @@ +--- wntools.py.orig 2004-07-19 05:09:43 UTC ++++ wntools.py +@@ -33,6 +33,7 @@ __author__ = "Oliver Steele " + __version__ = "2.0" + + from wordnet import * ++from functools import reduce + + # + # Domain utilities +@@ -41,9 +42,9 @@ from wordnet import * + def _requireSource(entity): + if not hasattr(entity, 'pointers'): + if isinstance(entity, Word): +- raise TypeError, `entity` + " is not a Sense or Synset. Try " + `entity` + "[0] instead." ++ raise TypeError(repr(entity) + " is not a Sense or Synset. 
Try " + repr(entity) + "[0] instead.") + else: +- raise TypeError, `entity` + " is not a Sense or Synset" ++ raise TypeError(repr(entity) + " is not a Sense or Synset") + + def tree(source, pointerType): + """ +@@ -64,9 +65,9 @@ def tree(source, pointerType): + >>> #pprint(tree(dog, HYPONYM)) # too verbose to include here + """ + if isinstance(source, Word): +- return map(lambda s, t=pointerType:tree(s,t), source.getSenses()) ++ return list(map(lambda s, t=pointerType:tree(s,t), source.getSenses())) + _requireSource(source) +- return [source] + map(lambda s, t=pointerType:tree(s,t), source.pointerTargets(pointerType)) ++ return [source] + list(map(lambda s, t=pointerType:tree(s,t), source.pointerTargets(pointerType))) + + def closure(source, pointerType, accumulator=None): + """Return the transitive closure of source under the pointerType +@@ -78,7 +79,7 @@ def closure(source, pointerType, accumulator=None): + ['dog' in {noun: dog, domestic dog, Canis familiaris}, {noun: canine, canid}, {noun: carnivore}, {noun: placental, placental mammal, eutherian, eutherian mammal}, {noun: mammal}, {noun: vertebrate, craniate}, {noun: chordate}, {noun: animal, animate being, beast, brute, creature, fauna}, {noun: organism, being}, {noun: living thing, animate thing}, {noun: object, physical object}, {noun: entity}] + """ + if isinstance(source, Word): +- return reduce(union, map(lambda s, t=pointerType:tree(s,t), source.getSenses())) ++ return reduce(union, list(map(lambda s, t=pointerType:tree(s,t), source.getSenses()))) + _requireSource(source) + if accumulator is None: + accumulator = [] +@@ -193,7 +194,7 @@ def product(u, v): + >>> product("123", "abc") + [('1', 'a'), ('1', 'b'), ('1', 'c'), ('2', 'a'), ('2', 'b'), ('2', 'c'), ('3', 'a'), ('3', 'b'), ('3', 'c')] + """ +- return flatten1(map(lambda a, v=v:map(lambda b, a=a:(a,b), v), u)) ++ return flatten1(list(map(lambda a, v=v:list(map(lambda b, a=a:(a,b), v)), u))) + + def removeDuplicates(sequence): + """Return a copy of 
_sequence_ with equal items removed. +@@ -242,12 +243,12 @@ def getIndex(form, pos='noun'): + transformed string until a match is found or all the different + strings have been tried. It returns a Word or None.""" + def trySubstitutions(trySubstitutions, form, substitutions, lookup=1, dictionary=dictionaryFor(pos)): +- if lookup and dictionary.has_key(form): ++ if lookup and form in dictionary: + return dictionary[form] + elif substitutions: + (old, new) = substitutions[0] + substitute = string.replace(form, old, new) and substitute != form +- if substitute and dictionary.has_key(substitute): ++ if substitute and substitute in dictionary: + return dictionary[substitute] + return trySubstitutions(trySubstitutions, form, substitutions[1:], lookup=0) or \ + (substitute and trySubstitutions(trySubstitutions, substitute, substitutions[1:])) +@@ -313,7 +314,7 @@ def morphy(form, pos='noun', collect=0): + exceptions = binarySearchFile(excfile, form) + if exceptions: + form = exceptions[string.find(exceptions, ' ')+1:-1] +- if lookup and dictionary.has_key(form): ++ if lookup and form in dictionary: + if collect: + collection.append(form) + else: +--- wordnet.py.orig 2004-07-19 06:11:31 UTC ++++ wordnet.py +@@ -53,9 +53,9 @@ WNHOME = environ.get('WNHOME', { + 'mac': ":", + 'dos': "C:\\wn16", + 'nt': "C:\\Program Files\\WordNet\\2.0"} +- .get(os.name, "/usr/local/wordnet2.0")) ++ .get(os.name, "/usr/local/share/py-wordnet")) + +-WNSEARCHDIR = environ.get('WNSEARCHDIR', os.path.join(WNHOME, {'mac': "Database"}.get(os.name, "dict"))) ++WNSEARCHDIR = environ.get('WNSEARCHDIR', WNHOME) + + ReadableRepresentations = 1 + """If true, repr(word), repr(sense), and repr(synset) return +@@ -210,15 +210,15 @@ class Word: + + def __init__(self, line): + """Initialize the word from a line of a WN POS file.""" +- tokens = string.split(line) +- ints = map(int, tokens[int(tokens[3]) + 4:]) +- self.form = string.replace(tokens[0], '_', ' ') ++ tokens = string.split(line) ++ ints = 
list(map(int, tokens[int(tokens[3]) + 4:])) ++ self.form = string.replace(tokens[0], '_', ' ') + "Orthographic representation of the word." +- self.pos = _normalizePOS(tokens[1]) ++ self.pos = _normalizePOS(tokens[1]) + "Part of speech. One of NOUN, VERB, ADJECTIVE, ADVERB." +- self.taggedSenseCount = ints[1] ++ self.taggedSenseCount = ints[1] + "Number of senses that are tagged." +- self._synsetOffsets = ints[2:ints[0]+2] ++ self._synsetOffsets = ints[2:ints[0]+2] + + def getPointers(self, pointerType=None): + """Pointers connect senses and synsets, not words. +@@ -231,18 +231,18 @@ class Word: + raise self.getPointers.__doc__ + + def getSenses(self): +- """Return a sequence of senses. +- +- >>> N['dog'].getSenses() +- ('dog' in {noun: dog, domestic dog, Canis familiaris}, 'dog' in {noun: frump, dog}, 'dog' in {noun: dog}, 'dog' in {noun: cad, bounder, blackguard, dog, hound, heel}, 'dog' in {noun: frank, frankfurter, hotdog, hot dog, dog, wiener, wienerwurst, weenie}, 'dog' in {noun: pawl, detent, click, dog}, 'dog' in {noun: andiron, firedog, dog, dog-iron}) +- """ +- if not hasattr(self, '_senses'): +- def getSense(offset, pos=self.pos, form=self.form): +- return getSynset(pos, offset)[form] +- self._senses = tuple(map(getSense, self._synsetOffsets)) +- del self._synsetOffsets +- return self._senses ++ """Return a sequence of senses. 
+ ++ >>> N['dog'].getSenses() ++ ('dog' in {noun: dog, domestic dog, Canis familiaris}, 'dog' in {noun: frump, dog}, 'dog' in {noun: dog}, 'dog' in {noun: cad, bounder, blackguard, dog, hound, heel}, 'dog' in {noun: frank, frankfurter, hotdog, hot dog, dog, wiener, wienerwurst, weenie}, 'dog' in {noun: pawl, detent, click, dog}, 'dog' in {noun: andiron, firedog, dog, dog-iron}) ++ """ ++ if not hasattr(self, '_senses'): ++ def getSense(offset, pos=self.pos, form=self.form): ++ return getSynset(pos, offset)[form] ++ self._senses = tuple(map(getSense, self._synsetOffsets)) ++ del self._synsetOffsets ++ return self._senses ++ + # Deprecated. Present for backwards compatability. + def senses(self): + import wordnet +@@ -253,70 +253,70 @@ class Word: + return self.getSense() + + def isTagged(self): +- """Return 1 if any sense is tagged. +- +- >>> N['dog'].isTagged() +- 1 +- """ +- return self.taggedSenseCount > 0 ++ """Return 1 if any sense is tagged. ++ ++ >>> N['dog'].isTagged() ++ 1 ++ """ ++ return self.taggedSenseCount > 0 + + def getAdjectivePositions(self): +- """Return a sequence of adjective positions that this word can +- appear in. These are elements of ADJECTIVE_POSITIONS. +- +- >>> ADJ['clear'].getAdjectivePositions() +- [None, 'predicative'] +- """ +- positions = {} +- for sense in self.getSenses(): +- positions[sense.position] = 1 +- return positions.keys() ++ """Return a sequence of adjective positions that this word can ++ appear in. These are elements of ADJECTIVE_POSITIONS. 
++ ++ >>> ADJ['clear'].getAdjectivePositions() ++ [None, 'predicative'] ++ """ ++ positions = {} ++ for sense in self.getSenses(): ++ positions[sense.position] = 1 ++ return list(positions.keys()) + + adjectivePositions = getAdjectivePositions # backwards compatability + + def __cmp__(self, other): +- """ +- >>> N['cat'] < N['dog'] +- 1 +- >>> N['dog'] < V['dog'] +- 1 +- """ +- return _compareInstances(self, other, ('pos', 'form')) ++ """ ++ >>> N['cat'] < N['dog'] ++ 1 ++ >>> N['dog'] < V['dog'] ++ 1 ++ """ ++ return _compareInstances(self, other, ('pos', 'form')) + + def __str__(self): +- """Return a human-readable representation. +- +- >>> str(N['dog']) +- 'dog(n.)' +- """ +- abbrs = {NOUN: 'n.', VERB: 'v.', ADJECTIVE: 'adj.', ADVERB: 'adv.'} +- return self.form + "(" + abbrs[self.pos] + ")" ++ """Return a human-readable representation. ++ ++ >>> str(N['dog']) ++ 'dog(n.)' ++ """ ++ abbrs = {NOUN: 'n.', VERB: 'v.', ADJECTIVE: 'adj.', ADVERB: 'adv.'} ++ return self.form + "(" + abbrs[self.pos] + ")" + + def __repr__(self): +- """If ReadableRepresentations is true, return a human-readable +- representation, e.g. 'dog(n.)'. +- +- If ReadableRepresentations is false, return a machine-readable +- representation, e.g. "getWord('dog', 'noun')". +- """ +- if ReadableRepresentations: +- return str(self) +- return "getWord" + `(self.form, self.pos)` +- ++ """If ReadableRepresentations is true, return a human-readable ++ representation, e.g. 'dog(n.)'. ++ ++ If ReadableRepresentations is false, return a machine-readable ++ representation, e.g. "getWord('dog', 'noun')". 
++ """ ++ if ReadableRepresentations: ++ return str(self) ++ return "getWord" + repr((self.form, self.pos)) ++ + # + # Sequence protocol (a Word's elements are its Senses) + # +- def __nonzero__(self): +- return 1 ++ def __bool__(self): ++ return 1 + + def __len__(self): +- return len(self.getSenses()) ++ return len(self.getSenses()) + + def __getitem__(self, index): +- return self.getSenses()[index] ++ return self.getSenses()[index] + + def __getslice__(self, i, j): +- return self.getSenses()[i:j] ++ return self.getSenses()[i:j] + + + class Synset: +@@ -354,157 +354,157 @@ class Synset: + + def __init__(self, pos, offset, line): + "Initialize the synset from a line off a WN synset file." +- self.pos = pos ++ self.pos = pos + "part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB." +- self.offset = offset ++ self.offset = offset + """integer offset into the part-of-speech file. Together + with pos, this can be used as a unique id.""" +- tokens = string.split(line[:string.index(line, '|')]) +- self.ssType = tokens[2] +- self.gloss = string.strip(line[string.index(line, '|') + 1:]) ++ tokens = string.split(line[:string.index(line, '|')]) ++ self.ssType = tokens[2] ++ self.gloss = string.strip(line[string.index(line, '|') + 1:]) + self.lexname = Lexname.lexnames[int(tokens[1])] +- (self._senseTuples, remainder) = _partition(tokens[4:], 2, string.atoi(tokens[3], 16)) +- (self._pointerTuples, remainder) = _partition(remainder[1:], 4, int(remainder[0])) +- if pos == VERB: +- (vfTuples, remainder) = _partition(remainder[1:], 3, int(remainder[0])) +- def extractVerbFrames(index, vfTuples): +- return tuple(map(lambda t:string.atoi(t[1]), filter(lambda t,i=index:string.atoi(t[2],16) in (0, i), vfTuples))) +- senseVerbFrames = [] +- for index in range(1, len(self._senseTuples) + 1): +- senseVerbFrames.append(extractVerbFrames(index, vfTuples)) +- self._senseVerbFrames = senseVerbFrames +- self.verbFrames = tuple(extractVerbFrames(None, vfTuples)) ++ (self._senseTuples, 
remainder) = _partition(tokens[4:], 2, string.atoi(tokens[3], 16)) ++ (self._pointerTuples, remainder) = _partition(remainder[1:], 4, int(remainder[0])) ++ if pos == VERB: ++ (vfTuples, remainder) = _partition(remainder[1:], 3, int(remainder[0])) ++ def extractVerbFrames(index, vfTuples): ++ return tuple([string.atoi(t[1]) for t in list(filter(lambda t,i=index:string.atoi(t[2],16) in (0, i), vfTuples))]) ++ senseVerbFrames = [] ++ for index in range(1, len(self._senseTuples) + 1): ++ senseVerbFrames.append(extractVerbFrames(index, vfTuples)) ++ self._senseVerbFrames = senseVerbFrames ++ self.verbFrames = tuple(extractVerbFrames(None, vfTuples)) + """A sequence of integers that index into + VERB_FRAME_STRINGS. These list the verb frames that any + Sense in this synset participates in. (See also + Sense.verbFrames.) Defined only for verbs.""" + + def getSenses(self): +- """Return a sequence of Senses. +- +- >>> N['dog'][0].getSenses() +- ('dog' in {noun: dog, domestic dog, Canis familiaris},) +- """ +- if not hasattr(self, '_senses'): +- def loadSense(senseTuple, verbFrames=None, synset=self): +- return Sense(synset, senseTuple, verbFrames) +- if self.pos == VERB: +- self._senses = tuple(map(loadSense, self._senseTuples, self._senseVerbFrames)) +- del self._senseVerbFrames +- else: +- self._senses = tuple(map(loadSense, self._senseTuples)) +- del self._senseTuples +- return self._senses ++ """Return a sequence of Senses. 
++ ++ >>> N['dog'][0].getSenses() ++ ('dog' in {noun: dog, domestic dog, Canis familiaris},) ++ """ ++ if not hasattr(self, '_senses'): ++ def loadSense(senseTuple, verbFrames=None, synset=self): ++ return Sense(synset, senseTuple, verbFrames) ++ if self.pos == VERB: ++ self._senses = tuple(map(loadSense, self._senseTuples, self._senseVerbFrames)) ++ del self._senseVerbFrames ++ else: ++ self._senses = tuple(map(loadSense, self._senseTuples)) ++ del self._senseTuples ++ return self._senses + + senses = getSenses + + def getPointers(self, pointerType=None): +- """Return a sequence of Pointers. ++ """Return a sequence of Pointers. + + If pointerType is specified, only pointers of that type are + returned. In this case, pointerType should be an element of + POINTER_TYPES. +- +- >>> N['dog'][0].getPointers()[:5] +- (hypernym -> {noun: canine, canid}, member meronym -> {noun: Canis, genus Canis}, member meronym -> {noun: pack}, hyponym -> {noun: pooch, doggie, doggy, barker, bow-wow}, hyponym -> {noun: cur, mongrel, mutt}) +- >>> N['dog'][0].getPointers(HYPERNYM) +- (hypernym -> {noun: canine, canid},) +- """ +- if not hasattr(self, '_pointers'): +- def loadPointer(tuple, synset=self): +- return Pointer(synset.offset, tuple) +- self._pointers = tuple(map(loadPointer, self._pointerTuples)) +- del self._pointerTuples +- if pointerType == None: +- return self._pointers +- else: +- _requirePointerType(pointerType) +- return filter(lambda pointer, type=pointerType: pointer.type == type, self._pointers) ++ ++ >>> N['dog'][0].getPointers()[:5] ++ (hypernym -> {noun: canine, canid}, member meronym -> {noun: Canis, genus Canis}, member meronym -> {noun: pack}, hyponym -> {noun: pooch, doggie, doggy, barker, bow-wow}, hyponym -> {noun: cur, mongrel, mutt}) ++ >>> N['dog'][0].getPointers(HYPERNYM) ++ (hypernym -> {noun: canine, canid},) ++ """ ++ if not hasattr(self, '_pointers'): ++ def loadPointer(tuple, synset=self): ++ return Pointer(synset.offset, tuple) ++ self._pointers = 
tuple(map(loadPointer, self._pointerTuples)) ++ del self._pointerTuples ++ if pointerType == None: ++ return self._pointers ++ else: ++ _requirePointerType(pointerType) ++ return list(filter(lambda pointer, type=pointerType: pointer.type == type, self._pointers)) + + pointers = getPointers # backwards compatability + + def getPointerTargets(self, pointerType=None): +- """Return a sequence of Senses or Synsets. +- ++ """Return a sequence of Senses or Synsets. ++ + If pointerType is specified, only targets of pointers of that + type are returned. In this case, pointerType should be an + element of POINTER_TYPES. +- +- >>> N['dog'][0].getPointerTargets()[:5] +- [{noun: canine, canid}, {noun: Canis, genus Canis}, {noun: pack}, {noun: pooch, doggie, doggy, barker, bow-wow}, {noun: cur, mongrel, mutt}] +- >>> N['dog'][0].getPointerTargets(HYPERNYM) +- [{noun: canine, canid}] +- """ +- return map(Pointer.target, self.getPointers(pointerType)) ++ ++ >>> N['dog'][0].getPointerTargets()[:5] ++ [{noun: canine, canid}, {noun: Canis, genus Canis}, {noun: pack}, {noun: pooch, doggie, doggy, barker, bow-wow}, {noun: cur, mongrel, mutt}] ++ >>> N['dog'][0].getPointerTargets(HYPERNYM) ++ [{noun: canine, canid}] ++ """ ++ return list(map(Pointer.target, self.getPointers(pointerType))) + + pointerTargets = getPointerTargets # backwards compatability + + def isTagged(self): +- """Return 1 if any sense is tagged. +- +- >>> N['dog'][0].isTagged() +- 1 +- >>> N['dog'][1].isTagged() +- 0 +- """ +- return len(filter(Sense.isTagged, self.getSenses())) > 0 ++ """Return 1 if any sense is tagged. ++ ++ >>> N['dog'][0].isTagged() ++ 1 ++ >>> N['dog'][1].isTagged() ++ 0 ++ """ ++ return len(list(filter(Sense.isTagged, self.getSenses()))) > 0 + + def __str__(self): +- """Return a human-readable representation. 
+- +- >>> str(N['dog'][0].synset) +- '{noun: dog, domestic dog, Canis familiaris}' +- """ +- return "{" + self.pos + ": " + string.joinfields(map(lambda sense:sense.form, self.getSenses()), ", ") + "}" ++ """Return a human-readable representation. ++ ++ >>> str(N['dog'][0].synset) ++ '{noun: dog, domestic dog, Canis familiaris}' ++ """ ++ return "{" + self.pos + ": " + string.joinfields([sense.form for sense in self.getSenses()], ", ") + "}" + + def __repr__(self): +- """If ReadableRepresentations is true, return a human-readable +- representation, e.g. 'dog(n.)'. +- +- If ReadableRepresentations is false, return a machine-readable +- representation, e.g. "getSynset(pos, 1234)". +- """ +- if ReadableRepresentations: +- return str(self) +- return "getSynset" + `(self.pos, self.offset)` ++ """If ReadableRepresentations is true, return a human-readable ++ representation, e.g. 'dog(n.)'. ++ ++ If ReadableRepresentations is false, return a machine-readable ++ representation, e.g. "getSynset(pos, 1234)". ++ """ ++ if ReadableRepresentations: ++ return str(self) ++ return "getSynset" + repr((self.pos, self.offset)) + + def __cmp__(self, other): +- return _compareInstances(self, other, ('pos', 'offset')) ++ return _compareInstances(self, other, ('pos', 'offset')) + + # + # Sequence protocol (a Synset's elements are its senses). 
+ # +- def __nonzero__(self): +- return 1 ++ def __bool__(self): ++ return 1 + + def __len__(self): +- """ +- >>> len(N['dog'][0].synset) +- 3 +- """ +- return len(self.getSenses()) ++ """ ++ >>> len(N['dog'][0].synset) ++ 3 ++ """ ++ return len(self.getSenses()) + + def __getitem__(self, idx): +- """ +- >>> N['dog'][0].synset[0] == N['dog'][0] +- 1 +- >>> N['dog'][0].synset['dog'] == N['dog'][0] +- 1 +- >>> N['dog'][0].synset[N['dog']] == N['dog'][0] +- 1 +- >>> N['cat'][6] +- 'cat' in {noun: big cat, cat} +- """ +- senses = self.getSenses() +- if isinstance(idx, Word): +- idx = idx.form +- if isinstance(idx, StringType): +- idx = _index(idx, map(lambda sense:sense.form, senses)) or \ +- _index(idx, map(lambda sense:sense.form, senses), _equalsIgnoreCase) +- return senses[idx] ++ """ ++ >>> N['dog'][0].synset[0] == N['dog'][0] ++ 1 ++ >>> N['dog'][0].synset['dog'] == N['dog'][0] ++ 1 ++ >>> N['dog'][0].synset[N['dog']] == N['dog'][0] ++ 1 ++ >>> N['cat'][6] ++ 'cat' in {noun: big cat, cat} ++ """ ++ senses = self.getSenses() ++ if isinstance(idx, Word): ++ idx = idx.form ++ if isinstance(idx, StringType): ++ idx = _index(idx, [sense.form for sense in senses]) or \ ++ _index(idx, [sense.form for sense in senses], _equalsIgnoreCase) ++ return senses[idx] + + def __getslice__(self, i, j): +- return self.getSenses()[i:j] ++ return self.getSenses()[i:j] + + + class Sense: +@@ -525,7 +525,7 @@ class Sense: + VERB_FRAME_STRINGS. These list the verb frames that this + Sense partipates in. Defined only for verbs. + +- >>> decide = V['decide'][0].synset # first synset for 'decide' ++ >>> decide = V['decide'][0].synset # first synset for 'decide' + >>> decide[0].verbFrames + (8, 2, 26, 29) + >>> decide[1].verbFrames +@@ -536,124 +536,124 @@ class Sense: + + def __init__(sense, synset, senseTuple, verbFrames=None): + "Initialize a sense from a synset's senseTuple." 
+- # synset is stored by key (pos, synset) rather than object +- # reference, to avoid creating a circular reference between +- # Senses and Synsets that will prevent the vm from +- # garbage-collecting them. +- sense.pos = synset.pos ++ # synset is stored by key (pos, synset) rather than object ++ # reference, to avoid creating a circular reference between ++ # Senses and Synsets that will prevent the vm from ++ # garbage-collecting them. ++ sense.pos = synset.pos + "part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB" +- sense.synsetOffset = synset.offset ++ sense.synsetOffset = synset.offset + "synset key. This is used to retrieve the sense." +- sense.verbFrames = verbFrames ++ sense.verbFrames = verbFrames + """A sequence of integers that index into + VERB_FRAME_STRINGS. These list the verb frames that this + Sense partipates in. Defined only for verbs.""" +- (form, idString) = senseTuple +- sense.position = None +- if '(' in form: +- index = string.index(form, '(') +- key = form[index + 1:-1] +- form = form[:index] +- if key == 'a': +- sense.position = ATTRIBUTIVE +- elif key == 'p': +- sense.position = PREDICATIVE +- elif key == 'ip': +- sense.position = IMMEDIATE_POSTNOMINAL +- else: +- raise "unknown attribute " + key +- sense.form = string.replace(form, '_', ' ') ++ (form, idString) = senseTuple ++ sense.position = None ++ if '(' in form: ++ index = string.index(form, '(') ++ key = form[index + 1:-1] ++ form = form[:index] ++ if key == 'a': ++ sense.position = ATTRIBUTIVE ++ elif key == 'p': ++ sense.position = PREDICATIVE ++ elif key == 'ip': ++ sense.position = IMMEDIATE_POSTNOMINAL ++ else: ++ raise "unknown attribute " + key ++ sense.form = string.replace(form, '_', ' ') + "orthographic representation of the Word this is a Sense of." 
+ + def __getattr__(self, name): +- # see the note at __init__ about why 'synset' is provided as a +- # 'virtual' slot +- if name == 'synset': +- return getSynset(self.pos, self.synsetOffset) ++ # see the note at __init__ about why 'synset' is provided as a ++ # 'virtual' slot ++ if name == 'synset': ++ return getSynset(self.pos, self.synsetOffset) + elif name == 'lexname': + return self.synset.lexname +- else: +- raise AttributeError, name ++ else: ++ raise AttributeError(name) + + def __str__(self): +- """Return a human-readable representation. +- +- >>> str(N['dog']) +- 'dog(n.)' +- """ +- return `self.form` + " in " + str(self.synset) ++ """Return a human-readable representation. ++ ++ >>> str(N['dog']) ++ 'dog(n.)' ++ """ ++ return repr(self.form) + " in " + str(self.synset) + + def __repr__(self): +- """If ReadableRepresentations is true, return a human-readable +- representation, e.g. 'dog(n.)'. +- +- If ReadableRepresentations is false, return a machine-readable +- representation, e.g. "getWord('dog', 'noun')". +- """ +- if ReadableRepresentations: +- return str(self) +- return "%s[%s]" % (`self.synset`, `self.form`) ++ """If ReadableRepresentations is true, return a human-readable ++ representation, e.g. 'dog(n.)'. ++ ++ If ReadableRepresentations is false, return a machine-readable ++ representation, e.g. "getWord('dog', 'noun')". ++ """ ++ if ReadableRepresentations: ++ return str(self) ++ return "%s[%s]" % (repr(self.synset), repr(self.form)) + + def getPointers(self, pointerType=None): +- """Return a sequence of Pointers. +- ++ """Return a sequence of Pointers. ++ + If pointerType is specified, only pointers of that type are + returned. In this case, pointerType should be an element of + POINTER_TYPES. 
+- +- >>> N['dog'][0].getPointers()[:5] +- (hypernym -> {noun: canine, canid}, member meronym -> {noun: Canis, genus Canis}, member meronym -> {noun: pack}, hyponym -> {noun: pooch, doggie, doggy, barker, bow-wow}, hyponym -> {noun: cur, mongrel, mutt}) +- >>> N['dog'][0].getPointers(HYPERNYM) +- (hypernym -> {noun: canine, canid},) +- """ +- senseIndex = _index(self, self.synset.getSenses()) +- def pointsFromThisSense(pointer, selfIndex=senseIndex): +- return pointer.sourceIndex == 0 or pointer.sourceIndex - 1 == selfIndex +- return filter(pointsFromThisSense, self.synset.getPointers(pointerType)) ++ ++ >>> N['dog'][0].getPointers()[:5] ++ (hypernym -> {noun: canine, canid}, member meronym -> {noun: Canis, genus Canis}, member meronym -> {noun: pack}, hyponym -> {noun: pooch, doggie, doggy, barker, bow-wow}, hyponym -> {noun: cur, mongrel, mutt}) ++ >>> N['dog'][0].getPointers(HYPERNYM) ++ (hypernym -> {noun: canine, canid},) ++ """ ++ senseIndex = _index(self, self.synset.getSenses()) ++ def pointsFromThisSense(pointer, selfIndex=senseIndex): ++ return pointer.sourceIndex == 0 or pointer.sourceIndex - 1 == selfIndex ++ return list(filter(pointsFromThisSense, self.synset.getPointers(pointerType))) + + pointers = getPointers # backwards compatability + + def getPointerTargets(self, pointerType=None): +- """Return a sequence of Senses or Synsets. +- ++ """Return a sequence of Senses or Synsets. ++ + If pointerType is specified, only targets of pointers of that + type are returned. In this case, pointerType should be an + element of POINTER_TYPES. 
+- +- >>> N['dog'][0].getPointerTargets()[:5] +- [{noun: canine, canid}, {noun: Canis, genus Canis}, {noun: pack}, {noun: pooch, doggie, doggy, barker, bow-wow}, {noun: cur, mongrel, mutt}] +- >>> N['dog'][0].getPointerTargets(HYPERNYM) +- [{noun: canine, canid}] +- """ +- return map(Pointer.target, self.getPointers(pointerType)) ++ ++ >>> N['dog'][0].getPointerTargets()[:5] ++ [{noun: canine, canid}, {noun: Canis, genus Canis}, {noun: pack}, {noun: pooch, doggie, doggy, barker, bow-wow}, {noun: cur, mongrel, mutt}] ++ >>> N['dog'][0].getPointerTargets(HYPERNYM) ++ [{noun: canine, canid}] ++ """ ++ return list(map(Pointer.target, self.getPointers(pointerType))) + + pointerTargets = getPointerTargets # backwards compatability + + def getSenses(self): +- return self, ++ return self, + + senses = getSenses # backwards compatability + + def isTagged(self): +- """Return 1 if any sense is tagged. +- +- >>> N['dog'][0].isTagged() +- 1 +- >>> N['dog'][1].isTagged() +- 0 +- """ +- word = self.word() +- return _index(self, word.getSenses()) < word.taggedSenseCount ++ """Return 1 if any sense is tagged. 
++ ++ >>> N['dog'][0].isTagged() ++ 1 ++ >>> N['dog'][1].isTagged() ++ 0 ++ """ ++ word = self.word() ++ return _index(self, word.getSenses()) < word.taggedSenseCount + + def getWord(self): +- return getWord(self.form, self.pos) ++ return getWord(self.form, self.pos) + + word = getWord # backwards compatability + + def __cmp__(self, other): +- def senseIndex(sense, synset=self.synset): +- return _index(sense, synset.getSenses(), testfn=lambda a,b: a.form == b.form) +- return _compareInstances(self, other, ('synset',)) or cmp(senseIndex(self), senseIndex(other)) ++ def senseIndex(sense, synset=self.synset): ++ return _index(sense, synset.getSenses(), testfn=lambda a,b: a.form == b.form) ++ return _compareInstances(self, other, ('synset',)) or cmp(senseIndex(self), senseIndex(other)) + + + class Pointer: +@@ -668,21 +668,21 @@ class Pointer: + """ + + _POINTER_TYPE_TABLE = { +- '!': ANTONYM, ++ '!': ANTONYM, + '@': HYPERNYM, + '~': HYPONYM, +- '=': ATTRIBUTE, ++ '=': ATTRIBUTE, + '^': ALSO_SEE, + '*': ENTAILMENT, + '>': CAUSE, +- '$': VERB_GROUP, +- '#m': MEMBER_MERONYM, ++ '$': VERB_GROUP, ++ '#m': MEMBER_MERONYM, + '#s': SUBSTANCE_MERONYM, + '#p': PART_MERONYM, +- '%m': MEMBER_HOLONYM, ++ '%m': MEMBER_HOLONYM, + '%s': SUBSTANCE_HOLONYM, + '%p': PART_HOLONYM, +- '&': SIMILAR, ++ '&': SIMILAR, + '<': PARTICIPLE_OF, + '\\': PERTAINYM, + # New in wn 2.0: +@@ -696,51 +696,51 @@ class Pointer: + } + + def __init__(self, sourceOffset, pointerTuple): +- (type, offset, pos, indices) = pointerTuple +- self.type = Pointer._POINTER_TYPE_TABLE[type] ++ (type, offset, pos, indices) = pointerTuple ++ self.type = Pointer._POINTER_TYPE_TABLE[type] + """One of POINTER_TYPES.""" +- self.sourceOffset = sourceOffset +- self.targetOffset = int(offset) +- self.pos = _normalizePOS(pos) ++ self.sourceOffset = sourceOffset ++ self.targetOffset = int(offset) ++ self.pos = _normalizePOS(pos) + """part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB""" +- indices = string.atoi(indices, 16) 
+- self.sourceIndex = indices >> 8 +- self.targetIndex = indices & 255 ++ indices = string.atoi(indices, 16) ++ self.sourceIndex = indices >> 8 ++ self.targetIndex = indices & 255 + + def getSource(self): +- synset = getSynset(self.pos, self.sourceOffset) +- if self.sourceIndex: +- return synset[self.sourceIndex - 1] +- else: +- return synset ++ synset = getSynset(self.pos, self.sourceOffset) ++ if self.sourceIndex: ++ return synset[self.sourceIndex - 1] ++ else: ++ return synset + + source = getSource # backwards compatability + + def getTarget(self): +- synset = getSynset(self.pos, self.targetOffset) +- if self.targetIndex: +- return synset[self.targetIndex - 1] +- else: +- return synset ++ synset = getSynset(self.pos, self.targetOffset) ++ if self.targetIndex: ++ return synset[self.targetIndex - 1] ++ else: ++ return synset + + target = getTarget # backwards compatability + + def __str__(self): +- return self.type + " -> " + str(self.target()) ++ return self.type + " -> " + str(self.target()) + + def __repr__(self): +- if ReadableRepresentations: +- return str(self) +- return "<" + str(self) + ">" ++ if ReadableRepresentations: ++ return str(self) ++ return "<" + str(self) + ">" + + def __cmp__(self, other): +- diff = _compareInstances(self, other, ('pos', 'sourceOffset')) +- if diff: +- return diff +- synset = self.source() +- def pointerIndex(sense, synset=synset): +- return _index(sense, synset.getPointers(), testfn=lambda a,b: not _compareInstances(a, b, ('type', 'sourceIndex', 'targetIndex'))) +- return cmp(pointerIndex(self), pointerIndex(other)) ++ diff = _compareInstances(self, other, ('pos', 'sourceOffset')) ++ if diff: ++ return diff ++ synset = self.source() ++ def pointerIndex(sense, synset=synset): ++ return _index(sense, synset.getPointers(), testfn=lambda a,b: not _compareInstances(a, b, ('type', 'sourceIndex', 'targetIndex'))) ++ return cmp(pointerIndex(self), pointerIndex(other)) + + + # Loading the lexnames +@@ -794,59 +794,59 @@ class 
Dictionary: + """ + + def __init__(self, pos, filenameroot): +- self.pos = pos ++ self.pos = pos + """part of speech -- one of NOUN, VERB, ADJECTIVE, ADVERB""" +- self.indexFile = _IndexFile(pos, filenameroot) +- self.dataFile = open(_dataFilePathname(filenameroot), _FILE_OPEN_MODE) ++ self.indexFile = _IndexFile(pos, filenameroot) ++ self.dataFile = open(_dataFilePathname(filenameroot), _FILE_OPEN_MODE) + + def __repr__(self): +- dictionaryVariables = {N: 'N', V: 'V', ADJ: 'ADJ', ADV: 'ADV'} +- if dictionaryVariables.get(self): +- return self.__module__ + "." + dictionaryVariables[self] +- return "<%s.%s instance for %s>" % (self.__module__, "Dictionary", self.pos) ++ dictionaryVariables = {N: 'N', V: 'V', ADJ: 'ADJ', ADV: 'ADV'} ++ if dictionaryVariables.get(self): ++ return self.__module__ + "." + dictionaryVariables[self] ++ return "<%s.%s instance for %s>" % (self.__module__, "Dictionary", self.pos) + + def getWord(self, form, line=None): +- key = string.replace(string.lower(form), ' ', '_') +- pos = self.pos +- def loader(key=key, line=line, indexFile=self.indexFile): +- line = line or indexFile.get(key) +- return line and Word(line) +- word = _entityCache.get((pos, key), loader) +- if word: +- return word +- else: +- raise KeyError, "%s is not in the %s database" % (`form`, `pos`) ++ key = string.replace(string.lower(form), ' ', '_') ++ pos = self.pos ++ def loader(key=key, line=line, indexFile=self.indexFile): ++ line = line or indexFile.get(key) ++ return line and Word(line) ++ word = _entityCache.get((pos, key), loader) ++ if word: ++ return word ++ else: ++ raise KeyError("%s is not in the %s database" % (repr(form), repr(pos))) + + def getSynset(self, offset): +- pos = self.pos +- def loader(pos=pos, offset=offset, dataFile=self.dataFile): +- return Synset(pos, offset, _lineAt(dataFile, offset)) +- return _entityCache.get((pos, offset), loader) ++ pos = self.pos ++ def loader(pos=pos, offset=offset, dataFile=self.dataFile): ++ return Synset(pos, offset, 
_lineAt(dataFile, offset)) ++ return _entityCache.get((pos, offset), loader) + + def _buildIndexCacheFile(self): +- self.indexFile._buildIndexCacheFile() ++ self.indexFile._buildIndexCacheFile() + + # + # Sequence protocol (a Dictionary's items are its Words) + # +- def __nonzero__(self): +- """Return false. (This is to avoid scanning the whole index file +- to compute len when a Dictionary is used in test position.) +- +- >>> N and 'true' +- 'true' +- """ +- return 1 ++ def __bool__(self): ++ """Return false. (This is to avoid scanning the whole index file ++ to compute len when a Dictionary is used in test position.) ++ ++ >>> N and 'true' ++ 'true' ++ """ ++ return 1 + + def __len__(self): +- """Return the number of index entries. +- +- >>> len(ADJ) +- 21435 +- """ +- if not hasattr(self, 'length'): +- self.length = len(self.indexFile) +- return self.length ++ """Return the number of index entries. ++ ++ >>> len(ADJ) ++ 21435 ++ """ ++ if not hasattr(self, 'length'): ++ self.length = len(self.indexFile) ++ return self.length + + def __getslice__(self, a, b): + results = [] +@@ -860,22 +860,22 @@ class Dictionary: + return results + + def __getitem__(self, index): +- """If index is a String, return the Word whose form is +- index. If index is an integer n, return the Word +- indexed by the n'th Word in the Index file. +- +- >>> N['dog'] +- dog(n.) +- >>> N[0] +- 'hood(n.) +- """ +- if isinstance(index, StringType): +- return self.getWord(index) +- elif isinstance(index, IntType): +- line = self.indexFile[index] +- return self.getWord(string.replace(line[:string.find(line, ' ')], '_', ' '), line) +- else: +- raise TypeError, "%s is not a String or Int" % `index` ++ """If index is a String, return the Word whose form is ++ index. If index is an integer n, return the Word ++ indexed by the n'th Word in the Index file. ++ ++ >>> N['dog'] ++ dog(n.) ++ >>> N[0] ++ 'hood(n.) 
++ """ ++ if isinstance(index, StringType): ++ return self.getWord(index) ++ elif isinstance(index, IntType): ++ line = self.indexFile[index] ++ return self.getWord(string.replace(line[:string.find(line, ' ')], '_', ' '), line) ++ else: ++ raise TypeError("%s is not a String or Int" % repr(index)) + + # + # Dictionary protocol +@@ -884,54 +884,54 @@ class Dictionary: + # + + def get(self, key, default=None): +- """Return the Word whose form is _key_, or _default_. +- +- >>> N.get('dog') +- dog(n.) +- >>> N.get('inu') +- """ +- try: +- return self[key] +- except LookupError: +- return default ++ """Return the Word whose form is _key_, or _default_. ++ ++ >>> N.get('dog') ++ dog(n.) ++ >>> N.get('inu') ++ """ ++ try: ++ return self[key] ++ except LookupError: ++ return default + + def keys(self): +- """Return a sorted list of strings that index words in this +- dictionary.""" +- return self.indexFile.keys() ++ """Return a sorted list of strings that index words in this ++ dictionary.""" ++ return list(self.indexFile.keys()) + + def has_key(self, form): +- """Return true iff the argument indexes a word in this dictionary. +- +- >>> N.has_key('dog') +- 1 +- >>> N.has_key('inu') +- 0 +- """ +- return self.indexFile.has_key(form) ++ """Return true iff the argument indexes a word in this dictionary. ++ ++ >>> N.has_key('dog') ++ 1 ++ >>> N.has_key('inu') ++ 0 ++ """ ++ return form in self.indexFile + + # + # Testing + # + + def _testKeys(self): +- """Verify that index lookup can find each word in the index file.""" +- print "Testing: ", self +- file = open(self.indexFile.file.name, _FILE_OPEN_MODE) +- counter = 0 +- while 1: +- line = file.readline() +- if line == '': break +- if line[0] != ' ': +- key = string.replace(line[:string.find(line, ' ')], '_', ' ') +- if (counter % 1000) == 0: +- print "%s..." % (key,), +- import sys +- sys.stdout.flush() +- counter = counter + 1 +- self[key] +- file.close() +- print "done." 
++ """Verify that index lookup can find each word in the index file.""" ++ print("Testing: ", self) ++ file = open(self.indexFile.file.name, _FILE_OPEN_MODE) ++ counter = 0 ++ while 1: ++ line = file.readline() ++ if line == '': break ++ if line[0] != ' ': ++ key = string.replace(line[:string.find(line, ' ')], '_', ' ') ++ if (counter % 1000) == 0: ++ print("%s..." % (key,), end=' ') ++ import sys ++ sys.stdout.flush() ++ counter = counter + 1 ++ self[key] ++ file.close() ++ print("done.") + + + class _IndexFile: +@@ -939,69 +939,69 @@ class _IndexFile: + Sequence and Dictionary interface to a sorted index file.""" + + def __init__(self, pos, filenameroot): +- self.pos = pos +- self.file = open(_indexFilePathname(filenameroot), _FILE_OPEN_MODE) +- self.offsetLineCache = {} # Table of (pathname, offset) -> (line, nextOffset) +- self.rewind() +- self.shelfname = os.path.join(WNSEARCHDIR, pos + ".pyidx") +- try: +- import shelve +- self.indexCache = shelve.open(self.shelfname, 'r') +- except: +- pass ++ self.pos = pos ++ self.file = open(_indexFilePathname(filenameroot), _FILE_OPEN_MODE) ++ self.offsetLineCache = {} # Table of (pathname, offset) -> (line, nextOffset) ++ self.rewind() ++ self.shelfname = os.path.join(WNSEARCHDIR, pos + ".pyidx") ++ try: ++ import shelve ++ self.indexCache = shelve.open(self.shelfname, 'r') ++ except: ++ pass + + def rewind(self): +- self.file.seek(0) +- while 1: +- offset = self.file.tell() +- line = self.file.readline() +- if (line[0] != ' '): +- break +- self.nextIndex = 0 +- self.nextOffset = offset ++ self.file.seek(0) ++ while 1: ++ offset = self.file.tell() ++ line = self.file.readline() ++ if (line[0] != ' '): ++ break ++ self.nextIndex = 0 ++ self.nextOffset = offset + + # + # Sequence protocol (an _IndexFile's items are its lines) + # +- def __nonzero__(self): +- return 1 ++ def __bool__(self): ++ return 1 + + def __len__(self): +- if hasattr(self, 'indexCache'): +- return len(self.indexCache) +- self.rewind() +- lines = 0 +- 
while 1: +- line = self.file.readline() +- if line == "": +- break +- lines = lines + 1 +- return lines ++ if hasattr(self, 'indexCache'): ++ return len(self.indexCache) ++ self.rewind() ++ lines = 0 ++ while 1: ++ line = self.file.readline() ++ if line == "": ++ break ++ lines = lines + 1 ++ return lines + +- def __nonzero__(self): +- return 1 ++ def __bool__(self): ++ return 1 + + def __getitem__(self, index): +- if isinstance(index, StringType): +- if hasattr(self, 'indexCache'): +- return self.indexCache[index] +- return binarySearchFile(self.file, index, self.offsetLineCache, 8) +- elif isinstance(index, IntType): +- if hasattr(self, 'indexCache'): +- return self.get(self.keys[index]) +- if index < self.nextIndex: +- self.rewind() +- while self.nextIndex <= index: +- self.file.seek(self.nextOffset) +- line = self.file.readline() +- if line == "": +- raise IndexError, "index out of range" +- self.nextIndex = self.nextIndex + 1 +- self.nextOffset = self.file.tell() +- return line +- else: +- raise TypeError, "%s is not a String or Int" % `index` +- ++ if isinstance(index, StringType): ++ if hasattr(self, 'indexCache'): ++ return self.indexCache[index] ++ return binarySearchFile(self.file, index, self.offsetLineCache, 8) ++ elif isinstance(index, IntType): ++ if hasattr(self, 'indexCache'): ++ return self.get(self.keys[index]) ++ if index < self.nextIndex: ++ self.rewind() ++ while self.nextIndex <= index: ++ self.file.seek(self.nextOffset) ++ line = self.file.readline() ++ if line == "": ++ raise IndexError("index out of range") ++ self.nextIndex = self.nextIndex + 1 ++ self.nextOffset = self.file.tell() ++ return line ++ else: ++ raise TypeError("%s is not a String or Int" % repr(index)) ++ + # + # Dictionary protocol + # +@@ -1009,62 +1009,62 @@ class _IndexFile: + # + + def get(self, key, default=None): +- try: +- return self[key] +- except LookupError: +- return default ++ try: ++ return self[key] ++ except LookupError: ++ return default + + def keys(self): 
+- if hasattr(self, 'indexCache'): +- keys = self.indexCache.keys() +- keys.sort() +- return keys +- else: +- keys = [] +- self.rewind() +- while 1: +- line = self.file.readline() +- if not line: break ++ if hasattr(self, 'indexCache'): ++ keys = list(self.indexCache.keys()) ++ keys.sort() ++ return keys ++ else: ++ keys = [] ++ self.rewind() ++ while 1: ++ line = self.file.readline() ++ if not line: break + key = line.split(' ', 1)[0] +- keys.append(key.replace('_', ' ')) +- return keys ++ keys.append(key.replace('_', ' ')) ++ return keys + + def has_key(self, key): +- key = key.replace(' ', '_') # test case: V['haze over'] +- if hasattr(self, 'indexCache'): +- return self.indexCache.has_key(key) +- return self.get(key) != None ++ key = key.replace(' ', '_') # test case: V['haze over'] ++ if hasattr(self, 'indexCache'): ++ return key in self.indexCache ++ return self.get(key) != None + + # + # Index file + # + + def _buildIndexCacheFile(self): +- import shelve +- import os +- print "Building %s:" % (self.shelfname,), +- tempname = self.shelfname + ".temp" +- try: +- indexCache = shelve.open(tempname) +- self.rewind() +- count = 0 +- while 1: +- offset, line = self.file.tell(), self.file.readline() +- if not line: break +- key = line[:string.find(line, ' ')] +- if (count % 1000) == 0: +- print "%s..." % (key,), +- import sys +- sys.stdout.flush() +- indexCache[key] = line +- count = count + 1 +- indexCache.close() +- os.rename(tempname, self.shelfname) +- finally: +- try: os.remove(tempname) +- except: pass +- print "done." +- self.indexCache = shelve.open(self.shelfname, 'r') ++ import shelve ++ import os ++ print("Building %s:" % (self.shelfname,), end=' ') ++ tempname = self.shelfname + ".temp" ++ try: ++ indexCache = shelve.open(tempname) ++ self.rewind() ++ count = 0 ++ while 1: ++ offset, line = self.file.tell(), self.file.readline() ++ if not line: break ++ key = line[:string.find(line, ' ')] ++ if (count % 1000) == 0: ++ print("%s..." 
% (key,), end=' ') ++ import sys ++ sys.stdout.flush() ++ indexCache[key] = line ++ count = count + 1 ++ indexCache.close() ++ os.rename(tempname, self.shelfname) ++ finally: ++ try: os.remove(tempname) ++ except: pass ++ print("done.") ++ self.indexCache = shelve.open(self.shelfname, 'r') + + + # +@@ -1091,20 +1091,20 @@ getword, getsense, getsynset = getWord, getSense, getS + + def _requirePointerType(pointerType): + if pointerType not in POINTER_TYPES: +- raise TypeError, `pointerType` + " is not a pointer type" ++ raise TypeError(repr(pointerType) + " is not a pointer type") + return pointerType + + def _compareInstances(a, b, fields): + """"Return -1, 0, or 1 according to a comparison first by type, + then by class, and finally by each of fields.""" # " <- for emacs + if not hasattr(b, '__class__'): +- return cmp(type(a), type(b)) ++ return cmp(type(a), type(b)) + elif a.__class__ != b.__class__: +- return cmp(a.__class__, b.__class__) ++ return cmp(a.__class__, b.__class__) + for field in fields: +- diff = cmp(getattr(a, field), getattr(b, field)) +- if diff: +- return diff ++ diff = cmp(getattr(a, field), getattr(b, field)) ++ if diff: ++ return diff + return 0 + + def _equalsIgnoreCase(a, b): +@@ -1122,14 +1122,14 @@ def _equalsIgnoreCase(a, b): + # + def _dataFilePathname(filenameroot): + if os.name in ('dos', 'nt'): +- path = os.path.join(WNSEARCHDIR, filenameroot + ".dat") ++ path = os.path.join(WNSEARCHDIR, filenameroot + ".dat") + if os.path.exists(path): + return path + return os.path.join(WNSEARCHDIR, "data." + filenameroot) + + def _indexFilePathname(filenameroot): + if os.name in ('dos', 'nt'): +- path = os.path.join(WNSEARCHDIR, filenameroot + ".idx") ++ path = os.path.join(WNSEARCHDIR, filenameroot + ".idx") + if os.path.exists(path): + return path + return os.path.join(WNSEARCHDIR, "index." 
+ filenameroot) +@@ -1146,30 +1146,30 @@ def binarySearchFile(file, key, cache={}, cacheDepth=- + #if count > 20: + # raise "infinite loop" + lastState = start, end +- middle = (start + end) / 2 +- if cache.get(middle): +- offset, line = cache[middle] +- else: +- file.seek(max(0, middle - 1)) +- if middle > 0: +- file.readline() +- offset, line = file.tell(), file.readline() +- if currentDepth < cacheDepth: +- cache[middle] = (offset, line) ++ middle = (start + end) / 2 ++ if cache.get(middle): ++ offset, line = cache[middle] ++ else: ++ file.seek(max(0, middle - 1)) ++ if middle > 0: ++ file.readline() ++ offset, line = file.tell(), file.readline() ++ if currentDepth < cacheDepth: ++ cache[middle] = (offset, line) + #print start, middle, end, offset, line, +- if offset > end: +- assert end != middle - 1, "infinite loop" +- end = middle - 1 +- elif line[:keylen] == key:# and line[keylen + 1] == ' ': +- return line ++ if offset > end: ++ assert end != middle - 1, "infinite loop" ++ end = middle - 1 ++ elif line[:keylen] == key:# and line[keylen + 1] == ' ': ++ return line + #elif offset == end: + # return None +- elif line > key: +- assert end != middle - 1, "infinite loop" +- end = middle - 1 +- elif line < key: +- start = offset + len(line) - 1 +- currentDepth = currentDepth + 1 ++ elif line > key: ++ assert end != middle - 1, "infinite loop" ++ end = middle - 1 ++ elif line < key: ++ start = offset + len(line) - 1 ++ currentDepth = currentDepth + 1 + thisState = start, end + if lastState == thisState: + # detects the condition where we're searching past the end +@@ -1198,12 +1198,12 @@ def _index(key, sequence, testfn=None, keyfn=None): + """ + index = 0 + for element in sequence: +- value = element +- if keyfn: +- value = keyfn(value) +- if (not testfn and value == key) or (testfn and testfn(value, key)): +- return index +- index = index + 1 ++ value = element ++ if keyfn: ++ value = keyfn(value) ++ if (not testfn and value == key) or (testfn and testfn(value, 
key)): ++ return index ++ index = index + 1 + return None + + def _partition(sequence, size, count): +@@ -1216,7 +1216,7 @@ def _partition(sequence, size, count): + + partitions = [] + for index in range(0, size * count, size): +- partitions.append(sequence[index:index + size]) ++ partitions.append(sequence[index:index + size]) + return (partitions, sequence[size * count:]) + + +@@ -1261,49 +1261,49 @@ class _LRUCache: + but the two implementations aren't directly comparable.""" + + def __init__(this, capacity): +- this.capacity = capacity +- this.clear() ++ this.capacity = capacity ++ this.clear() + + def clear(this): +- this.values = {} +- this.history = {} +- this.oldestTimestamp = 0 +- this.nextTimestamp = 1 ++ this.values = {} ++ this.history = {} ++ this.oldestTimestamp = 0 ++ this.nextTimestamp = 1 + + def removeOldestEntry(this): +- while this.oldestTimestamp < this.nextTimestamp: +- if this.history.get(this.oldestTimestamp): +- key = this.history[this.oldestTimestamp] +- del this.history[this.oldestTimestamp] +- del this.values[key] +- return +- this.oldestTimestamp = this.oldestTimestamp + 1 ++ while this.oldestTimestamp < this.nextTimestamp: ++ if this.history.get(this.oldestTimestamp): ++ key = this.history[this.oldestTimestamp] ++ del this.history[this.oldestTimestamp] ++ del this.values[key] ++ return ++ this.oldestTimestamp = this.oldestTimestamp + 1 + + def setCapacity(this, capacity): +- if capacity == 0: +- this.clear() +- else: +- this.capacity = capacity +- while len(this.values) > this.capacity: +- this.removeOldestEntry() ++ if capacity == 0: ++ this.clear() ++ else: ++ this.capacity = capacity ++ while len(this.values) > this.capacity: ++ this.removeOldestEntry() + + def get(this, key, loadfn=None): +- value = None +- if this.values: +- pair = this.values.get(key) +- if pair: +- (value, timestamp) = pair +- del this.history[timestamp] +- if value == None: +- value = loadfn and loadfn() +- if this.values != None: +- timestamp = 
this.nextTimestamp +- this.nextTimestamp = this.nextTimestamp + 1 +- this.values[key] = (value, timestamp) +- this.history[timestamp] = key +- if len(this.values) > this.capacity: +- this.removeOldestEntry() +- return value ++ value = None ++ if this.values: ++ pair = this.values.get(key) ++ if pair: ++ (value, timestamp) = pair ++ del this.history[timestamp] ++ if value == None: ++ value = loadfn and loadfn() ++ if this.values != None: ++ timestamp = this.nextTimestamp ++ this.nextTimestamp = this.nextTimestamp + 1 ++ this.values[key] = (value, timestamp) ++ this.history[timestamp] = key ++ if len(this.values) > this.capacity: ++ this.removeOldestEntry() ++ return value + + + class _NullCache: +@@ -1311,10 +1311,10 @@ class _NullCache: + LRUCache implements), but doesn't store any values.""" + + def clear(): +- pass ++ pass + + def get(this, key, loadfn=None): +- return loadfn and loadfn() ++ return loadfn and loadfn() + + + DEFAULT_CACHE_CAPACITY = 1000 +@@ -1327,7 +1327,7 @@ def disableCache(): + def enableCache(): + """Enable the entity cache.""" + if not isinstance(_entityCache, LRUCache): +- _entityCache = _LRUCache(size) ++ _entityCache = _LRUCache(size) + + def clearCache(): + """Clear the entity cache.""" +@@ -1365,36 +1365,36 @@ def _initializePOSTables(): + _POSNormalizationTable = {} + _POStoDictionaryTable = {} + for pos, abbreviations in ( +- (NOUN, "noun n n."), +- (VERB, "verb v v."), +- (ADJECTIVE, "adjective adj adj. a s"), +- (ADVERB, "adverb adv adv. r")): +- tokens = string.split(abbreviations) +- for token in tokens: +- _POSNormalizationTable[token] = pos +- _POSNormalizationTable[string.upper(token)] = pos ++ (NOUN, "noun n n."), ++ (VERB, "verb v v."), ++ (ADJECTIVE, "adjective adj adj. a s"), ++ (ADVERB, "adverb adv adv. 
r")): ++ tokens = string.split(abbreviations) ++ for token in tokens: ++ _POSNormalizationTable[token] = pos ++ _POSNormalizationTable[string.upper(token)] = pos + for dict in Dictionaries: +- _POSNormalizationTable[dict] = dict.pos +- _POStoDictionaryTable[dict.pos] = dict ++ _POSNormalizationTable[dict] = dict.pos ++ _POStoDictionaryTable[dict.pos] = dict + + _initializePOSTables() + + def _normalizePOS(pos): + norm = _POSNormalizationTable.get(pos) + if norm: +- return norm +- raise TypeError, `pos` + " is not a part of speech type" ++ return norm ++ raise TypeError(repr(pos) + " is not a part of speech type") + + def _dictionaryFor(pos): + pos = _normalizePOS(pos) + dict = _POStoDictionaryTable.get(pos) + if dict == None: +- raise RuntimeError, "The " + `pos` + " dictionary has not been created" ++ raise RuntimeError("The " + repr(pos) + " dictionary has not been created") + return dict + + def buildIndexFiles(): + for dict in Dictionaries: +- dict._buildIndexCacheFile() ++ dict._buildIndexCacheFile() + + + # +@@ -1404,7 +1404,7 @@ def buildIndexFiles(): + def _testKeys(): + #This is slow, so don't do it as part of the normal test procedure. + for dictionary in Dictionaries: +- dictionary._testKeys() ++ dictionary._testKeys() + + def _test(reset=0): + import doctest, wordnet