source: python/vendor/Python-2.7.6/Demo/scripts/markov.py

Last change on this file was 388, checked in by dmik, 11 years ago

python: Update vendor to 2.7.6.

  • Property svn:eol-style set to native
File size: 3.5 KB
Line 
1#! /usr/bin/env python
2
class Markov:
    """Markov chain over sequences, keyed on the last ``histsize`` items.

    Works on any sequence type that supports ``len()``, slicing and ``+=``
    and whose slices are hashable (they are used as dictionary keys), for
    example strings or tuples.

    Attributes:
        histsize: maximum number of trailing items used as the state.
        choice:   callable that receives a non-empty list of options and
                  returns one of them (e.g. ``random.choice``).
        trans:    dict mapping a state to the list of observed successors.
                  The key ``None`` is the start state; the successor
                  ``None`` marks the end of a sequence.
    """

    def __init__(self, histsize, choice):
        """Create an empty chain with the given history size and chooser."""
        self.histsize = histsize
        self.choice = choice
        self.trans = {}

    def add(self, state, next):
        """Record that ``next`` was observed right after ``state``."""
        # ``next`` shadows the builtin, but the parameter name is kept so
        # that existing keyword callers continue to work.
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Feed one complete sequence into the transition table."""
        n = self.histsize
        add = self.add
        # The start state maps to an empty sequence of the same type as
        # ``seq``, which is what get() begins with.
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i - n):i], seq[i:i + 1])
        # Clamp the start index at 0.  Without the clamp, a sequence
        # shorter than ``histsize`` yields a negative index that selects
        # only part of the sequence, so the end marker is stored under a
        # key that get() never looks up and get() fails with KeyError.
        add(seq[max(0, len(seq) - n):], None)

    def get(self):
        """Generate and return one random sequence from the table.

        Raises:
            KeyError: if nothing has been added with put() yet, because
                the start state ``None`` is then missing from ``trans``.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            state = seq[max(0, len(seq) - n):]
            successor = choice(trans[state])
            # A falsy successor (the ``None`` end marker) ends the walk.
            if not successor:
                break
            seq += successor
        return seq
33
34
def _wrap_words(words, limit=72):
    """Join ``words`` with single spaces into lines of at most ``limit`` chars.

    A single word longer than ``limit`` is placed on a line of its own.
    Always returns at least one (possibly empty) line.
    """
    lines = []
    current = []
    used = 0  # width of the current line, counting one space after each word
    for word in words:
        # Only break when the line already holds a word; otherwise an
        # over-long first word would be preceded by a stray empty line.
        if current and used + len(word) > limit:
            lines.append(' '.join(current))
            current = []
            used = 0
        current.append(word)
        used += len(word) + 1
    lines.append(' '.join(current))
    return lines


def test():
    """Command-line driver: train a Markov chain on text, then babble.

    Reads options and file names from ``sys.argv``; ``-`` or no file name
    means standard input.  Each input is split into paragraphs on blank
    lines and every paragraph is fed to the chain, either as a string of
    characters or (with ``-w``) as a tuple of words.  Afterwards generated
    paragraphs are printed in an endless loop until the process is
    interrupted.  Exits with status 2 on a bad option.
    """
    import sys
    import random
    import getopt

    args = sys.argv[1:]
    try:
        opts, args = getopt.getopt(args, '0123456789cdwq')
    except getopt.error:
        usage = [
            'Usage: %s [-#] [-cddqw] [file] ...' % sys.argv[0],
            'Options:',
            '-#: 1-digit history size (default 2)',
            '-c: characters (default)',
            '-w: words',
            '-d: more debugging output',
            '-q: no debugging output',
            'Input files (default stdin) are split in paragraphs',
            'separated by blank lines and each paragraph is split',
            'in words by whitespace, then reconcatenated with',
            'exactly one space separating words.',
            'Output consists of paragraphs separated by blank',
            'lines, where lines are no longer than 72 characters.',
        ]
        for line in usage:
            print(line)
        sys.exit(2)

    histsize = 2
    do_words = False
    debug = 1
    for opt, _ in opts:
        flag = opt[1:]
        if flag.isdigit():
            histsize = int(flag)
        elif flag == 'c':
            do_words = False
        elif flag == 'd':
            debug += 1
        elif flag == 'q':
            debug = 0
        elif flag == 'w':
            do_words = True
    if not args:
        args = ['-']

    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                if sys.stdin.isatty():
                    print('Sorry, need stdin from file')
                    continue
                if debug:
                    print('processing %s ...' % filename)
                # stdin is deliberately left open: closing it would make
                # a second '-' argument fail on a closed file.
                text = sys.stdin.read()
            else:
                # The with-block closes the file even if read() raises.
                with open(filename, 'r') as f:
                    if debug:
                        print('processing %s ...' % filename)
                    text = f.read()
            for para in text.split('\n\n'):
                if debug > 1:
                    print('feeding ...')
                words = para.split()
                if words:
                    if do_words:
                        data = tuple(words)
                    else:
                        data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')
    if not m.trans:
        print('No valid input files')
        return
    if debug:
        print('done.')

    if debug > 1:
        # Dump the states shorter than the full history (the start-up
        # states), which show how generated text can begin.
        for key in m.trans:
            if key is None or len(key) < histsize:
                print('%s %s' % (repr(key), m.trans[key]))
        if histsize == 0:
            # With no history the loop above shows only the None state, so
            # print the single empty state.  Its type depends on the mode:
            # '' for characters, () for words.
            empty = () if do_words else ''
            print('%s %s' % (repr(empty), m.trans[empty]))
        print('')

    # Generate paragraphs forever; the user stops the demo with an interrupt.
    while True:
        data = m.get()
        if do_words:
            words = data
        else:
            words = data.split()
        for line in _wrap_words(words, 72):
            print(line)
        print('')
119
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    test()
Note: See TracBrowser for help on using the repository browser.