#! /usr/bin/env python

class Markov:
    """Order-N Markov chain over sliceable sequences.

    Sequences are learned with put() and random sequences are produced
    with get().  States are slices of the input sequences and are used
    as dictionary keys, so the sequences must be sliceable, support
    ``+`` concatenation and have hashable slices (e.g. str or tuple).

    Attributes:
        histsize: maximum number of trailing items that make up a state.
        choice:   callable taking a non-empty list and returning one of
                  its items (for example ``random.choice``).
        trans:    dict mapping a state to the list of observed
                  successors.  The key None holds the possible starting
                  values; a successor of None marks the end of a
                  sequence.
    """

    def __init__(self, histsize, choice):
        """Create an empty model with the given history size and chooser."""
        self.histsize = histsize
        self.choice = choice
        self.trans = {}

    def add(self, state, next):
        """Record that `next` was observed to follow `state`.

        Duplicates are kept on purpose: a successor seen more often
        appears more often in the list and is therefore picked more
        often by `choice`.
        """
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Learn all transitions of `seq` (a string or a tuple)."""
        n = self.histsize
        add = self.add
        # The empty prefix is the starting value handed out by get().
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i - n):i], seq[i:i + 1])
        # Terminal state.  The start index must be clamped exactly like
        # in get(): when histsize exceeds len(seq) a negative start
        # would wrap around and register a shorter suffix than the state
        # get() looks up, which made get() fail with KeyError.
        add(seq[max(0, len(seq) - n):], None)

    def get(self):
        """Generate and return one random sequence.

        Starts from a value recorded under the None key and keeps
        appending a chosen successor of the current state until a false
        successor (the None end marker) is chosen.

        Raises:
            KeyError: if nothing has been learned yet (no None key) or
                the current state has no recorded successors.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while 1:
            state = seq[max(0, len(seq) - n):]
            successor = choice(trans[state])
            if not successor:
                break
            seq = seq + successor
        return seq
def test():
    """Command-line driver: learn a Markov model from text, print random text.

    Options are read from ``sys.argv``: ``-0`` .. ``-9`` set the history
    size (default 2), ``-c`` selects character mode (default), ``-w``
    selects word mode, ``-d`` increases and ``-q`` silences debugging
    output.  The remaining arguments are input files; ``-`` or no
    argument at all means standard input.  Every file is split into
    paragraphs at empty lines, and each paragraph is fed to the model
    with its whitespace normalised to single spaces.

    After loading, generated paragraphs are printed in an endless loop,
    wrapped at 72 columns and separated by blank lines; interrupt the
    program to stop it.  If the options cannot be parsed the usage text
    is printed and the program exits with status 2.
    """
    import sys, random, getopt
    usage = (
        'Usage: markov [-#] [-cddqw] [file] ...',
        'Options:',
        '-#: 1-digit history size (default 2)',
        '-c: characters (default)',
        '-w: words',
        '-d: more debugging output',
        '-q: no debugging output',
        'Input files (default stdin) are split in paragraphs',
        'separated blank lines and each paragraph is split',
        'in words by whitespace, then reconcatenated with',
        'exactly one space separating words.',
        'Output consists of paragraphs separated by blank',
        'lines, where lines are no longer than 72 characters.',
    )
    args = sys.argv[1:]
    try:
        # 'q' must be listed here, otherwise the documented -q option
        # is rejected as unknown.
        opts, args = getopt.getopt(args, '0123456789cdqw')
    except getopt.error:
        for line in usage:
            print(line)
        # Without this exit the code below would fail on the unset 'opts'.
        sys.exit(2)
    histsize = 2
    do_words = 0
    debug = 1
    for o, a in opts:
        if o[1:].isdigit(): histsize = int(o[1:])
        if o == '-c': do_words = 0
        if o == '-d': debug = debug + 1
        if o == '-q': debug = 0
        if o == '-w': do_words = 1
    if not args: args = ['-']
    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                f = sys.stdin
                if f.isatty():
                    print('Sorry, need stdin from file')
                    continue
            else:
                f = open(filename, 'r')
            if debug: print('processing %s ...' % filename)
            try:
                text = f.read()
            finally:
                # Close what we opened even if read() fails; leave
                # stdin open, it does not belong to us.
                if f is not sys.stdin:
                    f.close()
            for para in text.split('\n\n'):
                if debug > 1: print('feeding ...')
                words = para.split()
                if words:
                    if do_words: data = tuple(words)
                    else: data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')
    if not m.trans:
        print('No valid input files')
        return
    if debug: print('done.')
    if debug > 1:
        # Dump only the states shorter than the history size, i.e. the
        # ones reachable at the start of a sequence.
        for key in m.trans:
            if key is None or len(key) < histsize:
                print('%r %s' % (key, m.trans[key]))
        if histsize == 0:
            # With no history the only state is the empty sequence,
            # which is () in word mode and '' in character mode.
            if do_words: empty = ()
            else: empty = ''
            print('%r %s' % (empty, m.trans[empty]))
        print('')
    limit = 72
    while 1:
        data = m.get()
        if do_words: words = data
        else: words = data.split()
        line = []
        n = 0
        for w in words:
            # 'line and' keeps an over-long first word from producing
            # an empty line, which would look like a paragraph break.
            if line and n + len(w) > limit:
                print(' '.join(line))
                line = []
                n = 0
            line.append(w)
            n = n + len(w) + 1
        print(' '.join(line))
        print('')
def tuple(list):
    """Return the items of `list` as a tuple.

    Kept for backward compatibility with callers of this module-level
    helper.  It shadows the builtin of the same name, so the builtin
    type is reached through an empty tuple literal instead of by name.
    Any iterable is accepted, not only sequences supporting len() and
    slicing.
    """
    builtin_tuple = ().__class__
    return builtin_tuple(list)
# Run the command-line driver only when executed as a script, not on import.
if __name__ == "__main__":
    test()