Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

markov.py

Last change on this file was 388, checked in by dmik, 11 years ago
python: Update vendor to 2.7.6.
Property svn:eol-style set to `native`
File size: 3.5 KB

Line
1	#! /usr/bin/env python
2
class Markov:
    """Markov chain over sequences, keyed on up to ``histsize`` items of history.

    The chain works on any sequence type that supports slicing, ``len()``
    and ``+=`` and whose slices are hashable (slices are used as dictionary
    keys), e.g. strings or tuples.

    Attributes:
        histsize: maximum number of trailing items used as the lookup key.
        choice: callable that picks one element from a non-empty list
            (for example ``random.choice``).
        trans: dict mapping a history slice to the list of observed
            successors.  The key ``None`` maps to the possible starting
            values; a successor of ``None`` marks the end of a sequence.
    """

    def __init__(self, histsize, choice):
        self.histsize = histsize
        self.choice = choice
        self.trans = {}

    def add(self, state, next):
        """Record that ``next`` was observed following ``state``."""
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Feed one training sequence into the transition table."""
        n = self.histsize
        add = self.add
        # The start marker maps to an empty sequence of the same type as seq.
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i - n):i], seq[i:i + 1])
        # Clamp the start index exactly as get() does.  Without the clamp a
        # sequence shorter than histsize produces a negative index, which
        # slices from the end and records the terminator under a truncated
        # key that get() never looks up.
        add(seq[max(0, len(seq) - n):], None)

    def get(self):
        """Generate and return one sequence by walking the table.

        Raises KeyError if put() has never been called, because the start
        marker ``None`` is then missing from ``trans``.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            history = seq[max(0, len(seq) - n):]
            successor = choice(trans[history])
            if not successor:
                # The end-of-sequence marker (None) was chosen.
                break
            seq += successor
        return seq
33
34
def test():
    """Command-line driver: train a Markov chain on text and print output.

    Options are read from ``sys.argv``: a single digit sets the history
    size (default 2), ``-c``/``-w`` select character or word mode, ``-d``
    raises and ``-q`` silences the debugging output.  The remaining
    arguments are input files; ``-`` (the default) means standard input.

    Each input is split into paragraphs on blank lines, each paragraph is
    normalised to single-space-separated words and fed to the chain.
    After training, generated paragraphs are printed forever, wrapped at
    72 columns, until the process is interrupted.

    Exits with status 2 after printing a usage message when the options
    cannot be parsed.
    """
    import sys
    import random
    import getopt

    # Output uses single-argument print(...) and sys.stdout.write only, so
    # that the text produced is the same under Python 2 and Python 3.
    args = sys.argv[1:]
    try:
        opts, args = getopt.getopt(args, '0123456789cdwq')
    except getopt.error:
        print('Usage: %s [-#] [-cdqw] [file] ...' % sys.argv[0])
        print('Options:')
        print('-#: 1-digit history size (default 2)')
        print('-c: characters (default)')
        print('-w: words')
        print('-d: more debugging output')
        print('-q: no debugging output')
        print('Input files (default stdin) are split in paragraphs')
        print('separated by blank lines and each paragraph is split')
        print('in words by whitespace, then reconcatenated with')
        print('exactly one space separating words.')
        print('Output consists of paragraphs separated by blank')
        print('lines, where lines are no longer than 72 characters.')
        sys.exit(2)

    histsize = 2
    do_words = False
    debug = 1
    for o, _ in opts:
        if '-0' <= o <= '-9':
            histsize = int(o[1:])
        elif o == '-c':
            do_words = False
        elif o == '-d':
            debug += 1
        elif o == '-q':
            debug = 0
        elif o == '-w':
            do_words = True
    if not args:
        args = ['-']

    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                f = sys.stdin
                if f.isatty():
                    print('Sorry, need stdin from file')
                    continue
            else:
                f = open(filename, 'r')
            try:
                if debug:
                    print('processing %s ...' % filename)
                text = f.read()
            finally:
                # Close the input even when read() fails.
                f.close()
            for para in text.split('\n\n'):
                if debug > 1:
                    print('feeding ...')
                words = para.split()
                if words:
                    if do_words:
                        data = tuple(words)
                    else:
                        data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')
    if not m.trans:
        print('No valid input files')
        return
    if debug:
        print('done.')

    if debug > 1:
        for key in m.trans:
            if key is None or len(key) < histsize:
                print('%s %s' % (repr(key), m.trans[key]))
        if histsize == 0:
            # With no history every transition lives under the empty key,
            # which is '' in character mode but () in word mode.
            empty = () if do_words else ''
            print('%s %s' % (repr(empty), m.trans[empty]))
        print('')

    write = sys.stdout.write
    limit = 72
    while True:
        data = m.get()
        if do_words:
            words = data
        else:
            words = data.split()
        n = 0  # columns used on the current output line
        for w in words:
            if n + len(w) > limit:
                write('\n')
                n = 0
            if n:
                # Separate from the previous word on the same line.
                write(' ')
            write(w)
            n += len(w) + 1
        # End the last line and add the blank line between paragraphs.
        write('\n\n')
119
# Run the demo only when this file is executed as a script; importing it
# as a module just defines Markov and test().
if __name__ == '__main__':
    test()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/vendor/Python-2.7.6/Demo/scripts/markov.py

Download in other formats: