Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

markov.py@ 27

Last change on this file since 27 was 2, checked in by Yuri Dario, 15 years ago
Initial import for vendor code.
Property svn:eol-style set to `native`
File size: 3.7 KB

Line
#! /usr/bin/env python
2
class Markov:
    """Markov chain over sliceable sequences (e.g. strings or tuples).

    ``trans`` maps a history slice of at most ``histsize`` items to the list
    of one-item slices that were seen following it.  Two markers are used:

    * the key ``None`` maps to the empty slices that start a sequence;
    * the value ``None`` in a successor list marks "end of sequence".

    ``choice`` is a callable that picks one element from a non-empty list
    (for example ``random.choice``).
    """

    def __init__(self, histsize, choice):
        """Store the history length and the selection callable."""
        self.histsize = histsize
        self.choice = choice
        self.trans = {}

    def add(self, state, next):
        """Record that ``next`` was observed after history ``state``."""
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Feed one training sequence into the transition table."""
        n = self.histsize
        add = self.add
        # Empty slice of the same type as seq: the starting value for get().
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i - n):i], seq[i:i + 1])
        # Clamp the start index exactly like get() does.  Without the clamp a
        # sequence shorter than histsize yields a negative index, so the end
        # marker is stored under a suffix that get() never looks up and
        # generation fails with KeyError.
        add(seq[max(0, len(seq) - n):], None)

    def get(self):
        """Generate and return one sequence by walking the table.

        Raises KeyError if nothing has been fed with put() yet.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            subseq = seq[max(0, len(seq) - n):]
            options = trans[subseq]
            next = choice(options)
            # None (the end marker) terminates the walk.
            if not next:
                break
            seq = seq + next
        return seq
32
def test():
    """Command-line driver: train a Markov chain on text and print output.

    Reads the files named on the command line (default: stdin), splits them
    into paragraphs on blank lines, feeds each paragraph to a Markov model
    either as a string of characters or as a tuple of words, and then prints
    generated paragraphs forever, wrapped at 72 columns.

    All print calls use the single-argument parenthesised form so the code
    parses both as a print statement and as a print function call.
    """
    import sys, random, getopt
    args = sys.argv[1:]
    try:
        # 'q' has to be listed here; otherwise the documented -q option is
        # rejected by getopt before the option loop below ever sees it.
        opts, args = getopt.getopt(args, '0123456789cdqw')
    except getopt.error:
        print('Usage: markov [-#] [-cddqw] [file] ...')
        print('Options:')
        print('-#: 1-digit history size (default 2)')
        print('-c: characters (default)')
        print('-w: words')
        print('-d: more debugging output')
        print('-q: no debugging output')
        print('Input files (default stdin) are split in paragraphs')
        print('separated blank lines and each paragraph is split')
        print('in words by whitespace, then reconcatenated with')
        print('exactly one space separating words.')
        print('Output consists of paragraphs separated by blank')
        print('lines, where lines are no longer than 72 characters.')
        # Stop here: falling through would use the unbound name 'opts'.
        sys.exit(2)
    histsize = 2
    do_words = 0
    debug = 1
    for o, a in opts:
        if '-0' <= o <= '-9': histsize = int(o[1:])
        if o == '-c': do_words = 0
        if o == '-d': debug = debug + 1
        if o == '-q': debug = 0
        if o == '-w': do_words = 1
    if not args: args = ['-']
    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                f = sys.stdin
                if f.isatty():
                    print('Sorry, need stdin from file')
                    continue
            else:
                f = open(filename, 'r')
            if debug: print('processing %s ...' % filename)
            try:
                text = f.read()
            finally:
                # Close files we opened even if read() fails; leave the
                # process-wide stdin alone.
                if f is not sys.stdin:
                    f.close()
            for para in text.split('\n\n'):
                if debug > 1: print('feeding ...')
                words = para.split()
                if words:
                    if do_words: data = tuple(words)
                    else: data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')
    if not m.trans:
        print('No valid input files')
        return
    if debug: print('done.')
    if debug > 1:
        for key in m.trans.keys():
            if key is None or len(key) < histsize:
                print('%r %s' % (key, m.trans[key]))
        if histsize == 0:
            # With no history every transition hangs off the empty key,
            # which is '' in character mode but () in word mode.
            if do_words: empty = ()
            else: empty = ''
            print('%r %s' % (empty, m.trans[empty]))
        print('')
    limit = 72
    while True:
        data = m.get()
        if do_words: words = data
        else: words = data.split()
        line = []
        n = 0
        for w in words:
            # Only wrap when the line already holds something: a single
            # over-long word must not emit a blank line, because blank lines
            # are the paragraph separator in the output.
            if line and n + len(w) > limit:
                print(' '.join(line))
                line = []
                n = 0
            line.append(w)
            n = n + len(w) + 1
        print(' '.join(line))
        print('')
109
def tuple(list):
    """Return the items of ``list`` as a tuple.

    This module-level function shadows the built-in ``tuple``, so the
    built-in type is obtained from an empty tuple literal instead of by
    name.  Any iterable is accepted, not only sequences that support
    len() and slicing.
    """
    return type(())(list)
115
if __name__ == "__main__":
    # Run the command-line driver only when executed as a script,
    # not when the module is imported.
    test()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Demo/scripts/markov.py@ 27

Download in other formats: