Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log
Repository URL

xdelta3-regtest.py@ 407

Last change on this file since 407 was 185, checked in by geyser, 18 years ago

File size: 32.5 KB

Line
1	#!/usr/bin/python2.4
2	# xdelta 3 - delta compression tools and library
3	# Copyright (C) 2003, 2006, 2007. Joshua P. MacDonald
4	#
5	# This program is free software; you can redistribute it and/or modify
6	# it under the terms of the GNU General Public License as published by
7	# the Free Software Foundation; either version 2 of the License, or
8	# (at your option) any later version.
9	#
10	# This program is distributed in the hope that it will be useful,
11	# but WITHOUT ANY WARRANTY; without even the implied warranty of
12	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	# GNU General Public License for more details.
14	#
15	# You should have received a copy of the GNU General Public License
16	# along with this program; if not, write to the Free Software
17	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
19	# TODO: test 1.5 vs. greedy
20
21	import os, sys, math, re, time, types, array, random
22	import xdelta3main
23	import xdelta3
24
# Root of the RCS tree that GetTestRcsFiles() crawls for ",v" files.
#RCSDIR = '/mnt/polaroid/Polaroid/orbit_linux/home/jmacd/PRCS'
RCSDIR = '/tmp/PRCS_read_copy'
# Sample-data directory; in this file it is only referenced from the
# commented-out SampleDataTest lines of the __main__ section.
SAMPLEDIR = "/tmp/WESNOTH_tmp/diff"

# Alternative locations:
#RCSDIR = 'G:/jmacd/PRCS/prcs/b'
#SAMPLEDIR = "C:/sample_data/Wesnoth/tar"

# RcsFile.FilePairsByDate skips a pair when either file is smaller than this.
MIN_SIZE = 0

# NOTE(review): not referenced by any code visible in this file.
TIME_TOO_SHORT = 0.05

# TimedTest defaults: warm-up runs that are discarded, then the minimum and
# maximum number of measured runs.
SKIP_TRIALS = 2
MIN_TRIALS = 3
MAX_TRIALS = 15

# When true, TimedTest measures the encode only and does not decode/verify.
SKIP_DECODE = 1

# TimedTest stops measuring once stddev/mean (in percent) drops below this.
# 10 = fast 1.5 = slow
MIN_STDDEV_PCT = 1.5

# How many results per round
MAX_RESULTS = 500
TEST_ROUNDS = 500
# Fraction of MAX_RESULTS whose configs are re-tested in the next round.
KEEP_P = 0.5

# For RCS testing, what percent to select
FILE_P = 0.3

# For run-speed tests
MIN_RUN = 1000000
MAX_RUN = 10000000

# Testwide defaults
ALL_ARGS = [
    # -v
]

# The first 7 args go to -C
SOFT_CONFIG_CNT = 7

# Position of every tunable inside a config tuple.
CONFIG_ORDER = [
    'large_look',
    'large_step',
    'small_look',
    'small_chain',
    'small_lchain',
    'max_lazy',
    'long_enough',

    # > SOFT_CONFIG_CNT
    'nocompress',
    'winsize',
    'srcwinsize',
    'sprevsz',
    'iopt',
    'djw',
    'altcode',
]

# Command-line flag for each tunable that is not part of -C.
CONFIG_ARGMAP = dict(
    winsize='-W',
    srcwinsize='-B',
    sprevsz='-P',
    iopt='-I',
    nocompress='-N',
    djw='-Sdjw',
    altcode='-T',
)
93
def INPUT_SPEC(rand):
    """Return the table of value generators used to build a random config.

    The result maps every CONFIG_ORDER name to a one-argument callable.  The
    argument is the dict of values already chosen for the config under
    construction (only 'small_lchain' looks at it); the return value is the
    value chosen for that name.  Random picks are drawn from ``rand``.
    """
    pick = rand.choice
    mib = 1 << 20

    def lchain(chosen):
        # small_lchain may never exceed the small_chain already selected.
        limit = chosen['small_chain']
        return pick([n for n in [10, 4, 2, 1] if n <= limit])

    return {
        # Time/space costs:

        # -C 1,2,3,4,5,6,7
        'large_look': lambda d: pick([9]),
        'large_step': lambda d: pick([3, 5, 7, 8, 15]),
        'small_chain': lambda d: pick([40, 10, 4, 1]),
        'small_lchain': lchain,
        'max_lazy': lambda d: pick([9, 18, 27, 36, 72, 108]),
        'long_enough': lambda d: pick([9, 18, 27, 36, 72, 108]),
        'small_look': lambda d: pick([4]),

        # -N
        'nocompress': lambda d: pick(['true']),

        # -T
        'altcode': lambda d: pick(['false']),

        # -S djw
        'djw': lambda d: pick(['false']),

        # Memory costs:

        # -W
        'winsize': lambda d: 8 * mib,

        # -B
        'srcwinsize': lambda d: 64 * mib,

        # -I 0 is unlimited
        'iopt': lambda d: 0,

        # -P only powers of two
        'sprevsz': lambda d: pick([4 * (1 << 16)]),
    }
#end
132
133	#
# Per-process scratch directory; the __main__ section creates and removes it.
TMPDIR = '/tmp/xd3regtest.%d' % os.getpid()

RUNFILE = os.path.join(TMPDIR, 'run')      # input of RunSpeedTest
DFILE = os.path.join(TMPDIR, 'output')     # encoder output (the delta)
RFILE = os.path.join(TMPDIR, 'recon')      # decoder output (reconstruction)

# States of the rlog parser in RcsFile.Sum1Rlog
HEAD_STATE = 0
BAR_STATE = 1
REV_STATE = 2
DATE_STATE = 3

# File names containing ".gif" or ".jpg" are left out of the big test files.
# The pattern is used with match(), i.e. anchored at the start of the path,
# so it needs the leading ".*" to skip over the directory part.
IGNORE_FILENAME = re.compile(r'.*\.(gif|jpg).*')

# rcs output
RE_TOTREV = re.compile(r'total revisions: (\d+)')
RE_BAR = re.compile(r'----------------------------')
RE_REV = re.compile(r'revision (.+)')
RE_DATE = re.compile(r'date: ([^;]+);.*')
# xdelta output
RE_HDRSZ = re.compile(r'VCDIFF header size: +(\d+)')
RE_EXTCOMP = re.compile(r'XDELTA ext comp.*')
156
def c2str(c):
    """Render the items of config ``c`` as one space-separated string."""
    rendered = []
    for item in c:
        rendered.append('%s' % item)
    return ' '.join(rendered)
#end
160
def SumList(l):
    """Return the left-to-right ``+`` fold of the items of sequence ``l``.

    An empty sequence yields 0 (the previous ``reduce`` call raised
    TypeError).  Written as a plain loop so that it does not depend on the
    ``reduce`` builtin, which newer interpreters no longer provide.
    """
    if not l:
        return 0
    total = l[0]
    for item in l[1:]:
        total = total + item
    return total
#end
164
class StatList:
    """Summary statistics over a list of at least two numbers.

    Attributes set by the constructor:
      cnt     number of samples
      l       the samples in ascending order (a copy; the caller's list is
              left untouched)
      total   sum of the samples
      mean    total / cnt
      s       sample standard deviation (n - 1 in the denominator)
      q0      smallest sample
      q1, q2, q3   samples at the rounded positions cnt/4, cnt/2 and 3*cnt/4
      q4      largest sample plus one
      siqr    (q3 - q1) / 2, the "semi-interquartile range"
      spread  q4 - q0
      str     one-line report prefixed with ``desc``
    """

    def __init__(self,l,desc):
        cnt = len(l)
        assert(cnt > 1)
        # Work on a sorted copy so the caller's list keeps its order.
        ordered = sorted(l)
        self.cnt = cnt
        self.l = ordered
        self.total = sum(ordered)
        self.mean = self.total / float(cnt)
        sqdev = sum([(x - self.mean) * (x - self.mean) for x in ordered])
        self.s = math.sqrt(sqdev / float(cnt - 1))
        self.q0 = ordered[0]
        self.q1 = ordered[int(cnt / 4.0 + 0.5)]
        self.q2 = ordered[int(cnt / 2.0 + 0.5)]
        # Only the q3 index can round up past the last element, hence min().
        self.q3 = ordered[min(cnt - 1, int((3.0 * cnt) / 4.0 + 0.5))]
        self.q4 = ordered[cnt - 1] + 1
        self.siqr = (self.q3 - self.q1) / 2.0
        self.spread = (self.q4 - self.q0)
        self.str = '%s %d; mean %d; sdev %d; q2 %d; .5(q3-q1) %.1f; spread %d' % \
                   (desc, self.total, self.mean, self.s, self.q2, self.siqr, self.spread)
    #end
#end
188
def RunCommand(args, ok = (0,)):
    """Run ``args`` (program name first) and wait for it to finish.

    ``ok`` is the collection of acceptable results of os.spawnvp; any other
    result raises CommandError.  The default is an immutable tuple so that
    it cannot be altered by accident between calls.
    """
    #print 'run command %s' % (' '.join(args))
    status = os.spawnvp(os.P_WAIT, args[0], args)
    if status not in ok:
        raise CommandError(args, 'exited %d' % status)
    #end
#end
196
def RunCommandIO(args,infn,outfn):
    """Run ``args`` with stdin read from ``infn`` and stdout written to ``outfn``.

    ``outfn`` is created or truncated.  Raises CommandError when the child
    does not exit normally or exits with a non-zero status.
    """
    pid = os.fork()
    if pid == 0:
        # Child.  It must never return into the caller's code: if opening
        # the files or the exec itself fails, report and _exit right away.
        try:
            try:
                os.dup2(os.open(infn, os.O_RDONLY), 0)
                os.dup2(os.open(outfn, os.O_CREAT | os.O_TRUNC | os.O_WRONLY), 1)
                os.execvp(args[0], args)
            except Exception:
                sys.stderr.write('cannot run %s: %s\n' % (args[0], sys.exc_info()[1]))
            #end
        finally:
            os._exit(127)
        #end
    #end

    status = os.waitpid(pid, 0)[1]
    # WEXITSTATUS is only meaningful once WIFEXITED has been established.
    if not os.WIFEXITED(status):
        raise CommandError(args, 'terminated abnormally (wait status %d)' % status)
    #end
    code = os.WEXITSTATUS(status)
    if code != 0:
        raise CommandError(args, 'exited %d' % code)
    #end
#end
211
class TimedTest:
    """Repeatedly time an encode (and optionally a decode) of one input pair.

    ``runnable`` supplies Encode(target, source, output),
    Decode(input, source, output), Verify(target, recon) and
    EncodeSize(output).  The constructor runs the measurements immediately
    and leaves:

      encode_time  StatList of the measured wall-clock encode times
      encode_size  runnable.EncodeSize(DFILE)
      decode_time  StatList of the measured decode times, or the placeholder
                   StatList([1, 1], 'not decoded') when SKIP_DECODE is set
    """

    def __init__(self, target, source, runnable,
                 skip_trials = SKIP_TRIALS,
                 min_trials = MIN_TRIALS,
                 max_trials = MAX_TRIALS,
                 min_stddev_pct = MIN_STDDEV_PCT):
        self.target = target
        self.source = source
        self.runnable = runnable

        self.skip_trials = skip_trials
        # min_trials can never exceed max_trials
        self.min_trials = min(min_trials, max_trials)
        self.max_trials = max_trials
        self.min_stddev_pct = min_stddev_pct

        self.encode_time = self.DoTest(DFILE,
                                       lambda x: x.Encode(self.target, self.source, DFILE))
        self.encode_size = runnable.EncodeSize(DFILE)

        if SKIP_DECODE:
            self.decode_time = StatList([1, 1], 'not decoded')
            return
        #end

        self.decode_time = self.DoTest(RFILE,
                                       lambda x: x.Decode(DFILE, self.source, RFILE))

        # verify
        runnable.Verify(self.target, RFILE)
    #end

    def DoTest(self, fname, func):
        """Call ``func(self.runnable)`` until the timings are stable.

        ``fname`` is removed before every run.  The first ``skip_trials``
        runs are not recorded.  Measuring ends when the relative standard
        deviation of the recorded wall-clock times is below
        ``min_stddev_pct`` percent, or when ``max_trials`` runs have been
        recorded.  Returns a StatList of the recorded wall-clock times.
        """
        # time.clock() does not exist on newer interpreters; fall back to
        # process_time() there.  The value is only recorded next to the
        # wall-clock time, the returned statistics do not use it.
        cpu_clock = getattr(time, 'clock', None) or time.process_time

        trials = 0
        measured = []

        while 1:
            try:
                os.remove(fname)
            except OSError:
                # nothing to remove before the first run
                pass
            #end

            start_time = time.time()
            start_clock = cpu_clock()

            func(self.runnable)

            total_clock = (cpu_clock() - start_clock)
            total_time = (time.time() - start_time)

            # keep the samples strictly positive so the mean is never zero
            elap_time = max(total_time, 0.0000001)
            elap_clock = max(total_clock, 0.0000001)

            trials = trials + 1

            # skip some of the first trials
            if trials > self.skip_trials:
                measured.append((elap_clock, elap_time))
                #print 'measurement total: %.1f ms' % (total_time * 1000.0)
            #end

            # at least so many
            if trials < (self.skip_trials + self.min_trials):
                #print 'continue: need more trials: %d' % trials
                continue
            #end

            # compute %variance
            done = 0
            if self.skip_trials + self.min_trials <= 2:
                # StatList needs at least two samples: duplicate what we have
                # and stop after this round.
                measured = measured + measured
                done = 1
            #end

            time_stat = StatList([x[1] for x in measured], 'elap time')
            sp = float(time_stat.s) / float(time_stat.mean)

            # what if MAX_TRIALS is exceeded?
            too_many = (trials - self.skip_trials) >= self.max_trials
            good = (100.0 * sp) < self.min_stddev_pct
            if done or too_many or good:
                #clock = StatList([x[0] for x in measured], 'elap clock')
                return time_stat
            #end
        #end
    #end
#end
301
def Decimals(start, end):
    """Return start, 2*start, ..., 9*start, 10*start, 20*start, ... as a list.

    Every decade contributes the multiples 1..9 of its step; the list is
    closed with the first power-of-ten multiple of ``start`` that is >=
    ``end``.  ``start`` is expected to be a positive integer.
    """
    values = []
    step = start
    while 1:
        # extend() accepts any iterable, so this also works where range()
        # does not return a list, and it avoids re-copying the result.
        values.extend(range(step, step * 10, step))
        if step * 10 >= end:
            values.append(step * 10)
            break
        #end
        step = step * 10
    #end
    return values
#end
314
def RunSpeedTest():
    """Report encode speed on RUNFILE for a series of file lengths.

    For every length produced by Decimals(MIN_RUN, MAX_RUN) the file is
    resized with SetFileSize and then timed with xdelta3 at three -W window
    sizes, with the in-memory xdelta3 module, and with gzip.
    """
    window_cases = [(1<<20, '1MB '),
                    (1<<19, '512k'),
                    (1<<18, '256k')]

    for L in Decimals(MIN_RUN, MAX_RUN):
        SetFileSize(RUNFILE, L)

        for (winsize, label) in window_cases:
            trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(winsize)]))
            ReportSpeed(L, trx, label)
        #end

        trm = TimedTest(RUNFILE, None, Xdelta3Mod1(RUNFILE))
        ReportSpeed(L, trm, 'swig')

        trg = TimedTest(RUNFILE, None, GzipRun1())
        ReportSpeed(L, trg, 'gzip')
    #end
#end
336
def SetFileSize(F,L):
    """Create file ``F`` if necessary and make it exactly ``L`` bytes long."""
    fd = os.open(F, os.O_CREAT | os.O_WRONLY)
    try:
        os.ftruncate(fd, L)
        assert os.fstat(fd).st_size == L
    finally:
        # release the descriptor even when the size check fails
        os.close(fd)
    #end
#end
343
def ReportSpeed(L,tr,desc):
    """Print one result line for a run-speed test.

    ``L`` is the run length, ``tr`` a finished timed test (its encode_size
    and the means of encode_time/decode_time are shown, times in
    milliseconds) and ``desc`` a short label.
    """
    encode_ms = tr.encode_time.mean * 1000.0
    decode_ms = tr.decode_time.mean * 1000.0
    line = '%s run length %u: size %u: time %.3f ms: decode %.3f ms' % \
           (desc, L, tr.encode_size, encode_ms, decode_ms)
    print(line)
#end
351
class Xdelta3RunClass:
    """Factory for Xdelta3Runner objects that share one list of extra args."""

    def __init__(self, extra):
        # extra command-line arguments handed to every runner
        self.extra = extra
    #end

    def __str__(self):
        separator = ' '
        return separator.join(self.extra)
    #end

    def New(self):
        """Return a new Xdelta3Runner using the stored extra arguments."""
        runner = Xdelta3Runner(self.extra)
        return runner
    #end
#end
365
class Xdelta3Runner:
    """Runs xdelta3 in-process through xdelta3main.main()."""

    def __init__(self, extra):
        # extra command-line arguments, used for encoding only
        self.extra = extra
    #end

    def Encode(self, target, source, output):
        """Encode ``target`` (against ``source`` if given) into ``output``."""
        args = ALL_ARGS + self.extra + ['-e']
        if source:
            args = args + ['-s', source]
        #end
        args = args + [target, output]
        self.Main(args)
    #end

    def Decode(self, input, source, output):
        """Decode ``input`` (against ``source`` if given) into ``output``."""
        args = ALL_ARGS + ['-d']
        if source:
            args = args + ['-s', source]
        #end
        args = args + [input, output]
        self.Main(args)
    #end

    def Verify(self, target, recon):
        """Compare ``target`` and ``recon`` with cmp; raise CommandError if they differ."""
        RunCommand(('cmp', target, recon))
    #end

    def EncodeSize(self, output):
        """Return the size in bytes of file ``output``."""
        return os.stat(output).st_size
    #end

    def Main(self, args):
        """Call xdelta3main.main(args); any exception becomes a CommandError."""
        try:
            xdelta3main.main(args)
        except Exception:
            # keep the original error text so the failure can be diagnosed
            raise CommandError(args, "xdelta3.main exception: %s" % (sys.exc_info()[1],))
        #end
    #end
#end
410
class Xdelta3Mod1:
    """Runner that encodes/decodes in memory through the xdelta3 module.

    The file/source/output arguments of the runner interface are ignored:
    the target is read once by the constructor and the results are kept in
    ``self.encoded`` and ``self.decoded``.
    """

    def __init__(self, file):
        # binary mode: the data is handed to the codec unmodified
        f = open(file, 'rb')
        try:
            self.target_data = f.read()
        finally:
            f.close()
        #end
    #end

    def Encode(self, ignore1, ignore2, ignore3):
        r1, encoded = xdelta3.xd3_encode_memory(self.target_data, None, 1000000, 1<<10)
        if r1 != 0:
            raise CommandError('memory', 'encode failed: %s' % r1)
        #end
        self.encoded = encoded
    #end

    def Decode(self, ignore1, ignore2, ignore3):
        r2, data1 = xdelta3.xd3_decode_memory(self.encoded, None, len(self.target_data))
        if r2 != 0:
            # report the decoder's own status (this used to name r1, which
            # is not defined in this method)
            raise CommandError('memory', 'decode failed: %s' % r2)
        #end
        self.decoded = data1
    #end

    def Verify(self, ignore1, ignore2):
        if self.target_data != self.decoded:
            raise CommandError('memory', 'bad decode')
        #end
    #end

    def EncodeSize(self, ignore1):
        return len(self.encoded)
    #end
#end
442
class GzipRun1:
    """Runner that compresses with the external gzip program (no source file)."""

    def Encode(self, target, source, output):
        """Compress ``target`` into ``output``; ``source`` must be None."""
        assert source is None
        RunCommandIO(['gzip', '-cf'], target, output)
    #end

    def Decode(self, input, source, output):
        """Decompress ``input`` into ``output``; ``source`` must be None."""
        assert source is None
        RunCommandIO(['gzip', '-dcf'], input, output)
    #end

    def Verify(self, target, recon):
        """Compare ``target`` and ``recon`` with cmp; raise CommandError if they differ."""
        RunCommand(('cmp', target, recon))
    #end

    def EncodeSize(self, output):
        """Return the size in bytes of file ``output``."""
        return os.stat(output).st_size
    #end
#end
462
class Xdelta1RunClass:
    """Factory for Xdelta1Runner objects."""

    def __str__(self):
        name = 'xdelta1'
        return name
    #end

    def New(self):
        """Return a new Xdelta1Runner."""
        runner = Xdelta1Runner()
        return runner
    #end
#end
472
class Xdelta1Runner:
    """Runner that calls the external xdelta1 program; a source is required."""

    def Encode(self, target, source, output):
        """Write the delta from ``source`` to ``target`` into ``output``."""
        assert source is not None
        args = ['xdelta1', 'delta', '-q', source, target, output]
        # Note: for dumb historical reasons, xdelta1 returns 1 or 0
        RunCommand(args, [0, 1])
    #end

    def Decode(self, input, source, output):
        """Apply delta ``input`` to ``source``, writing ``output``."""
        assert source is not None
        args = ['xdelta1', 'patch', '-q', input, source, output]
        RunCommand(args)
    #end

    def Verify(self, target, recon):
        """Compare ``target`` and ``recon`` with cmp; raise CommandError if they differ."""
        RunCommand(('cmp', target, recon))
    #end

    def EncodeSize(self, output):
        """Return the size in bytes of file ``output``."""
        return os.stat(output).st_size
    #end
#end
495
496	# exceptions
class SkipRcsException(Exception):
    """Raised when an RCS file cannot be used for testing.

    ``reason`` holds a short explanation.  Deriving from Exception keeps the
    class raisable on interpreters that only accept Exception subclasses.
    """

    def __init__(self,reason):
        Exception.__init__(self, reason)
        self.reason = reason
    #end
#end
502
class NotEnoughVersions(Exception):
    """Raised by RcsFile.FilePairsByDate for files with fewer than two revisions."""

    def __init__(self):
        Exception.__init__(self)
    #end
#end
508
class CommandError(Exception):
    """Raised when a command or test step fails.

    ``cmd`` is a command string or an argument list/tuple (joined with
    spaces, an empty list is allowed); ``str`` describes the failure.  Both
    are printed when the exception is constructed and kept in ``self.cmd``
    and ``self.reason``.
    """

    def __init__(self,cmd,str):
        if isinstance(cmd, (tuple, list)):
            cmd = ' '.join(['%s' % x for x in cmd])
        #end
        Exception.__init__(self, '%s: %s' % (cmd, str))
        self.cmd = cmd
        self.reason = str
        print('command was:  %s' % (cmd,))
        print('command failed:  %s' % (str,))
        print('have fun debugging')
    #end
#end
520
class RcsVersion:
    """One revision of an RCS file.

    ``vstr`` is the revision string.  The ``date`` attribute is not set
    here; RcsFile.Date assigns it later, and ordering compares by it.
    """

    def __init__(self,vstr):
        self.vstr = vstr
    #end

    def __cmp__(self,other):
        # same result as cmp(self.date, other.date), without needing cmp()
        return (self.date > other.date) - (self.date < other.date)
    #end

    def __lt__(self,other):
        # list.sort() only needs "<"; this keeps sorting by date working on
        # interpreters that ignore __cmp__
        return self.date < other.date
    #end

    def __str__(self):
        return str(self.vstr)
    #end
#end
532
class RcsFile:
    """One RCS ",v" file: its revision list (from rlog) and checkouts (via co).

    Attributes: fname (path of the ,v file), versions (list of RcsVersion,
    sorted by date after Sum1), totrev (revision count announced by rlog,
    0 until parsed), rcssize (size of the ,v file, set by Sum1) and state
    (current state of the rlog parser).
    """

    def __init__(self, fname):
        self.fname = fname
        self.versions = []
        self.state = HEAD_STATE
        # Stays 0 when rlog never prints a "total revisions" line; Rev() and
        # Sum1() then skip the file instead of failing on a missing attribute.
        self.totrev = 0
    #end

    def SetTotRev(self,s):
        self.totrev = int(s)
    #end

    def Rev(self,s):
        """Start a new revision named ``s``; skip the file if rlog lists too many."""
        self.rev = RcsVersion(s)
        if len(self.versions) >= self.totrev:
            raise SkipRcsException('too many versions (in log messages)')
        #end
        self.versions.append(self.rev)
    #end

    def Date(self,s):
        """Record ``s`` as the date of the revision started by the last Rev()."""
        self.rev.date = s
    #end

    def Match(self, line, state, rx, gp, newstate, f):
        """Try one parser transition.

        When the parser is in ``state`` and ``rx`` matches ``line``, call
        ``f`` (if not None) with match group ``gp``, switch to ``newstate``
        and return 1.  Otherwise return None and change nothing.
        """
        if state != self.state:
            return None
        #end
        m = rx.match(line)
        if not m:
            return None
        #end
        if f:
            f(m.group(gp))
        #end
        self.state = newstate
        return 1
    #end

    def Sum1Rlog(self):
        """Run rlog on the file and collect revision count, names and dates."""
        cmd = 'rlog '+self.fname
        f = os.popen(cmd, "r")
        try:
            for l in f:
                # the first transition that applies consumes the line
                if self.Match(l, HEAD_STATE, RE_TOTREV, 1, BAR_STATE, self.SetTotRev):
                    pass
                elif self.Match(l, BAR_STATE, RE_BAR, 1, REV_STATE, None):
                    pass
                elif self.Match(l, REV_STATE, RE_REV, 1, DATE_STATE, self.Rev):
                    pass
                elif self.Match(l, DATE_STATE, RE_DATE, 1, BAR_STATE, self.Date):
                    pass
                #end
            #end
        finally:
            c = f.close()
        #end
        if c != None:
            # close() hands back the exit status on failure; that is a
            # number and cannot be raised itself
            raise CommandError(cmd, 'exited with status %s' % c)
        #end
    #end

    def Sum1(self):
        """Record the ,v file size and revision list; versions end up sorted by date."""
        st = os.stat(self.fname)
        self.rcssize = st.st_size
        self.Sum1Rlog()
        if self.totrev != len(self.versions):
            raise SkipRcsException('wrong version count')
        #end
        self.versions.sort()
    #end

    def Checkout(self,n):
        """Check out revision ``n`` into Verf(n) and store its size in vsize.

        Raises CommandError when co exits with a non-zero status.
        """
        # imported here: the module's import block does not provide it
        import subprocess

        v = self.versions[n]
        cmd = ['co', '-ko', '-p%s' % v.vstr, self.fname]
        proc = subprocess.Popen(cmd,
                                stdin = subprocess.PIPE,
                                stdout = subprocess.PIPE,
                                stderr = subprocess.PIPE)
        # communicate() closes stdin and drains stdout and stderr together,
        # so neither pipe can fill up and block the child.
        (data, estr) = proc.communicate()
        if proc.returncode != 0:
            raise CommandError(cmd, 'checkout failed: %s\n%s\n%s' % (v.vstr, self.fname, estr))
        #end
        out = open(self.Verf(n), "wb")
        try:
            out.write(data)
        finally:
            out.close()
        #end
        v.vsize = len(data)
    #end

    def Vdate(self,n):
        return self.versions[n].date
    #end

    def Vstr(self,n):
        return self.versions[n].vstr
    #end

    def Verf(self,n):
        """Return the scratch path that holds the checkout of revision ``n``."""
        return os.path.join(TMPDIR, 'input.%d' % n)
    #end

    def FilePairsByDate(self, runclass):
        """Time every consecutive pair of revisions, oldest first.

        Revision v is the source and revision v+1 the target of a TimedTest
        run with ``runclass.New()``.  Returns the list of TimedTest results.
        Raises NotEnoughVersions when there are fewer than two revisions.
        """
        if self.totrev < 2:
            raise NotEnoughVersions()
        #end
        self.Checkout(0)
        ntrials = []
        for v in range(0, self.totrev-1):
            if v >= 1:
                # revision v-1 is no longer needed by any later pair
                os.remove(self.Verf(v-1))
            #end
            self.Checkout(v+1)
            if os.stat(self.Verf(v)).st_size < MIN_SIZE or \
               os.stat(self.Verf(v+1)).st_size < MIN_SIZE:
                continue
            #end

            result = TimedTest(self.Verf(v+1),
                               self.Verf(v),
                               runclass.New())

            ntrials.append(result)
        #end

        # only the last pair is still on disk
        os.remove(self.Verf(self.totrev-1))
        os.remove(self.Verf(self.totrev-2))
        return ntrials
    #end

    def AppendVersion(self, f, n):
        """Check out revision ``n``, append it to open file ``f``; return its length."""
        self.Checkout(n)
        rf = open(self.Verf(n), "r")
        try:
            data = rf.read()
        finally:
            rf.close()
        #end
        f.write(data)
        return len(data)
    #end
684
class RcsFinder:
    """Collects the RCS files below a directory and builds test inputs from them.

    Attributes: subdirs, rcsfiles (RcsFile objects), others (paths of all
    remaining files), skipped (RcsFile objects rejected by Summarize) and
    biground (number of MakeBigFiles calls so far).
    """

    def __init__(self):
        self.subdirs = []
        self.rcsfiles = []
        self.others = []
        self.skipped = []
        self.biground = 0
    #end

    def Scan1(self,dir):
        """Classify the entries of ``dir``; return the sub-directories found."""
        subdirs = []
        rcsfiles = []
        others = []
        # Sorted: os.listdir() order is arbitrary, and the seeded random
        # sampling in MakeBigFiles is only repeatable for a stable order.
        for dent in sorted(os.listdir(dir)):
            full = os.path.join(dir, dent)
            if os.path.isdir(full):
                subdirs.append(full)
            elif dent.endswith(",v"):
                rcsfiles.append(RcsFile(full))
            else:
                others.append(full)
            #end
        #end
        self.subdirs = self.subdirs + subdirs
        self.rcsfiles = self.rcsfiles + rcsfiles
        self.others = self.others + others
        return subdirs
    #end

    def Crawl(self, dir):
        """Scan ``dir`` and everything below it, breadth first."""
        subdirs = [dir]
        while subdirs:
            s1 = self.Scan1(subdirs[0])
            subdirs = subdirs[1:] + s1
        #end
    #end

    def Summarize(self):
        """Run Sum1 on every file; keep those with >= 2 revisions, move the rest to skipped."""
        good = []
        for rf in self.rcsfiles:
            try:
                rf.Sum1()
                if rf.totrev < 2:
                    raise SkipRcsException('too few versions (< 2)')
                #end
            except SkipRcsException:
                #print 'skipping file %s: %s' % (rf.fname, e.reason)
                self.skipped.append(rf)
            else:
                good.append(rf)
            #end
        #end
        self.rcsfiles = good
    #end

    def AllPairsByDate(self, runclass):
        """Run FilePairsByDate on every file, print a summary, return all results."""
        results = []
        good = []
        for rf in self.rcsfiles:
            try:
                results = results + rf.FilePairsByDate(runclass)
            except SkipRcsException:
                print('file %s has compressed versions: skipping' % (rf.fname))
            except NotEnoughVersions:
                print('testing %s on %s: not enough versions' % (runclass, rf.fname))
            else:
                good.append(rf)
            #end
        #end
        self.rcsfiles = good
        self.ReportPairs(runclass, results)
        return results
    #end

    def ReportPairs(self, name, results):
        """Print total mean encode time, mean decode time and encoded size of ``results``."""
        encode_time = 0
        decode_time = 0
        encode_size = 0
        for r in results:
            encode_time += r.encode_time.mean
            decode_time += r.decode_time.mean
            encode_size += r.encode_size
        #end
        print('%s rcs: encode %.2f s: decode %.2f s: size %d' %
              (name, encode_time, decode_time, encode_size))
    #end

    def _AppendRandomPairs(self, rand, f1, f2):
        """Append two random revisions of a random sample of files to f1 and f2.

        Returns the number of bytes appended to each as (f1sz, f2sz).
        """
        population = [rf for rf in self.rcsfiles if len(rf.versions) >= 2]
        f1sz = 0
        f2sz = 0
        fcount = int(len(population) * FILE_P)
        assert fcount > 0
        for rf in rand.sample(population, fcount):
            if IGNORE_FILENAME.match(rf.fname) is not None:
                continue
            #end
            r1, r2 = rand.sample(range(0, len(rf.versions)), 2)
            f1sz += rf.AppendVersion(f1, r1)
            f2sz += rf.AppendVersion(f2, r2)
            #m.update('%s,%s,%s ' % (file.fname[len(RCSDIR):], file.Vstr(r1), file.Vstr(r2)))
        #end
        return (f1sz, f2sz)
    #end

    def MakeBigFiles(self, rand):
        """Build TMPDIR/big.1 (source) and TMPDIR/big.2 (target) for one round.

        Returns (source path, target path, test key); the key is 'rcs'
        followed by the round number.
        """
        big1 = TMPDIR + "/big.1"
        big2 = TMPDIR + "/big.2"
        f1 = open(big1, "w")
        try:
            f2 = open(big2, "w")
            try:
                (f1sz, f2sz) = self._AppendRandomPairs(rand, f1, f2)
            finally:
                f2.close()
            #end
        finally:
            # both files are closed even when a checkout fails half way
            f1.close()
        #end

        testkey = 'rcs%d' % self.biground
        self.biground = self.biground + 1

        print('%s; source %u bytes; target %u bytes' % (testkey, f1sz, f2sz))
        return (big1, big2, testkey)
    #end

    def Generator(self):
        """Return a callable mapping a random source to MakeBigFiles(rand)."""
        return lambda rand: self.MakeBigFiles(rand)
    #end
#end
809
# find a set of RCS files for testing
def GetTestRcsFiles():
    """Crawl RCSDIR, summarize the RCS files found and return the RcsFinder.

    Raises CommandError when no RCS file is found or when none is left
    after Summarize has dropped the unusable ones.
    """
    rcsf = RcsFinder()
    rcsf.Crawl(RCSDIR)
    if len(rcsf.rcsfiles) == 0:
        raise CommandError('', 'no RCS files')
    #end
    rcsf.Summarize()
    print("rcsfiles: rcsfiles %d; subdirs %d; others %d; skipped %d" % (len(rcsf.rcsfiles),
                                                                       len(rcsf.subdirs),
                                                                       len(rcsf.others),
                                                                       len(rcsf.skipped)))
    if len(rcsf.rcsfiles) == 0:
        raise CommandError('', 'no usable RCS files')
    #end
    # StatList requires at least two samples
    if len(rcsf.rcsfiles) > 1:
        print(StatList([x.rcssize for x in rcsf.rcsfiles], "rcssize").str)
        print(StatList([x.totrev for x in rcsf.rcsfiles], "totrev").str)
    #end
    return rcsf
#end
826
class SampleDataTest:
    """Test inputs taken from neighbouring files of a directory tree.

    ``pairs`` holds, for every directory below ``dirs`` with more than one
    plain file, a (file, next file, 'file-next file') tuple for each pair of
    files adjacent in sorted order.
    """

    def __init__(self, dirs):
        self.pairs = []
        pending = list(dirs)
        while pending:
            current = pending.pop(0)
            files = []
            for entry in os.listdir(current):
                path = os.path.join(current, entry)
                if os.path.isdir(path):
                    # visited after everything queued so far
                    pending.append(path)
                else:
                    files.append(path)
                #end
            #end
            if len(files) <= 1:
                continue
            #end
            files.sort()
            for (older, newer) in zip(files, files[1:]):
                self.pairs.append((older, newer, '%s-%s' % (older, newer)))
            #end
        #end
    #end

    def Generator(self):
        """Return a callable that picks one of ``pairs`` with the given random source."""
        return lambda rand: rand.choice(self.pairs)
    #end
#end
857
# configs are represented as a list of values,
# program takes a list of strings:
def ConfigToArgs(config):
    """Translate a config (values in CONFIG_ORDER order) into command-line args.

    The first SOFT_CONFIG_CNT values form the comma-separated argument of
    -C.  Every later value uses its CONFIG_ARGMAP flag: 'true' adds the
    bare flag, 'false' adds nothing, anything else adds 'flag=value'.
    """
    soft_values = [str(x) for x in config[0:SOFT_CONFIG_CNT]]
    args = ['-C', ','.join(soft_values)]

    for (offset, name) in enumerate(CONFIG_ORDER[SOFT_CONFIG_CNT:]):
        flag = CONFIG_ARGMAP[name]
        val = config[SOFT_CONFIG_CNT + offset]
        if val == 'true':
            args.append('%s' % flag)
        elif val == 'false':
            # switch is off: nothing to pass
            pass
        else:
            args.append('%s=%s' % (flag, val))
        #end
    #end
    return args
#end
876
877	#
class RandomTest:
    """Result of one config on one test input.

    ``tinput`` is a (source, target, key) tuple and ``config`` a config
    tuple.  Without ``syntuple`` the test is run right away with TimedTest.
    With ``syntuple`` = (time, size) nothing is run and the given numbers
    are stored; decodetime is None in that case.

    ``score``, ``time_pos``, ``size_pos`` and ``score_pos`` start as None
    and are filled in by ScoreTests.  ``score`` is a plain attribute: the
    former score() method could never be called because this attribute hid
    it, so it has been removed.
    """

    def __init__(self, tnum, tinput, config, syntuple = None):
        self.mytinput = tinput[2]
        self.myconfig = config
        self.tnum = tnum

        if syntuple != None:
            self.runtime = syntuple[0]
            self.compsize = syntuple[1]
            self.decodetime = None
        else:
            args = ConfigToArgs(config)
            result = TimedTest(tinput[1], tinput[0], Xdelta3Runner(args))

            self.runtime = result.encode_time.mean
            self.compsize = result.encode_size
            self.decodetime = result.decode_time.mean
        #end

        self.score = None
        self.time_pos = None
        self.size_pos = None
        self.score_pos = None
    #end

    def __str__(self):
        decodestr = ''
        # synthesized tests have no decode time to show
        if not SKIP_DECODE and self.decodetime is not None:
            decodestr = ' %.6f' % self.decodetime
        #end
        time_rank = ''
        if self.time_pos != None:
            time_rank = " (%s)" % self.time_pos
        #end
        size_rank = ''
        if self.size_pos != None:
            size_rank = " (%s)" % self.size_pos
        #end
        return 'time %.6f%s size %d%s << %s >>%s' % (
            self.time(), time_rank,
            self.size(), size_rank,
            c2str(self.config()),
            decodestr)
    #end

    def time(self):
        return self.runtime
    #end

    def size(self):
        return self.compsize
    #end

    def config(self):
        return self.myconfig
    #end

    def tinput(self):
        return self.mytinput
    #end
#end
935
def PosInAlist(l, e):
    """Return the index of the first pair in ``l`` whose second item equals ``e``, or -1."""
    for (index, pair) in enumerate(l):
        if pair[1] == e:
            return index
        #end
    #end
    return -1
#end
944
# Generates a set of num_results test configurations, given the list of
# retest-configs.
def RandomTestConfigs(rand, input_configs, num_results):
    """Return ``input_configs`` topped up with new random configs, sorted.

    Configs are tuples in CONFIG_ORDER order.  New ones are drawn until the
    list holds ``num_results`` entries; the search gives up as soon as ten
    draws in a row only produce configs that are already present.
    """
    outputs = list(input_configs)
    seen = set(input_configs)

    # Compute a random configuration
    def RandomConfig():
        spec = INPUT_SPEC(rand)
        chosen = {}
        values = []
        for key in CONFIG_ORDER:
            # later keys may depend on values chosen for earlier ones
            chosen[key] = spec[key](chosen)
            values.append(chosen[key])
        #end
        return tuple(values)
    #end

    while len(outputs) < num_results:
        newc = None
        for attempt in range(10):
            candidate = RandomConfig()
            if candidate not in seen:
                seen.add(candidate)
                newc = candidate
                break
            #end
        #end
        if newc is None:
            print('stopped looking for configs at %d' % len(outputs))
            break
        #end
        outputs.append(newc)
    #end
    outputs.sort()
    return outputs
#end
982
def RunTestLoop(rand, generator, rounds):
    """Run ``rounds`` rounds of random-config tests.

    Every round tops the config list up to MAX_RESULTS entries, obtains one
    test input from ``generator(rand)``, runs and scores every config on
    it, files the results in test_all_config_results, writes the graphs,
    and carries the configs of the best-scoring KEEP_P share of
    MAX_RESULTS over into the next round.
    """
    keep_count = int(MAX_RESULTS * KEEP_P)
    configs = []
    for rnum in range(rounds):
        configs = RandomTestConfigs(rand, configs, MAX_RESULTS)
        tinput = generator(rand)
        tests = []
        for (x, config) in enumerate(configs):
            t = RandomTest(x, tinput, config)
            print('Round %d test %d: %s' % (rnum, x, t))
            tests.append(t)
        #end
        results = ScoreTests(tests)

        for r in results:
            test_all_config_results.setdefault(r.config(), []).append(r)
        #end

        GraphResults('expt%d' % rnum, results)
        GraphSummary('sum%d' % rnum, results)

        # re-test some fraction
        configs = [r.config() for r in results[0:keep_count]]
    #end
#end

# TODO: cleanup
# All results so far: config tuple -> list of RandomTest
test_all_config_results = {}
1015
def ScoreTests(results):
    """Score and rank tests; return them ordered by score, best first.

    Every test needs time() and size() and gets four attributes assigned:
    ``score`` (smaller is better), ``score_pos`` (1-based rank by score),
    ``size_pos`` and ``time_pos`` (0-based ranks by size and by time).
    Equal keys keep their input order, so the ranking does not depend on
    how the test objects themselves compare.  One 'H-Score' line is printed
    per test.  An empty input returns an empty list.
    """
    if not results:
        return []
    #end

    t_min = float(min([test.time() for test in results]))
    s_min = float(min([test.size() for test in results]))

    # Hyperbolic function. Smaller scores still better
    red = 0.999 # minimum factors for each dimension are 1/1000
    for test in results:
        test.score = ((test.size() - s_min * red) *
                      (test.time() - t_min * red))
    #end

    scored = sorted(results, key = lambda test: test.score)
    timed = sorted(results, key = lambda test: test.time())
    sized = sorted(results, key = lambda test: test.size())

    for (pos, test) in enumerate(scored):
        test.score_pos = pos + 1
    #end
    for (pos, test) in enumerate(sized):
        test.size_pos = pos
    #end
    for (pos, test) in enumerate(timed):
        test.time_pos = pos
    #end

    for test in scored:
        print('H-Score: %0.9f %s' % (test.score, test))
    #end

    return scored
#end
1066
def GraphResults(desc, results):
    """Write time/size of ``results`` to data-<desc>.csv and run ./plot.sh on it.

    Every line holds the time, a tab, the size, a tab and the test's string
    form after '# '.  plot.sh is called with the csv name and
    plot-<desc>.jpg as its two arguments.
    """
    csv_name = "data-%s.csv" % desc
    rows = ["%0.9f\t%d\t# %s\n" % (r.time(), r.size(), r) for r in results]
    f = open(csv_name, "w")
    f.writelines(rows)
    f.close()
    os.system("./plot.sh data-%s.csv plot-%s.jpg" % (desc, desc))
#end
1075
def GraphSummary(desc, results_ignore):
    """Graph summed results of configs that were tested on the same inputs.

    Works on test_all_config_results (``results_ignore`` is not used).
    Configs are visited from the most-tested to the least-tested one while
    the set of inputs shared by all configs visited so far is tracked.
    Whenever a group of equally-tested configs is complete, the time and
    size of every config are summed over the shared inputs, wrapped in
    synthetic RandomTest objects, scored with ScoreTests and graphed with
    GraphResults under the name '<desc>-<number of shared inputs>'.
    """
    test_population = 0
    config_ordered = []

    # drops duplicate test/config pairs (TODO: don't retest them)
    for config, cresults in test_all_config_results.items():
        first_by_input = {}
        uniq = []
        for test in cresults:
            assert test.config() == config
            test_population += 1
            key = test.tinput()
            if key in first_by_input:
                print('skipping repeat test %s vs. %s' % (first_by_input[key], test))
                continue
            #end
            first_by_input[key] = test
            uniq.append(test)
        #end
        config_ordered.append(uniq)
    #end

    # nothing has been recorded yet
    if not config_ordered:
        return
    #end

    # sort configs descending by number of tests
    config_ordered.sort(key = len, reverse = True)

    print('population %d: %d configs %d results' %
          (test_population,
           len(config_ordered),
           len(config_ordered[0])))

    # When even the most-tested config has a single result there is nothing
    # to sum.  (This used to compare the list itself with 1, which is never
    # true.)
    if len(config_ordered[0]) == 1:
        return
    #end

    # a map from test-key to test-list w/ various configs
    input_set = {}
    osize = len(config_ordered)

    for i in range(len(config_ordered)):
        config_tests = config_ordered[i]

        #print '%s has %d tested inputs' % (config, len(config_tests))

        if len(input_set) == 0:
            input_set = dict([(t.tinput(), [t]) for t in config_tests])
            continue
        #end

        # a map from test-key to test-list w/ various configs
        update_set = {}
        for r in config_tests:
            t = r.tinput()
            if t in input_set:
                update_set[t] = input_set[t] + [r]
            #end
        #end

        if len(update_set) <= 1:
            break
        #end

        input_set = update_set

        # continue if there are more w/ the same number of inputs
        if i < (len(config_ordered) - 1) and \
           len(config_ordered[i + 1]) == len(config_tests):
            continue
        #end

        # synthesize results for multi-test inputs
        config_num = None

        # map of config to sum(various test-keys)
        smap = {}
        for (key, tests) in input_set.items():
            if config_num == None:
                # config_num should be the same in all elements
                config_num = len(tests)
                smap = dict([(r.config(),
                              (r.time(),
                               r.size()))
                             for r in tests])
            else:
                # compute the per-config sum of time/size
                assert config_num == len(tests)
                smap = dict([(r.config(),
                              (smap[r.config()][0] + r.time(),
                               smap[r.config()][1] + r.size()))
                             for r in tests])
            #end
        #end

        if config_num == 1:
            continue
        #end

        if len(input_set) == osize:
            break
        #end

        summary = '%s-%d' % (desc, len(input_set))
        osize = len(input_set)

        print('generate %s w/ %d configs' % (summary, config_num))
        syn = [RandomTest(0, (None, None, summary), config,
                          syntuple = (smap[config][0], smap[config][1]))
               for config in smap.keys()]
        syn = ScoreTests(syn)
        #print 'smap is %s' % (smap,)
        #print 'syn is %s' % (' and '.join([str(x) for x in syn]))
        GraphResults(summary, syn)
    #end
#end
1196
def _RunExperiments():
    """Run the experiment that is currently enabled: the random-config loop on RCS data."""
    rcsf = GetTestRcsFiles()
    generator = rcsf.Generator()

    #sample = SampleDataTest([SAMPLEDIR])
    #generator = sample.Generator()

    rand = random.Random(135135135135135)
    RunTestLoop(rand, generator, TEST_ROUNDS)

    #RunSpeedTest()

    #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9']))
    #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-S', 'djw']))
    #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-T']))

    #x1r = rcsf.AllPairsByDate(Xdelta1RunClass())
#end

if __name__ == "__main__":
    try:
        # start from an empty scratch directory
        RunCommand(['rm', '-rf', TMPDIR])
        os.mkdir(TMPDIR)

        _RunExperiments()

    except CommandError:
        # already reported by CommandError itself; TMPDIR is left in place
        pass
    else:
        RunCommand(['rm', '-rf', TMPDIR])
    #end
#end

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: nikanabo/current/xdelta/diy/xdelta3-regtest.py@ 407

Download in other formats: