Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log
Repository URL

xdelta3-regtest.py@ 567

Last change on this file since 567 was 185, checked in by geyser, 18 years ago

File size: 32.5 KB

Rev	Line
[185]	1	#!/usr/bin/python2.4
	2	# xdelta 3 - delta compression tools and library
	3	# Copyright (C) 2003, 2006, 2007. Joshua P. MacDonald
	4	#
	5	# This program is free software; you can redistribute it and/or modify
	6	# it under the terms of the GNU General Public License as published by
	7	# the Free Software Foundation; either version 2 of the License, or
	8	# (at your option) any later version.
	9	#
	10	# This program is distributed in the hope that it will be useful,
	11	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	13	# GNU General Public License for more details.
	14	#
	15	# You should have received a copy of the GNU General Public License
	16	# along with this program; if not, write to the Free Software
	17	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	18
	19	# TODO: test 1.5 vs. greedy
	20
	21	import os, sys, math, re, time, types, array, random
	22	import xdelta3main
	23	import xdelta3
	24
#RCSDIR = '/mnt/polaroid/Polaroid/orbit_linux/home/jmacd/PRCS'
# Root directory that GetTestRcsFiles() crawls looking for RCS ",v" files.
RCSDIR = '/tmp/PRCS_read_copy'
# Directory of sample files for SampleDataTest; only referenced from the
# commented-out code in the main section.
SAMPLEDIR = "/tmp/WESNOTH_tmp/diff"

#RCSDIR = 'G:/jmacd/PRCS/prcs/b'
#SAMPLEDIR = "C:/sample_data/Wesnoth/tar"

#
# RcsFile.FilePairsByDate() skips a version pair when either file is
# smaller than this many bytes.
MIN_SIZE = 0

# NOTE(review): not referenced anywhere in the visible code.
TIME_TOO_SHORT = 0.050

# TimedTest defaults: the first SKIP_TRIALS runs are not measured, then
# between MIN_TRIALS and MAX_TRIALS measured runs are taken.
SKIP_TRIALS = 2
MIN_TRIALS = 3
MAX_TRIALS = 15

# When true, TimedTest only encodes; decode and verify are skipped.
SKIP_DECODE = 1

# 10 = fast 1.5 = slow
# TimedTest stops early once stddev/mean (in percent) drops below this.
MIN_STDDEV_PCT = 1.5

# How many results per round
MAX_RESULTS = 500
TEST_ROUNDS = 500
# Fraction of MAX_RESULTS whose configs RunTestLoop carries into the
# next round.
KEEP_P = (0.5)

# For RCS testing, what percent to select
FILE_P = (0.30)

# For run-speed tests
MIN_RUN = 1000 * 1000 * 1
MAX_RUN = 1000 * 1000 * 10

# Testwide defaults
# Prepended to every argument list built by Xdelta3Runner.
ALL_ARGS = [
    # -v
    ]

# The first 7 args go to -C
SOFT_CONFIG_CNT = 7

# Position of each setting inside a config tuple; ConfigToArgs() and
# RandomTestConfigs() both iterate in this order.
CONFIG_ORDER = [ 'large_look',
                 'large_step',
                 'small_look',
                 'small_chain',
                 'small_lchain',
                 'max_lazy',
                 'long_enough',

                 # > SOFT_CONFIG_CNT
                 'nocompress',
                 'winsize',
                 'srcwinsize',
                 'sprevsz',
                 'iopt',
                 'djw',
                 'altcode',
                 ]

# Command-line flag for each setting that is not part of the -C list.
CONFIG_ARGMAP = {
    'winsize' : '-W',
    'srcwinsize' : '-B',
    'sprevsz' : '-P',
    'iopt' : '-I',
    'nocompress' : '-N',
    'djw' : '-Sdjw',
    'altcode' : '-T',
    }
	93
# Returns a dict mapping each CONFIG_ORDER key to a one-argument function.
# Each function receives the dict of values chosen so far (d) and returns
# the value for its key, drawing from `rand` where there is a choice.
def INPUT_SPEC(rand):
    return {

        # Time/space costs:

        # -C 1,2,3,4,5,6,7
        'large_look' : lambda d: rand.choice([9]),
        'large_step' : lambda d: rand.choice([3, 5, 7, 8, 15]),
        'small_chain' : lambda d: rand.choice([40, 10, 4, 1]),
        # Reads d['small_chain'], so 'small_chain' must already have been
        # chosen; CONFIG_ORDER lists it first.
        'small_lchain' : lambda d: rand.choice([x for x in [10, 4, 2, 1] if x <= d['small_chain']]),
        'max_lazy' : lambda d: rand.choice([9, 18, 27, 36, 72, 108]),
        'long_enough' : lambda d: rand.choice([9, 18, 27, 36, 72, 108]),
        'small_look' : lambda d: rand.choice([4]),

        # -N
        'nocompress' : lambda d: rand.choice(['true']),

        # -T
        'altcode' : lambda d: rand.choice(['false']),

        # -S djw
        'djw' : lambda d: rand.choice(['false']),

        # Memory costs:

        # -W
        'winsize' : lambda d: 8 * (1<<20),

        # -B
        'srcwinsize' : lambda d: 64 * (1<<20),

        # -I 0 is unlimited
        'iopt' : lambda d: 0,

        # -P only powers of two
        'sprevsz' : lambda d: rand.choice([x * (1<<16) for x in [4]]),
        }
#end
	132
	133	#
	134	TMPDIR = '/tmp/xd3regtest.%d' % os.getpid()
	135
	136	RUNFILE = os.path.join(TMPDIR, 'run')
	137	DFILE = os.path.join(TMPDIR, 'output')
	138	RFILE = os.path.join(TMPDIR, 'recon')
	139
	140	HEAD_STATE = 0
	141	BAR_STATE = 1
	142	REV_STATE = 2
	143	DATE_STATE = 3
	144
	145	#
	146	IGNORE_FILENAME = re.compile('.\\.(gif\|jpg).')
	147
	148	# rcs output
	149	RE_TOTREV = re.compile('total revisions: (\\d+)')
	150	RE_BAR = re.compile('----------------------------')
	151	RE_REV = re.compile('revision (.+)')
	152	RE_DATE = re.compile('date: ([^;]+);.*')
	153	# xdelta output
	154	RE_HDRSZ = re.compile('VCDIFF header size: +(\\d+)')
	155	RE_EXTCOMP = re.compile('XDELTA ext comp.*')
	156
	157	def c2str(c):
	158	return ' '.join(['%s' % x for x in c])
	159	#end
	160
	161	def SumList(l):
	162	return reduce(lambda x,y: x+y, l)
	163	#end
	164
	165	# returns (total, mean, stddev, q2 (median),
	166	# (q3-q1)/2 ("semi-interquartile range"), max-min (spread))
	167	class StatList:
	168	def __init__(self,l,desc):
	169	cnt = len(l)
	170	assert(cnt > 1)
	171	l.sort()
	172	self.cnt = cnt
	173	self.l = l
	174	self.total = SumList(l)
	175	self.mean = self.total / float(self.cnt)
	176	self.s = math.sqrt(SumList([(x-self.mean) * (x - self.mean) for x in l]) / float(self.cnt-1))
	177	self.q0 = l[0]
	178	self.q1 = l[int(self.cnt/4.0+0.5)]
	179	self.q2 = l[int(self.cnt/2.0+0.5)]
	180	self.q3 = l[min(self.cnt-1,int((3.0*self.cnt)/4.0+0.5))]
	181	self.q4 = l[self.cnt-1]+1
	182	self.siqr = (self.q3-self.q1)/2.0;
	183	self.spread = (self.q4-self.q0)
	184	self.str = '%s %d; mean %d; sdev %d; q2 %d; .5(q3-q1) %.1f; spread %d' % \
	185	(desc, self.total, self.mean, self.s, self.q2, self.siqr, self.spread)
	186	#end
	187	#end
	188
	189	def RunCommand(args, ok = [0]):
	190	#print 'run command %s' % (' '.join(args))
	191	p = os.spawnvp(os.P_WAIT, args[0], args)
	192	if p not in ok:
	193	raise CommandError(args, 'exited %d' % p)
	194	#end
	195	#end
	196
	197	def RunCommandIO(args,infn,outfn):
	198	p = os.fork()
	199	if p == 0:
	200	os.dup2(os.open(infn,os.O_RDONLY),0)
	201	os.dup2(os.open(outfn,os.O_CREAT\|os.O_TRUNC\|os.O_WRONLY),1)
	202	os.execvp(args[0], args)
	203	else:
	204	s = os.waitpid(p,0)
	205	o = os.WEXITSTATUS(s[1])
	206	if not os.WIFEXITED(s[1]) or o != 0:
	207	raise CommandError(args, 'exited %d' % o)
	208	#end
	209	#end
	210	#end
	211
# Times encoding of `target` against `source` with `runnable`, and, unless
# SKIP_DECODE is set, also times decoding and verifies the reconstruction.
# Results are left in self.encode_time, self.encode_size and
# self.decode_time (the two times are StatList objects).
class TimedTest:
    def __init__(self, target, source, runnable,
                 skip_trials = SKIP_TRIALS,
                 min_trials = MIN_TRIALS,
                 max_trials = MAX_TRIALS,
                 min_stddev_pct = MIN_STDDEV_PCT):
        self.target = target
        self.source = source
        self.runnable = runnable

        self.skip_trials = skip_trials
        # min_trials is capped so it never exceeds max_trials.
        self.min_trials = min(min_trials, max_trials)
        self.max_trials = max_trials
        self.min_stddev_pct = min_stddev_pct

        self.encode_time = self.DoTest(DFILE,
                                       lambda x: x.Encode(self.target, self.source, DFILE))
        self.encode_size = runnable.EncodeSize(DFILE)

        if SKIP_DECODE:
            # Placeholder statistics (mean 1) so that callers reading
            # decode_time.mean still work.
            self.decode_time = StatList([1, 1], 'not decoded')
            return
        #end

        self.decode_time = self.DoTest(RFILE,
                                       lambda x: x.Decode(DFILE, self.source, RFILE),
                                       )

        # verify
        runnable.Verify(self.target, RFILE)
    #end

    # Calls func(self.runnable) repeatedly, removing `fname` before each
    # run, and returns a StatList of the measured wall-clock times.
    def DoTest(self, fname, func):
        trials = 0
        measured = []

        while 1:
            try:
                os.remove(fname)
            except OSError:
                # the output file may not exist yet
                pass

            start_time = time.time()
            start_clock = time.clock()

            func(self.runnable)

            total_clock = (time.clock() - start_clock)
            total_time = (time.time() - start_time)

            # clamp to a tiny positive value so the mean is never zero
            elap_time = max(total_time, 0.0000001)
            elap_clock = max(total_clock, 0.0000001)

            trials = trials + 1

            # skip some of the first trials
            if trials > self.skip_trials:
                measured.append((elap_clock, elap_time))
                #print 'measurement total: %.1f ms' % (total_time * 1000.0)

            # at least so many
            if trials < (self.skip_trials + self.min_trials):
                #print 'continue: need more trials: %d' % trials
                continue

            # compute %variance
            done = 0
            if self.skip_trials + self.min_trials <= 2:
                # Too few samples are configured for StatList (it asserts
                # more than one), so duplicate them and stop after this pass.
                measured = measured + measured;
                done = 1
            #end

            time_stat = StatList([x[1] for x in measured], 'elap time')
            sp = float(time_stat.s) / float(time_stat.mean)

            # what if MAX_TRIALS is exceeded?
            too_many = (trials - self.skip_trials) >= self.max_trials
            good = (100.0 * sp) < self.min_stddev_pct
            if done or too_many or good:
                trials = trials - self.skip_trials
                if not done and not good:
                    #print 'too many trials: %d' % trials
                    pass
                #clock = StatList([x[0] for x in measured], 'elap clock')
                return time_stat
            #end
        #end
    #end
#end
	301
	302	def Decimals(start, end):
	303	l = []
	304	step = start
	305	while 1:
	306	r = range(step, step * 10, step)
	307	l = l + r
	308	if step * 10 >= end:
	309	l.append(step * 10)
	310	break
	311	step = step * 10
	312	return l
	313	#end
	314
# This tests the raw speed on inputs that consist entirely of zero bytes
	316	def RunSpeedTest():
	317	for L in Decimals(MIN_RUN, MAX_RUN):
	318	SetFileSize(RUNFILE, L)
	319
	320	trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<20)]))
	321	ReportSpeed(L, trx, '1MB ')
	322
	323	trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<19)]))
	324	ReportSpeed(L, trx, '512k')
	325
	326	trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<18)]))
	327	ReportSpeed(L, trx, '256k')
	328
	329	trm = TimedTest(RUNFILE, None, Xdelta3Mod1(RUNFILE))
	330	ReportSpeed(L, trm, 'swig')
	331
	332	trg = TimedTest(RUNFILE, None, GzipRun1())
	333	ReportSpeed(L,trg,'gzip')
	334	#end
	335	#end
	336
	337	def SetFileSize(F,L):
	338	fd = os.open(F, os.O_CREAT \| os.O_WRONLY)
	339	os.ftruncate(fd,L)
	340	assert os.fstat(fd).st_size == L
	341	os.close(fd)
	342	#end
	343
	344	def ReportSpeed(L,tr,desc):
	345	print '%s run length %u: size %u: time %.3f ms: decode %.3f ms' % \
	346	(desc, L,
	347	tr.encode_size,
	348	tr.encode_time.mean * 1000.0,
	349	tr.decode_time.mean * 1000.0)
	350	#end
	351
	352	class Xdelta3RunClass:
	353	def __init__(self, extra):
	354	self.extra = extra
	355	#end
	356
	357	def __str__(self):
	358	return ' '.join(self.extra)
	359	#end
	360
	361	def New(self):
	362	return Xdelta3Runner(self.extra)
	363	#end
	364	#end
	365
	366	class Xdelta3Runner:
	367	def __init__(self, extra):
	368	self.extra = extra
	369	#end
	370
	371	def Encode(self, target, source, output):
	372	args = (ALL_ARGS +
	373	self.extra +
	374	['-e'])
	375	if source:
	376	args.append('-s')
	377	args.append(source)
	378	#end
	379	args = args + [target, output]
	380	self.Main(args)
	381	#end
	382
	383	def Decode(self, input, source, output):
	384	args = (ALL_ARGS +
	385	['-d'])
	386	if source:
	387	args.append('-s')
	388	args.append(source)
	389	#end
	390	args = args + [input, output]
	391	self.Main(args)
	392	#end
	393
	394	def Verify(self, target, recon):
	395	RunCommand(('cmp', target, recon))
	396	#end
	397
	398	def EncodeSize(self, output):
	399	return os.stat(output).st_size
	400	#end
	401
	402	def Main(self, args):
	403	try:
	404	xdelta3main.main(args)
	405	except Exception, e:
	406	raise CommandError(args, "xdelta3.main exception")
	407	#end
	408	#end
	409	#end
	410
	411	class Xdelta3Mod1:
	412	def __init__(self, file):
	413	self.target_data = open(file, 'r').read()
	414	#end
	415
	416	def Encode(self, ignore1, ignore2, ignore3):
	417	r1, encoded = xdelta3.xd3_encode_memory(self.target_data, None, 1000000, 1<<10)
	418	if r1 != 0:
	419	raise CommandError('memory', 'encode failed: %s' % r1)
	420	#end
	421	self.encoded = encoded
	422	#end
	423
	424	def Decode(self, ignore1, ignore2, ignore3):
	425	r2, data1 = xdelta3.xd3_decode_memory(self.encoded, None, len(self.target_data))
	426	if r2 != 0:
	427	raise CommandError('memory', 'decode failed: %s' % r1)
	428	#end
	429	self.decoded = data1
	430	#end
	431
	432	def Verify(self, ignore1, ignore2):
	433	if self.target_data != self.decoded:
	434	raise CommandError('memory', 'bad decode')
	435	#end
	436	#end
	437
	438	def EncodeSize(self, ignore1):
	439	return len(self.encoded)
	440	#end
	441	#end
	442
	443	class GzipRun1:
	444	def Encode(self, target, source, output):
	445	assert source == None
	446	RunCommandIO(['gzip', '-cf'], target, output)
	447	#end
	448
	449	def Decode(self, input, source, output):
	450	assert source == None
	451	RunCommandIO(['gzip', '-dcf'], input, output)
	452	#end
	453
	454	def Verify(self, target, recon):
	455	RunCommand(('cmp', target, recon))
	456	#end
	457
	458	def EncodeSize(self, output):
	459	return os.stat(output).st_size
	460	#end
	461	#end
	462
	463	class Xdelta1RunClass:
	464	def __str__(self):
	465	return 'xdelta1'
	466	#end
	467
	468	def New(self):
	469	return Xdelta1Runner()
	470	#end
	471	#end
	472
	473	class Xdelta1Runner:
	474	def Encode(self, target, source, output):
	475	assert source != None
	476	args = ['xdelta1', 'delta', '-q', source, target, output]
	477	RunCommand(args, [0, 1])
	478	#end
	479
	480	def Decode(self, input, source, output):
	481	assert source != None
	482	args = ['xdelta1', 'patch', '-q', input, source, output]
	483	# Note: for dumb historical reasons, xdelta1 returns 1 or 0
	484	RunCommand(args)
	485	#end
	486
	487	def Verify(self, target, recon):
	488	RunCommand(('cmp', target, recon))
	489	#end
	490
	491	def EncodeSize(self, output):
	492	return os.stat(output).st_size
	493	#end
	494	#end
	495
	496	# exceptions
	497	class SkipRcsException:
	498	def __init__(self,reason):
	499	self.reason = reason
	500	#end
	501	#end
	502
	503	class NotEnoughVersions:
	504	def __init__(self):
	505	pass
	506	#end
	507	#end
	508
	509	class CommandError:
	510	def __init__(self,cmd,str):
	511	if type(cmd) is types.TupleType or \
	512	type(cmd) is types.ListType:
	513	cmd = reduce(lambda x,y: '%s %s' % (x,y),cmd)
	514	#end
	515	print 'command was: ',cmd
	516	print 'command failed: ',str
	517	print 'have fun debugging'
	518	#end
	519	#end
	520
	521	class RcsVersion:
	522	def __init__(self,vstr):
	523	self.vstr = vstr
	524	#end
	525	def __cmp__(self,other):
	526	return cmp(self.date, other.date)
	527	#end
	528	def __str__(self):
	529	return str(self.vstr)
	530	#end
	531	#end
	532
	533	class RcsFile:
	534
	535	def __init__(self, fname):
	536	self.fname = fname
	537	self.versions = []
	538	self.state = HEAD_STATE
	539	#end
	540
	541	def SetTotRev(self,s):
	542	self.totrev = int(s)
	543	#end
	544
	545	def Rev(self,s):
	546	self.rev = RcsVersion(s)
	547	if len(self.versions) >= self.totrev:
	548	raise SkipRcsException('too many versions (in log messages)')
	549	#end
	550	self.versions.append(self.rev)
	551	#end
	552
	553	def Date(self,s):
	554	self.rev.date = s
	555	#end
	556
	557	def Match(self, line, state, rx, gp, newstate, f):
	558	if state == self.state:
	559	m = rx.match(line)
	560	if m:
	561	if f:
	562	f(m.group(gp))
	563	#end
	564	self.state = newstate
	565	return 1
	566	#end
	567	#end
	568	return None
	569	#end
	570
	571	def Sum1Rlog(self):
	572	f = os.popen('rlog '+self.fname, "r")
	573	l = f.readline()
	574	while l:
	575	if self.Match(l, HEAD_STATE, RE_TOTREV, 1, BAR_STATE, self.SetTotRev):
	576	pass
	577	elif self.Match(l, BAR_STATE, RE_BAR, 1, REV_STATE, None):
	578	pass
	579	elif self.Match(l, REV_STATE, RE_REV, 1, DATE_STATE, self.Rev):
	580	pass
	581	elif self.Match(l, DATE_STATE, RE_DATE, 1, BAR_STATE, self.Date):
	582	pass
	583	#end
	584	l = f.readline()
	585	#end
	586	c = f.close()
	587	if c != None:
	588	raise c
	589	#end
	590	#end
	591
	592	def Sum1(self):
	593	st = os.stat(self.fname)
	594	self.rcssize = st.st_size
	595	self.Sum1Rlog()
	596	if self.totrev != len(self.versions):
	597	raise SkipRcsException('wrong version count')
	598	#end
	599	self.versions.sort()
	600	#end
	601
	602	def Checkout(self,n):
	603	v = self.versions[n]
	604	out = open(self.Verf(n), "w")
	605	cmd = 'co -ko -p%s %s' % (v.vstr, self.fname)
	606	total = 0
	607	(inf,
	608	stream,
	609	err) = os.popen3(cmd, "r")
	610	inf.close()
	611	buf = stream.read()
	612	while buf:
	613	total = total + len(buf)
	614	out.write(buf)
	615	buf = stream.read()
	616	#end
	617	v.vsize = total
	618	estr = ''
	619	buf = err.read()
	620	while buf:
	621	estr = estr + buf
	622	buf = err.read()
	623	#end
	624	if stream.close():
	625	raise CommandError(cmd, 'checkout failed: %s\n%s\n%s' % (v.vstr, self.fname, estr))
	626	#end
	627	out.close()
	628	err.close()
	629	#end
	630
	631	def Vdate(self,n):
	632	return self.versions[n].date
	633	#end
	634
	635	def Vstr(self,n):
	636	return self.versions[n].vstr
	637	#end
	638
	639	def Verf(self,n):
	640	return os.path.join(TMPDIR, 'input.%d' % n)
	641	#end
	642
	643	def FilePairsByDate(self, runclass):
	644	if self.totrev < 2:
	645	raise NotEnoughVersions()
	646	#end
	647	self.Checkout(0)
	648	ntrials = []
	649	if self.totrev < 2:
	650	return vtrials
	651	#end
	652	for v in range(0,self.totrev-1):
	653	if v > 1:
	654	os.remove(self.Verf(v-1))
	655	#end
	656	self.Checkout(v+1)
	657	if os.stat(self.Verf(v)).st_size < MIN_SIZE or \
	658	os.stat(self.Verf(v+1)).st_size < MIN_SIZE:
	659	continue
	660	#end
	661
	662	result = TimedTest(self.Verf(v+1),
	663	self.Verf(v),
	664	runclass.New())
	665
	666	target_size = os.stat(self.Verf(v+1)).st_size
	667
	668	ntrials.append(result)
	669	#end
	670
	671	os.remove(self.Verf(self.totrev-1))
	672	os.remove(self.Verf(self.totrev-2))
	673	return ntrials
	674	#end
	675
	676	def AppendVersion(self, f, n):
	677	self.Checkout(n)
	678	rf = open(self.Verf(n), "r")
	679	data = rf.read()
	680	f.write(data)
	681	rf.close()
	682	return len(data)
	683	#end
	684
	685	class RcsFinder:
	686	def __init__(self):
	687	self.subdirs = []
	688	self.rcsfiles = []
	689	self.others = []
	690	self.skipped = []
	691	self.biground = 0
	692	#end
	693
	694	def Scan1(self,dir):
	695	dents = os.listdir(dir)
	696	subdirs = []
	697	rcsfiles = []
	698	others = []
	699	for dent in dents:
	700	full = os.path.join(dir, dent)
	701	if os.path.isdir(full):
	702	subdirs.append(full)
	703	elif dent[len(dent)-2:] == ",v":
	704	rcsfiles.append(RcsFile(full))
	705	else:
	706	others.append(full)
	707	#end
	708	#end
	709	self.subdirs = self.subdirs + subdirs
	710	self.rcsfiles = self.rcsfiles + rcsfiles
	711	self.others = self.others + others
	712	return subdirs
	713	#end
	714
	715	def Crawl(self, dir):
	716	subdirs = [dir]
	717	while subdirs:
	718	s1 = self.Scan1(subdirs[0])
	719	subdirs = subdirs[1:] + s1
	720	#end
	721	#end
	722
	723	def Summarize(self):
	724	good = []
	725	for rf in self.rcsfiles:
	726	try:
	727	rf.Sum1()
	728	if rf.totrev < 2:
	729	raise SkipRcsException('too few versions (< 2)')
	730	#end
	731	except SkipRcsException, e:
	732	#print 'skipping file %s: %s' % (rf.fname, e.reason)
	733	self.skipped.append(rf)
	734	else:
	735	good.append(rf)
	736	#end
	737	self.rcsfiles = good
	738	#end
	739
	740	def AllPairsByDate(self, runclass):
	741	results = []
	742	good = []
	743	for rf in self.rcsfiles:
	744	try:
	745	results = results + rf.FilePairsByDate(runclass)
	746	except SkipRcsException:
	747	print 'file %s has compressed versions: skipping' % (rf.fname)
	748	except NotEnoughVersions:
	749	print 'testing %s on %s: not enough versions' % (runclass, rf.fname)
	750	else:
	751	good.append(rf)
	752	#end
	753	self.rcsfiles = good
	754	self.ReportPairs(runclass, results)
	755	return results
	756	#end
	757
	758	def ReportPairs(self, name, results):
	759	encode_time = 0
	760	decode_time = 0
	761	encode_size = 0
	762	for r in results:
	763	encode_time += r.encode_time.mean
	764	decode_time += r.decode_time.mean
	765	encode_size += r.encode_size
	766	#end
	767	print '%s rcs: encode %.2f s: decode %.2f s: size %d' % \
	768	(name, encode_time, decode_time, encode_size)
	769	#end
	770
	771	def MakeBigFiles(self, rand):
	772	f1 = open(TMPDIR + "/big.1", "w")
	773	f2 = open(TMPDIR + "/big.2", "w")
	774	population = []
	775	for file in self.rcsfiles:
	776	if len(file.versions) < 2:
	777	continue
	778	population.append(file)
	779	#end
	780	f1sz = 0
	781	f2sz = 0
	782	fcount = int(len(population) * FILE_P)
	783	assert fcount > 0
	784	for file in rand.sample(population, fcount):
	785	m = IGNORE_FILENAME.match(file.fname)
	786	if m != None:
	787	continue
	788	#end
	789	r1, r2 = rand.sample(xrange(0, len(file.versions)), 2)
	790	f1sz += file.AppendVersion(f1, r1)
	791	f2sz += file.AppendVersion(f2, r2)
	792	#m.update('%s,%s,%s ' % (file.fname[len(RCSDIR):], file.Vstr(r1), file.Vstr(r2)))
	793	#end
	794	testkey = 'rcs%d' % self.biground
	795	self.biground = self.biground + 1
	796
	797	print '%s; source %u bytes; target %u bytes' % (testkey, f1sz, f2sz)
	798	f1.close()
	799	f2.close()
	800	return (TMPDIR + "/big.1",
	801	TMPDIR + "/big.2",
	802	testkey)
	803	#end
	804
	805	def Generator(self):
	806	return lambda rand: self.MakeBigFiles(rand)
	807	#end
	808	#end
	809
	810	# find a set of RCS files for testing
	811	def GetTestRcsFiles():
	812	rcsf = RcsFinder()
	813	rcsf.Crawl(RCSDIR)
	814	if len(rcsf.rcsfiles) == 0:
	815	raise CommandError('', 'no RCS files')
	816	#end
	817	rcsf.Summarize()
	818	print "rcsfiles: rcsfiles %d; subdirs %d; others %d; skipped %d" % (len(rcsf.rcsfiles),
	819	len(rcsf.subdirs),
	820	len(rcsf.others),
	821	len(rcsf.skipped))
	822	print StatList([x.rcssize for x in rcsf.rcsfiles], "rcssize").str
	823	print StatList([x.totrev for x in rcsf.rcsfiles], "totrev").str
	824	return rcsf
	825	#end
	826
	827	class SampleDataTest:
	828	def __init__(self, dirs):
	829	self.pairs = []
	830	while dirs:
	831	d = dirs[0]
	832	dirs = dirs[1:]
	833	l = os.listdir(d)
	834	files = []
	835	for e in l:
	836	p = os.path.join(d, e)
	837	if os.path.isdir(p):
	838	dirs.append(p)
	839	else:
	840	files.append(p)
	841	#end
	842	#end
	843	if len(files) > 1:
	844	files.sort()
	845	for x in xrange(len(files) - 1):
	846	self.pairs.append((files[x], files[x+1],
	847	'%s-%s' % (files[x], files[x+1])))
	848	#end
	849	#end
	850	#end
	851	#end
	852
	853	def Generator(self):
	854	return lambda rand: rand.choice(self.pairs)
	855	#end
	856	#end
	857
	858	# configs are represented as a list of values,
	859	# program takes a list of strings:
	860	def ConfigToArgs(config):
	861	args = [ '-C',
	862	','.join([str(x) for x in config[0:SOFT_CONFIG_CNT]])]
	863	for i in range(SOFT_CONFIG_CNT, len(CONFIG_ORDER)):
	864	key = CONFIG_ARGMAP[CONFIG_ORDER[i]]
	865	val = config[i]
	866	if val == 'true' or val == 'false':
	867	if val == 'true':
	868	args.append('%s' % key)
	869	#end
	870	else:
	871	args.append('%s=%s' % (key, val))
	872	#end
	873	#end
	874	return args
	875	#end
	876
	877	#
	878	class RandomTest:
	879	def __init__(self, tnum, tinput, config, syntuple = None):
	880	self.mytinput = tinput[2]
	881	self.myconfig = config
	882	self.tnum = tnum
	883
	884	if syntuple != None:
	885	self.runtime = syntuple[0]
	886	self.compsize = syntuple[1]
	887	self.decodetime = None
	888	else:
	889	args = ConfigToArgs(config)
	890	result = TimedTest(tinput[1], tinput[0], Xdelta3Runner(args))
	891
	892	self.runtime = result.encode_time.mean
	893	self.compsize = result.encode_size
	894	self.decodetime = result.decode_time.mean
	895	#end
	896
	897	self.score = None
	898	self.time_pos = None
	899	self.size_pos = None
	900	self.score_pos = None
	901	#end
	902
	903	def __str__(self):
	904	decodestr = ''
	905	if not SKIP_DECODE:
	906	decodestr = ' %.6f' % self.decodetime
	907	#end
	908	return 'time %.6f%s size %d%s << %s >>%s' % (
	909	self.time(), ((self.time_pos != None) and (" (%s)" % self.time_pos) or ""),
	910	self.size(), ((self.size_pos != None) and (" (%s)" % self.size_pos) or ""),
	911	c2str(self.config()),
	912	decodestr)
	913	#end
	914
	915	def time(self):
	916	return self.runtime
	917	#end
	918
	919	def size(self):
	920	return self.compsize
	921	#end
	922
	923	def config(self):
	924	return self.myconfig
	925	#end
	926
	927	def score(self):
	928	return self.score
	929	#end
	930
	931	def tinput(self):
	932	return self.mytinput
	933	#end
	934	#end
	935
	936	def PosInAlist(l, e):
	937	for i in range(0, len(l)):
	938	if l[i][1] == e:
	939	return i;
	940	#end
	941	#end
	942	return -1
	943	#end
	944
	945	# Generates a set of num_results test configurations, given the list of
	946	# retest-configs.
	947	def RandomTestConfigs(rand, input_configs, num_results):
	948
	949	outputs = input_configs[:]
	950	have_set = dict([(c,c) for c in input_configs])
	951
	952	# Compute a random configuration
	953	def RandomConfig():
	954	config = []
	955	cmap = {}
	956	for key in CONFIG_ORDER:
	957	val = cmap[key] = (INPUT_SPEC(rand)[key])(cmap)
	958	config.append(val)
	959	#end
	960	return tuple(config)
	961	#end
	962
	963	while len(outputs) < num_results:
	964	newc = None
	965	for i in xrange(10):
	966	c = RandomConfig()
	967	if have_set.has_key(c):
	968	continue
	969	#end
	970	have_set[c] = c
	971	newc = c
	972	break
	973	if newc is None:
	974	print 'stopped looking for configs at %d' % len(outputs)
	975	break
	976	#end
	977	outputs.append(c)
	978	#end
	979	outputs.sort()
	980	return outputs
	981	#end
	982
	983	def RunTestLoop(rand, generator, rounds):
	984	configs = []
	985	for rnum in xrange(rounds):
	986	configs = RandomTestConfigs(rand, configs, MAX_RESULTS)
	987	tinput = generator(rand)
	988	tests = []
	989	for x in xrange(len(configs)):
	990	t = RandomTest(x, tinput, configs[x])
	991	print 'Round %d test %d: %s' % (rnum, x, t)
	992	tests.append(t)
	993	#end
	994	results = ScoreTests(tests)
	995
	996	for r in results:
	997	c = r.config()
	998	if not test_all_config_results.has_key(c):
	999	test_all_config_results[c] = [r]
	1000	else:
	1001	test_all_config_results[c].append(r)
	1002	#end
	1003	#end
	1004
	1005	GraphResults('expt%d' % rnum, results)
	1006	GraphSummary('sum%d' % rnum, results)
	1007
	1008	# re-test some fraction
	1009	configs = [r.config() for r in results[0:int(MAX_RESULTS * KEEP_P)]]
	1010	#end
	1011	#end
	1012
	1013	# TODO: cleanup
	1014	test_all_config_results = {}
	1015
	1016	def ScoreTests(results):
	1017	scored = []
	1018	timed = []
	1019	sized = []
	1020
	1021	t_min = float(min([test.time() for test in results]))
	1022	#t_max = float(max([test.time() for test in results]))
	1023	s_min = float(min([test.size() for test in results]))
	1024	#s_max = float(max([test.size() for test in results]))
	1025
	1026	for test in results:
	1027
	1028	# Hyperbolic function. Smaller scores still better
	1029	red = 0.999 # minimum factors for each dimension are 1/1000
	1030	test.score = ((test.size() - s_min * red) *
	1031	(test.time() - t_min * red))
	1032
	1033	scored.append((test.score, test))
	1034	timed.append((test.time(), test))
	1035	sized.append((test.size(), test))
	1036	#end
	1037
	1038	scored.sort()
	1039	timed.sort()
	1040	sized.sort()
	1041
	1042	best_by_size = []
	1043	best_by_time = []
	1044
	1045	pos = 0
	1046	for (score, test) in scored:
	1047	pos += 1
	1048	test.score_pos = pos
	1049	#end
	1050
	1051	scored = [x[1] for x in scored]
	1052
	1053	for test in scored:
	1054	test.size_pos = PosInAlist(sized, test)
	1055	test.time_pos = PosInAlist(timed, test)
	1056	#end
	1057
	1058	for test in scored:
	1059	c = test.config()
	1060	s = 0.0
	1061	print 'H-Score: %0.9f %s' % (test.score, test)
	1062	#end
	1063
	1064	return scored
	1065	#end
	1066
	1067	def GraphResults(desc, results):
	1068	f = open("data-%s.csv" % desc, "w")
	1069	for r in results:
	1070	f.write("%0.9f\t%d\t# %s\n" % (r.time(), r.size(), r))
	1071	#end
	1072	f.close()
	1073	os.system("./plot.sh data-%s.csv plot-%s.jpg" % (desc, desc))
	1074	#end
	1075
	1076	def GraphSummary(desc, results_ignore):
	1077	test_population = 0
	1078	config_ordered = []
	1079
	1080	# drops duplicate test/config pairs (TODO: don't retest them)
	1081	for config, cresults in test_all_config_results.items():
	1082	input_config_map = {}
	1083	uniq = []
	1084	for test in cresults:
	1085	assert test.config() == config
	1086	test_population += 1
	1087	key = test.tinput()
	1088	if not input_config_map.has_key(key):
	1089	input_config_map[key] = {}
	1090	#end
	1091	if input_config_map[key].has_key(config):
	1092	print 'skipping repeat test %s vs. %s' % (input_config_map[key][config], test)
	1093	continue
	1094	#end
	1095	input_config_map[key][config] = test
	1096	uniq.append(test)
	1097	#end
	1098	config_ordered.append(uniq)
	1099	#end
	1100
	1101	# sort configs descending by number of tests
	1102	config_ordered.sort(lambda x, y: len(y) - len(x))
	1103
	1104	print 'population %d: %d configs %d results' % \
	1105	(test_population,
	1106	len(config_ordered),
	1107	len(config_ordered[0]))
	1108
	1109	if config_ordered[0] == 1:
	1110	return
	1111	#end
	1112
	1113	# a map from test-key to test-list w/ various configs
	1114	input_set = {}
	1115	osize = len(config_ordered)
	1116
	1117	for i in xrange(len(config_ordered)):
	1118	config = config_ordered[i][0].config()
	1119	config_tests = config_ordered[i]
	1120
	1121	#print '%s has %d tested inputs' % (config, len(config_tests))
	1122
	1123	if len(input_set) == 0:
	1124	input_set = dict([(t.tinput(), [t]) for t in config_tests])
	1125	continue
	1126	#end
	1127
	1128	# a map from test-key to test-list w/ various configs
	1129	update_set = {}
	1130	for r in config_tests:
	1131	t = r.tinput()
	1132	if input_set.has_key(t):
	1133	update_set[t] = input_set[t] + [r]
	1134	else:
	1135	#print 'config %s does not have test %s' % (config, t)
	1136	pass
	1137	#end
	1138	#end
	1139
	1140	if len(update_set) <= 1:
	1141	break
	1142	#end
	1143
	1144	input_set = update_set
	1145
	1146	# continue if there are more w/ the same number of inputs
	1147	if i < (len(config_ordered) - 1) and \
	1148	len(config_ordered[i + 1]) == len(config_tests):
	1149	continue
	1150	#end
	1151
	1152	# synthesize results for multi-test inputs
	1153	config_num = None
	1154
	1155	# map of config to sum(various test-keys)
	1156	smap = {}
	1157	for (key, tests) in input_set.items():
	1158	if config_num == None:
	1159	# config_num should be the same in all elements
	1160	config_num = len(tests)
	1161	smap = dict([(r.config(),
	1162	(r.time(),
	1163	r.size()))
	1164	for r in tests])
	1165	else:
	1166	# compuate the per-config sum of time/size
	1167	assert config_num == len(tests)
	1168	smap = dict([(r.config(),
	1169	(smap[r.config()][0] + r.time(),
	1170	smap[r.config()][1] + r.size()))
	1171	for r in tests])
	1172	#end
	1173	#end
	1174
	1175	if config_num == 1:
	1176	continue
	1177	#end
	1178
	1179	if len(input_set) == osize:
	1180	break
	1181	#end
	1182
	1183	summary = '%s-%d' % (desc, len(input_set))
	1184	osize = len(input_set)
	1185
	1186	print 'generate %s w/ %d configs' % (summary, config_num)
	1187	syn = [RandomTest(0, (None, None, summary), config,
	1188	syntuple = (smap[config][0], smap[config][1]))
	1189	for config in smap.keys()]
	1190	syn = ScoreTests(syn)
	1191	#print 'smap is %s' % (smap,)
	1192	#print 'syn is %s' % (' and '.join([str(x) for x in syn]))
	1193	GraphResults(summary, syn)
	1194	#end
	1195	#end
	1196
# Script entry point: recreate the scratch directory, collect the RCS test
# files and run the random-configuration test loop.  The other experiments
# are kept below as commented-out alternatives.
if __name__ == "__main__":
    try:
        # start from an empty scratch directory
        RunCommand(['rm', '-rf', TMPDIR])
        os.mkdir(TMPDIR)

        rcsf = GetTestRcsFiles()
        generator = rcsf.Generator()

        #sample = SampleDataTest([SAMPLEDIR])
        #generator = sample.Generator()

        # fixed seed, so the sequence drawn from `rand` is the same each run
        rand = random.Random(135135135135135)
        RunTestLoop(rand, generator, TEST_ROUNDS)

        #RunSpeedTest()

        #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9']))
        #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-S', 'djw']))
        #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-T']))

        #x1r = rcsf.AllPairsByDate(Xdelta1RunClass())

    except CommandError:
        # CommandError already printed its details when it was constructed;
        # TMPDIR is left in place on this path.
        pass
    else:
        # success: remove the scratch directory
        RunCommand(['rm', '-rf', TMPDIR])
        pass
    #end
#end

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: nikanabo/current/xdelta/diy/xdelta3-regtest.py@ 567

Download in other formats: