source: nikanabo/current/xdelta/diy/xdelta3-regtest.py@ 1123

Last change on this file since 1123 was 185, checked in by geyser, 18 years ago
File size: 32.5 KB
Line 
1#!/usr/bin/python2.4
2# xdelta 3 - delta compression tools and library
3# Copyright (C) 2003, 2006, 2007. Joshua P. MacDonald
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation; either version 2 of the License, or
8# (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13# GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program; if not, write to the Free Software
17# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
19# TODO: test 1.5 vs. greedy
20
21import os, sys, math, re, time, types, array, random
22import xdelta3main
23import xdelta3
24
25#RCSDIR = '/mnt/polaroid/Polaroid/orbit_linux/home/jmacd/PRCS'
26RCSDIR = '/tmp/PRCS_read_copy'
27SAMPLEDIR = "/tmp/WESNOTH_tmp/diff"
28
29#RCSDIR = 'G:/jmacd/PRCS/prcs/b'
30#SAMPLEDIR = "C:/sample_data/Wesnoth/tar"
31
32#
33MIN_SIZE = 0
34
35TIME_TOO_SHORT = 0.050
36
37SKIP_TRIALS = 2
38MIN_TRIALS = 3
39MAX_TRIALS = 15
40
41SKIP_DECODE = 1
42
43# 10 = fast 1.5 = slow
44MIN_STDDEV_PCT = 1.5
45
46# How many results per round
47MAX_RESULTS = 500
48TEST_ROUNDS = 500
49KEEP_P = (0.5)
50
51# For RCS testing, what percent to select
52FILE_P = (0.30)
53
54# For run-speed tests
55MIN_RUN = 1000 * 1000 * 1
56MAX_RUN = 1000 * 1000 * 10
57
58# Testwide defaults
59ALL_ARGS = [
60 # -v
61 ]
62
63# The first 7 args go to -C
64SOFT_CONFIG_CNT = 7
65
66CONFIG_ORDER = [ 'large_look',
67 'large_step',
68 'small_look',
69 'small_chain',
70 'small_lchain',
71 'max_lazy',
72 'long_enough',
73
74 # > SOFT_CONFIG_CNT
75 'nocompress',
76 'winsize',
77 'srcwinsize',
78 'sprevsz',
79 'iopt',
80 'djw',
81 'altcode',
82 ]
83
84CONFIG_ARGMAP = {
85 'winsize' : '-W',
86 'srcwinsize' : '-B',
87 'sprevsz' : '-P',
88 'iopt' : '-I',
89 'nocompress' : '-N',
90 'djw' : '-Sdjw',
91 'altcode' : '-T',
92 }
93
94def INPUT_SPEC(rand):
95 return {
96
97 # Time/space costs:
98
99 # -C 1,2,3,4,5,6,7
100 'large_look' : lambda d: rand.choice([9]),
101 'large_step' : lambda d: rand.choice([3, 5, 7, 8, 15]),
102 'small_chain' : lambda d: rand.choice([40, 10, 4, 1]),
103 'small_lchain' : lambda d: rand.choice([x for x in [10, 4, 2, 1] if x <= d['small_chain']]),
104 'max_lazy' : lambda d: rand.choice([9, 18, 27, 36, 72, 108]),
105 'long_enough' : lambda d: rand.choice([9, 18, 27, 36, 72, 108]),
106 'small_look' : lambda d: rand.choice([4]),
107
108 # -N
109 'nocompress' : lambda d: rand.choice(['true']),
110
111 # -T
112 'altcode' : lambda d: rand.choice(['false']),
113
114 # -S djw
115 'djw' : lambda d: rand.choice(['false']),
116
117 # Memory costs:
118
119 # -W
120 'winsize' : lambda d: 8 * (1<<20),
121
122 # -B
123 'srcwinsize' : lambda d: 64 * (1<<20),
124
125 # -I 0 is unlimited
126 'iopt' : lambda d: 0,
127
128 # -P only powers of two
129 'sprevsz' : lambda d: rand.choice([x * (1<<16) for x in [4]]),
130 }
131#end
132
133#
134TMPDIR = '/tmp/xd3regtest.%d' % os.getpid()
135
136RUNFILE = os.path.join(TMPDIR, 'run')
137DFILE = os.path.join(TMPDIR, 'output')
138RFILE = os.path.join(TMPDIR, 'recon')
139
140HEAD_STATE = 0
141BAR_STATE = 1
142REV_STATE = 2
143DATE_STATE = 3
144
145#
146IGNORE_FILENAME = re.compile('.*\\.(gif|jpg).*')
147
148# rcs output
149RE_TOTREV = re.compile('total revisions: (\\d+)')
150RE_BAR = re.compile('----------------------------')
151RE_REV = re.compile('revision (.+)')
152RE_DATE = re.compile('date: ([^;]+);.*')
153# xdelta output
154RE_HDRSZ = re.compile('VCDIFF header size: +(\\d+)')
155RE_EXTCOMP = re.compile('XDELTA ext comp.*')
156
157def c2str(c):
158 return ' '.join(['%s' % x for x in c])
159#end
160
161def SumList(l):
162 return reduce(lambda x,y: x+y, l)
163#end
164
165# returns (total, mean, stddev, q2 (median),
166# (q3-q1)/2 ("semi-interquartile range"), max-min (spread))
167class StatList:
168 def __init__(self,l,desc):
169 cnt = len(l)
170 assert(cnt > 1)
171 l.sort()
172 self.cnt = cnt
173 self.l = l
174 self.total = SumList(l)
175 self.mean = self.total / float(self.cnt)
176 self.s = math.sqrt(SumList([(x-self.mean) * (x - self.mean) for x in l]) / float(self.cnt-1))
177 self.q0 = l[0]
178 self.q1 = l[int(self.cnt/4.0+0.5)]
179 self.q2 = l[int(self.cnt/2.0+0.5)]
180 self.q3 = l[min(self.cnt-1,int((3.0*self.cnt)/4.0+0.5))]
181 self.q4 = l[self.cnt-1]+1
182 self.siqr = (self.q3-self.q1)/2.0;
183 self.spread = (self.q4-self.q0)
184 self.str = '%s %d; mean %d; sdev %d; q2 %d; .5(q3-q1) %.1f; spread %d' % \
185 (desc, self.total, self.mean, self.s, self.q2, self.siqr, self.spread)
186 #end
187#end
188
189def RunCommand(args, ok = [0]):
190 #print 'run command %s' % (' '.join(args))
191 p = os.spawnvp(os.P_WAIT, args[0], args)
192 if p not in ok:
193 raise CommandError(args, 'exited %d' % p)
194 #end
195#end
196
197def RunCommandIO(args,infn,outfn):
198 p = os.fork()
199 if p == 0:
200 os.dup2(os.open(infn,os.O_RDONLY),0)
201 os.dup2(os.open(outfn,os.O_CREAT|os.O_TRUNC|os.O_WRONLY),1)
202 os.execvp(args[0], args)
203 else:
204 s = os.waitpid(p,0)
205 o = os.WEXITSTATUS(s[1])
206 if not os.WIFEXITED(s[1]) or o != 0:
207 raise CommandError(args, 'exited %d' % o)
208 #end
209 #end
210#end
211
212class TimedTest:
213 def __init__(self, target, source, runnable,
214 skip_trials = SKIP_TRIALS,
215 min_trials = MIN_TRIALS,
216 max_trials = MAX_TRIALS,
217 min_stddev_pct = MIN_STDDEV_PCT):
218 self.target = target
219 self.source = source
220 self.runnable = runnable
221
222 self.skip_trials = skip_trials
223 self.min_trials = min(min_trials, max_trials)
224 self.max_trials = max_trials
225 self.min_stddev_pct = min_stddev_pct
226
227 self.encode_time = self.DoTest(DFILE,
228 lambda x: x.Encode(self.target, self.source, DFILE))
229 self.encode_size = runnable.EncodeSize(DFILE)
230
231 if SKIP_DECODE:
232 self.decode_time = StatList([1, 1], 'not decoded')
233 return
234 #end
235
236 self.decode_time = self.DoTest(RFILE,
237 lambda x: x.Decode(DFILE, self.source, RFILE),
238 )
239
240 # verify
241 runnable.Verify(self.target, RFILE)
242 #end
243
244 def DoTest(self, fname, func):
245 trials = 0
246 measured = []
247
248 while 1:
249 try:
250 os.remove(fname)
251 except OSError:
252 pass
253
254 start_time = time.time()
255 start_clock = time.clock()
256
257 func(self.runnable)
258
259 total_clock = (time.clock() - start_clock)
260 total_time = (time.time() - start_time)
261
262 elap_time = max(total_time, 0.0000001)
263 elap_clock = max(total_clock, 0.0000001)
264
265 trials = trials + 1
266
267 # skip some of the first trials
268 if trials > self.skip_trials:
269 measured.append((elap_clock, elap_time))
270 #print 'measurement total: %.1f ms' % (total_time * 1000.0)
271
272 # at least so many
273 if trials < (self.skip_trials + self.min_trials):
274 #print 'continue: need more trials: %d' % trials
275 continue
276
277 # compute %variance
278 done = 0
279 if self.skip_trials + self.min_trials <= 2:
280 measured = measured + measured;
281 done = 1
282 #end
283
284 time_stat = StatList([x[1] for x in measured], 'elap time')
285 sp = float(time_stat.s) / float(time_stat.mean)
286
287 # what if MAX_TRIALS is exceeded?
288 too_many = (trials - self.skip_trials) >= self.max_trials
289 good = (100.0 * sp) < self.min_stddev_pct
290 if done or too_many or good:
291 trials = trials - self.skip_trials
292 if not done and not good:
293 #print 'too many trials: %d' % trials
294 pass
295 #clock = StatList([x[0] for x in measured], 'elap clock')
296 return time_stat
297 #end
298 #end
299 #end
300#end
301
302def Decimals(start, end):
303 l = []
304 step = start
305 while 1:
306 r = range(step, step * 10, step)
307 l = l + r
308 if step * 10 >= end:
309 l.append(step * 10)
310 break
311 step = step * 10
312 return l
313#end
314
315# This tests the raw speed of 0-byte inputs
316def RunSpeedTest():
317 for L in Decimals(MIN_RUN, MAX_RUN):
318 SetFileSize(RUNFILE, L)
319
320 trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<20)]))
321 ReportSpeed(L, trx, '1MB ')
322
323 trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<19)]))
324 ReportSpeed(L, trx, '512k')
325
326 trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<18)]))
327 ReportSpeed(L, trx, '256k')
328
329 trm = TimedTest(RUNFILE, None, Xdelta3Mod1(RUNFILE))
330 ReportSpeed(L, trm, 'swig')
331
332 trg = TimedTest(RUNFILE, None, GzipRun1())
333 ReportSpeed(L,trg,'gzip')
334 #end
335#end
336
337def SetFileSize(F,L):
338 fd = os.open(F, os.O_CREAT | os.O_WRONLY)
339 os.ftruncate(fd,L)
340 assert os.fstat(fd).st_size == L
341 os.close(fd)
342#end
343
344def ReportSpeed(L,tr,desc):
345 print '%s run length %u: size %u: time %.3f ms: decode %.3f ms' % \
346 (desc, L,
347 tr.encode_size,
348 tr.encode_time.mean * 1000.0,
349 tr.decode_time.mean * 1000.0)
350#end
351
352class Xdelta3RunClass:
353 def __init__(self, extra):
354 self.extra = extra
355 #end
356
357 def __str__(self):
358 return ' '.join(self.extra)
359 #end
360
361 def New(self):
362 return Xdelta3Runner(self.extra)
363 #end
364#end
365
366class Xdelta3Runner:
367 def __init__(self, extra):
368 self.extra = extra
369 #end
370
371 def Encode(self, target, source, output):
372 args = (ALL_ARGS +
373 self.extra +
374 ['-e'])
375 if source:
376 args.append('-s')
377 args.append(source)
378 #end
379 args = args + [target, output]
380 self.Main(args)
381 #end
382
383 def Decode(self, input, source, output):
384 args = (ALL_ARGS +
385 ['-d'])
386 if source:
387 args.append('-s')
388 args.append(source)
389 #end
390 args = args + [input, output]
391 self.Main(args)
392 #end
393
394 def Verify(self, target, recon):
395 RunCommand(('cmp', target, recon))
396 #end
397
398 def EncodeSize(self, output):
399 return os.stat(output).st_size
400 #end
401
402 def Main(self, args):
403 try:
404 xdelta3main.main(args)
405 except Exception, e:
406 raise CommandError(args, "xdelta3.main exception")
407 #end
408 #end
409#end
410
411class Xdelta3Mod1:
412 def __init__(self, file):
413 self.target_data = open(file, 'r').read()
414 #end
415
416 def Encode(self, ignore1, ignore2, ignore3):
417 r1, encoded = xdelta3.xd3_encode_memory(self.target_data, None, 1000000, 1<<10)
418 if r1 != 0:
419 raise CommandError('memory', 'encode failed: %s' % r1)
420 #end
421 self.encoded = encoded
422 #end
423
424 def Decode(self, ignore1, ignore2, ignore3):
425 r2, data1 = xdelta3.xd3_decode_memory(self.encoded, None, len(self.target_data))
426 if r2 != 0:
427 raise CommandError('memory', 'decode failed: %s' % r1)
428 #end
429 self.decoded = data1
430 #end
431
432 def Verify(self, ignore1, ignore2):
433 if self.target_data != self.decoded:
434 raise CommandError('memory', 'bad decode')
435 #end
436 #end
437
438 def EncodeSize(self, ignore1):
439 return len(self.encoded)
440 #end
441#end
442
443class GzipRun1:
444 def Encode(self, target, source, output):
445 assert source == None
446 RunCommandIO(['gzip', '-cf'], target, output)
447 #end
448
449 def Decode(self, input, source, output):
450 assert source == None
451 RunCommandIO(['gzip', '-dcf'], input, output)
452 #end
453
454 def Verify(self, target, recon):
455 RunCommand(('cmp', target, recon))
456 #end
457
458 def EncodeSize(self, output):
459 return os.stat(output).st_size
460 #end
461#end
462
463class Xdelta1RunClass:
464 def __str__(self):
465 return 'xdelta1'
466 #end
467
468 def New(self):
469 return Xdelta1Runner()
470 #end
471#end
472
473class Xdelta1Runner:
474 def Encode(self, target, source, output):
475 assert source != None
476 args = ['xdelta1', 'delta', '-q', source, target, output]
477 RunCommand(args, [0, 1])
478 #end
479
480 def Decode(self, input, source, output):
481 assert source != None
482 args = ['xdelta1', 'patch', '-q', input, source, output]
483 # Note: for dumb historical reasons, xdelta1 returns 1 or 0
484 RunCommand(args)
485 #end
486
487 def Verify(self, target, recon):
488 RunCommand(('cmp', target, recon))
489 #end
490
491 def EncodeSize(self, output):
492 return os.stat(output).st_size
493 #end
494#end
495
496# exceptions
497class SkipRcsException:
498 def __init__(self,reason):
499 self.reason = reason
500 #end
501#end
502
503class NotEnoughVersions:
504 def __init__(self):
505 pass
506 #end
507#end
508
509class CommandError:
510 def __init__(self,cmd,str):
511 if type(cmd) is types.TupleType or \
512 type(cmd) is types.ListType:
513 cmd = reduce(lambda x,y: '%s %s' % (x,y),cmd)
514 #end
515 print 'command was: ',cmd
516 print 'command failed: ',str
517 print 'have fun debugging'
518 #end
519#end
520
521class RcsVersion:
522 def __init__(self,vstr):
523 self.vstr = vstr
524 #end
525 def __cmp__(self,other):
526 return cmp(self.date, other.date)
527 #end
528 def __str__(self):
529 return str(self.vstr)
530 #end
531#end
532
533class RcsFile:
534
535 def __init__(self, fname):
536 self.fname = fname
537 self.versions = []
538 self.state = HEAD_STATE
539 #end
540
541 def SetTotRev(self,s):
542 self.totrev = int(s)
543 #end
544
545 def Rev(self,s):
546 self.rev = RcsVersion(s)
547 if len(self.versions) >= self.totrev:
548 raise SkipRcsException('too many versions (in log messages)')
549 #end
550 self.versions.append(self.rev)
551 #end
552
553 def Date(self,s):
554 self.rev.date = s
555 #end
556
557 def Match(self, line, state, rx, gp, newstate, f):
558 if state == self.state:
559 m = rx.match(line)
560 if m:
561 if f:
562 f(m.group(gp))
563 #end
564 self.state = newstate
565 return 1
566 #end
567 #end
568 return None
569 #end
570
571 def Sum1Rlog(self):
572 f = os.popen('rlog '+self.fname, "r")
573 l = f.readline()
574 while l:
575 if self.Match(l, HEAD_STATE, RE_TOTREV, 1, BAR_STATE, self.SetTotRev):
576 pass
577 elif self.Match(l, BAR_STATE, RE_BAR, 1, REV_STATE, None):
578 pass
579 elif self.Match(l, REV_STATE, RE_REV, 1, DATE_STATE, self.Rev):
580 pass
581 elif self.Match(l, DATE_STATE, RE_DATE, 1, BAR_STATE, self.Date):
582 pass
583 #end
584 l = f.readline()
585 #end
586 c = f.close()
587 if c != None:
588 raise c
589 #end
590 #end
591
592 def Sum1(self):
593 st = os.stat(self.fname)
594 self.rcssize = st.st_size
595 self.Sum1Rlog()
596 if self.totrev != len(self.versions):
597 raise SkipRcsException('wrong version count')
598 #end
599 self.versions.sort()
600 #end
601
602 def Checkout(self,n):
603 v = self.versions[n]
604 out = open(self.Verf(n), "w")
605 cmd = 'co -ko -p%s %s' % (v.vstr, self.fname)
606 total = 0
607 (inf,
608 stream,
609 err) = os.popen3(cmd, "r")
610 inf.close()
611 buf = stream.read()
612 while buf:
613 total = total + len(buf)
614 out.write(buf)
615 buf = stream.read()
616 #end
617 v.vsize = total
618 estr = ''
619 buf = err.read()
620 while buf:
621 estr = estr + buf
622 buf = err.read()
623 #end
624 if stream.close():
625 raise CommandError(cmd, 'checkout failed: %s\n%s\n%s' % (v.vstr, self.fname, estr))
626 #end
627 out.close()
628 err.close()
629 #end
630
631 def Vdate(self,n):
632 return self.versions[n].date
633 #end
634
635 def Vstr(self,n):
636 return self.versions[n].vstr
637 #end
638
639 def Verf(self,n):
640 return os.path.join(TMPDIR, 'input.%d' % n)
641 #end
642
643 def FilePairsByDate(self, runclass):
644 if self.totrev < 2:
645 raise NotEnoughVersions()
646 #end
647 self.Checkout(0)
648 ntrials = []
649 if self.totrev < 2:
650 return vtrials
651 #end
652 for v in range(0,self.totrev-1):
653 if v > 1:
654 os.remove(self.Verf(v-1))
655 #end
656 self.Checkout(v+1)
657 if os.stat(self.Verf(v)).st_size < MIN_SIZE or \
658 os.stat(self.Verf(v+1)).st_size < MIN_SIZE:
659 continue
660 #end
661
662 result = TimedTest(self.Verf(v+1),
663 self.Verf(v),
664 runclass.New())
665
666 target_size = os.stat(self.Verf(v+1)).st_size
667
668 ntrials.append(result)
669 #end
670
671 os.remove(self.Verf(self.totrev-1))
672 os.remove(self.Verf(self.totrev-2))
673 return ntrials
674 #end
675
676 def AppendVersion(self, f, n):
677 self.Checkout(n)
678 rf = open(self.Verf(n), "r")
679 data = rf.read()
680 f.write(data)
681 rf.close()
682 return len(data)
683 #end
684
685class RcsFinder:
686 def __init__(self):
687 self.subdirs = []
688 self.rcsfiles = []
689 self.others = []
690 self.skipped = []
691 self.biground = 0
692 #end
693
694 def Scan1(self,dir):
695 dents = os.listdir(dir)
696 subdirs = []
697 rcsfiles = []
698 others = []
699 for dent in dents:
700 full = os.path.join(dir, dent)
701 if os.path.isdir(full):
702 subdirs.append(full)
703 elif dent[len(dent)-2:] == ",v":
704 rcsfiles.append(RcsFile(full))
705 else:
706 others.append(full)
707 #end
708 #end
709 self.subdirs = self.subdirs + subdirs
710 self.rcsfiles = self.rcsfiles + rcsfiles
711 self.others = self.others + others
712 return subdirs
713 #end
714
715 def Crawl(self, dir):
716 subdirs = [dir]
717 while subdirs:
718 s1 = self.Scan1(subdirs[0])
719 subdirs = subdirs[1:] + s1
720 #end
721 #end
722
723 def Summarize(self):
724 good = []
725 for rf in self.rcsfiles:
726 try:
727 rf.Sum1()
728 if rf.totrev < 2:
729 raise SkipRcsException('too few versions (< 2)')
730 #end
731 except SkipRcsException, e:
732 #print 'skipping file %s: %s' % (rf.fname, e.reason)
733 self.skipped.append(rf)
734 else:
735 good.append(rf)
736 #end
737 self.rcsfiles = good
738 #end
739
740 def AllPairsByDate(self, runclass):
741 results = []
742 good = []
743 for rf in self.rcsfiles:
744 try:
745 results = results + rf.FilePairsByDate(runclass)
746 except SkipRcsException:
747 print 'file %s has compressed versions: skipping' % (rf.fname)
748 except NotEnoughVersions:
749 print 'testing %s on %s: not enough versions' % (runclass, rf.fname)
750 else:
751 good.append(rf)
752 #end
753 self.rcsfiles = good
754 self.ReportPairs(runclass, results)
755 return results
756 #end
757
758 def ReportPairs(self, name, results):
759 encode_time = 0
760 decode_time = 0
761 encode_size = 0
762 for r in results:
763 encode_time += r.encode_time.mean
764 decode_time += r.decode_time.mean
765 encode_size += r.encode_size
766 #end
767 print '%s rcs: encode %.2f s: decode %.2f s: size %d' % \
768 (name, encode_time, decode_time, encode_size)
769 #end
770
771 def MakeBigFiles(self, rand):
772 f1 = open(TMPDIR + "/big.1", "w")
773 f2 = open(TMPDIR + "/big.2", "w")
774 population = []
775 for file in self.rcsfiles:
776 if len(file.versions) < 2:
777 continue
778 population.append(file)
779 #end
780 f1sz = 0
781 f2sz = 0
782 fcount = int(len(population) * FILE_P)
783 assert fcount > 0
784 for file in rand.sample(population, fcount):
785 m = IGNORE_FILENAME.match(file.fname)
786 if m != None:
787 continue
788 #end
789 r1, r2 = rand.sample(xrange(0, len(file.versions)), 2)
790 f1sz += file.AppendVersion(f1, r1)
791 f2sz += file.AppendVersion(f2, r2)
792 #m.update('%s,%s,%s ' % (file.fname[len(RCSDIR):], file.Vstr(r1), file.Vstr(r2)))
793 #end
794 testkey = 'rcs%d' % self.biground
795 self.biground = self.biground + 1
796
797 print '%s; source %u bytes; target %u bytes' % (testkey, f1sz, f2sz)
798 f1.close()
799 f2.close()
800 return (TMPDIR + "/big.1",
801 TMPDIR + "/big.2",
802 testkey)
803 #end
804
805 def Generator(self):
806 return lambda rand: self.MakeBigFiles(rand)
807 #end
808#end
809
810# find a set of RCS files for testing
811def GetTestRcsFiles():
812 rcsf = RcsFinder()
813 rcsf.Crawl(RCSDIR)
814 if len(rcsf.rcsfiles) == 0:
815 raise CommandError('', 'no RCS files')
816 #end
817 rcsf.Summarize()
818 print "rcsfiles: rcsfiles %d; subdirs %d; others %d; skipped %d" % (len(rcsf.rcsfiles),
819 len(rcsf.subdirs),
820 len(rcsf.others),
821 len(rcsf.skipped))
822 print StatList([x.rcssize for x in rcsf.rcsfiles], "rcssize").str
823 print StatList([x.totrev for x in rcsf.rcsfiles], "totrev").str
824 return rcsf
825#end
826
827class SampleDataTest:
828 def __init__(self, dirs):
829 self.pairs = []
830 while dirs:
831 d = dirs[0]
832 dirs = dirs[1:]
833 l = os.listdir(d)
834 files = []
835 for e in l:
836 p = os.path.join(d, e)
837 if os.path.isdir(p):
838 dirs.append(p)
839 else:
840 files.append(p)
841 #end
842 #end
843 if len(files) > 1:
844 files.sort()
845 for x in xrange(len(files) - 1):
846 self.pairs.append((files[x], files[x+1],
847 '%s-%s' % (files[x], files[x+1])))
848 #end
849 #end
850 #end
851 #end
852
853 def Generator(self):
854 return lambda rand: rand.choice(self.pairs)
855 #end
856#end
857
858# configs are represented as a list of values,
859# program takes a list of strings:
860def ConfigToArgs(config):
861 args = [ '-C',
862 ','.join([str(x) for x in config[0:SOFT_CONFIG_CNT]])]
863 for i in range(SOFT_CONFIG_CNT, len(CONFIG_ORDER)):
864 key = CONFIG_ARGMAP[CONFIG_ORDER[i]]
865 val = config[i]
866 if val == 'true' or val == 'false':
867 if val == 'true':
868 args.append('%s' % key)
869 #end
870 else:
871 args.append('%s=%s' % (key, val))
872 #end
873 #end
874 return args
875#end
876
877#
878class RandomTest:
879 def __init__(self, tnum, tinput, config, syntuple = None):
880 self.mytinput = tinput[2]
881 self.myconfig = config
882 self.tnum = tnum
883
884 if syntuple != None:
885 self.runtime = syntuple[0]
886 self.compsize = syntuple[1]
887 self.decodetime = None
888 else:
889 args = ConfigToArgs(config)
890 result = TimedTest(tinput[1], tinput[0], Xdelta3Runner(args))
891
892 self.runtime = result.encode_time.mean
893 self.compsize = result.encode_size
894 self.decodetime = result.decode_time.mean
895 #end
896
897 self.score = None
898 self.time_pos = None
899 self.size_pos = None
900 self.score_pos = None
901 #end
902
903 def __str__(self):
904 decodestr = ''
905 if not SKIP_DECODE:
906 decodestr = ' %.6f' % self.decodetime
907 #end
908 return 'time %.6f%s size %d%s << %s >>%s' % (
909 self.time(), ((self.time_pos != None) and (" (%s)" % self.time_pos) or ""),
910 self.size(), ((self.size_pos != None) and (" (%s)" % self.size_pos) or ""),
911 c2str(self.config()),
912 decodestr)
913 #end
914
915 def time(self):
916 return self.runtime
917 #end
918
919 def size(self):
920 return self.compsize
921 #end
922
923 def config(self):
924 return self.myconfig
925 #end
926
927 def score(self):
928 return self.score
929 #end
930
931 def tinput(self):
932 return self.mytinput
933 #end
934#end
935
936def PosInAlist(l, e):
937 for i in range(0, len(l)):
938 if l[i][1] == e:
939 return i;
940 #end
941 #end
942 return -1
943#end
944
945# Generates a set of num_results test configurations, given the list of
946# retest-configs.
947def RandomTestConfigs(rand, input_configs, num_results):
948
949 outputs = input_configs[:]
950 have_set = dict([(c,c) for c in input_configs])
951
952 # Compute a random configuration
953 def RandomConfig():
954 config = []
955 cmap = {}
956 for key in CONFIG_ORDER:
957 val = cmap[key] = (INPUT_SPEC(rand)[key])(cmap)
958 config.append(val)
959 #end
960 return tuple(config)
961 #end
962
963 while len(outputs) < num_results:
964 newc = None
965 for i in xrange(10):
966 c = RandomConfig()
967 if have_set.has_key(c):
968 continue
969 #end
970 have_set[c] = c
971 newc = c
972 break
973 if newc is None:
974 print 'stopped looking for configs at %d' % len(outputs)
975 break
976 #end
977 outputs.append(c)
978 #end
979 outputs.sort()
980 return outputs
981#end
982
983def RunTestLoop(rand, generator, rounds):
984 configs = []
985 for rnum in xrange(rounds):
986 configs = RandomTestConfigs(rand, configs, MAX_RESULTS)
987 tinput = generator(rand)
988 tests = []
989 for x in xrange(len(configs)):
990 t = RandomTest(x, tinput, configs[x])
991 print 'Round %d test %d: %s' % (rnum, x, t)
992 tests.append(t)
993 #end
994 results = ScoreTests(tests)
995
996 for r in results:
997 c = r.config()
998 if not test_all_config_results.has_key(c):
999 test_all_config_results[c] = [r]
1000 else:
1001 test_all_config_results[c].append(r)
1002 #end
1003 #end
1004
1005 GraphResults('expt%d' % rnum, results)
1006 GraphSummary('sum%d' % rnum, results)
1007
1008 # re-test some fraction
1009 configs = [r.config() for r in results[0:int(MAX_RESULTS * KEEP_P)]]
1010 #end
1011#end
1012
1013# TODO: cleanup
1014test_all_config_results = {}
1015
1016def ScoreTests(results):
1017 scored = []
1018 timed = []
1019 sized = []
1020
1021 t_min = float(min([test.time() for test in results]))
1022 #t_max = float(max([test.time() for test in results]))
1023 s_min = float(min([test.size() for test in results]))
1024 #s_max = float(max([test.size() for test in results]))
1025
1026 for test in results:
1027
1028 # Hyperbolic function. Smaller scores still better
1029 red = 0.999 # minimum factors for each dimension are 1/1000
1030 test.score = ((test.size() - s_min * red) *
1031 (test.time() - t_min * red))
1032
1033 scored.append((test.score, test))
1034 timed.append((test.time(), test))
1035 sized.append((test.size(), test))
1036 #end
1037
1038 scored.sort()
1039 timed.sort()
1040 sized.sort()
1041
1042 best_by_size = []
1043 best_by_time = []
1044
1045 pos = 0
1046 for (score, test) in scored:
1047 pos += 1
1048 test.score_pos = pos
1049 #end
1050
1051 scored = [x[1] for x in scored]
1052
1053 for test in scored:
1054 test.size_pos = PosInAlist(sized, test)
1055 test.time_pos = PosInAlist(timed, test)
1056 #end
1057
1058 for test in scored:
1059 c = test.config()
1060 s = 0.0
1061 print 'H-Score: %0.9f %s' % (test.score, test)
1062 #end
1063
1064 return scored
1065#end
1066
1067def GraphResults(desc, results):
1068 f = open("data-%s.csv" % desc, "w")
1069 for r in results:
1070 f.write("%0.9f\t%d\t# %s\n" % (r.time(), r.size(), r))
1071 #end
1072 f.close()
1073 os.system("./plot.sh data-%s.csv plot-%s.jpg" % (desc, desc))
1074#end
1075
1076def GraphSummary(desc, results_ignore):
1077 test_population = 0
1078 config_ordered = []
1079
1080 # drops duplicate test/config pairs (TODO: don't retest them)
1081 for config, cresults in test_all_config_results.items():
1082 input_config_map = {}
1083 uniq = []
1084 for test in cresults:
1085 assert test.config() == config
1086 test_population += 1
1087 key = test.tinput()
1088 if not input_config_map.has_key(key):
1089 input_config_map[key] = {}
1090 #end
1091 if input_config_map[key].has_key(config):
1092 print 'skipping repeat test %s vs. %s' % (input_config_map[key][config], test)
1093 continue
1094 #end
1095 input_config_map[key][config] = test
1096 uniq.append(test)
1097 #end
1098 config_ordered.append(uniq)
1099 #end
1100
1101 # sort configs descending by number of tests
1102 config_ordered.sort(lambda x, y: len(y) - len(x))
1103
1104 print 'population %d: %d configs %d results' % \
1105 (test_population,
1106 len(config_ordered),
1107 len(config_ordered[0]))
1108
1109 if config_ordered[0] == 1:
1110 return
1111 #end
1112
1113 # a map from test-key to test-list w/ various configs
1114 input_set = {}
1115 osize = len(config_ordered)
1116
1117 for i in xrange(len(config_ordered)):
1118 config = config_ordered[i][0].config()
1119 config_tests = config_ordered[i]
1120
1121 #print '%s has %d tested inputs' % (config, len(config_tests))
1122
1123 if len(input_set) == 0:
1124 input_set = dict([(t.tinput(), [t]) for t in config_tests])
1125 continue
1126 #end
1127
1128 # a map from test-key to test-list w/ various configs
1129 update_set = {}
1130 for r in config_tests:
1131 t = r.tinput()
1132 if input_set.has_key(t):
1133 update_set[t] = input_set[t] + [r]
1134 else:
1135 #print 'config %s does not have test %s' % (config, t)
1136 pass
1137 #end
1138 #end
1139
1140 if len(update_set) <= 1:
1141 break
1142 #end
1143
1144 input_set = update_set
1145
1146 # continue if there are more w/ the same number of inputs
1147 if i < (len(config_ordered) - 1) and \
1148 len(config_ordered[i + 1]) == len(config_tests):
1149 continue
1150 #end
1151
1152 # synthesize results for multi-test inputs
1153 config_num = None
1154
1155 # map of config to sum(various test-keys)
1156 smap = {}
1157 for (key, tests) in input_set.items():
1158 if config_num == None:
1159 # config_num should be the same in all elements
1160 config_num = len(tests)
1161 smap = dict([(r.config(),
1162 (r.time(),
1163 r.size()))
1164 for r in tests])
1165 else:
1166 # compuate the per-config sum of time/size
1167 assert config_num == len(tests)
1168 smap = dict([(r.config(),
1169 (smap[r.config()][0] + r.time(),
1170 smap[r.config()][1] + r.size()))
1171 for r in tests])
1172 #end
1173 #end
1174
1175 if config_num == 1:
1176 continue
1177 #end
1178
1179 if len(input_set) == osize:
1180 break
1181 #end
1182
1183 summary = '%s-%d' % (desc, len(input_set))
1184 osize = len(input_set)
1185
1186 print 'generate %s w/ %d configs' % (summary, config_num)
1187 syn = [RandomTest(0, (None, None, summary), config,
1188 syntuple = (smap[config][0], smap[config][1]))
1189 for config in smap.keys()]
1190 syn = ScoreTests(syn)
1191 #print 'smap is %s' % (smap,)
1192 #print 'syn is %s' % (' and '.join([str(x) for x in syn]))
1193 GraphResults(summary, syn)
1194 #end
1195#end
1196
1197if __name__ == "__main__":
1198 try:
1199 RunCommand(['rm', '-rf', TMPDIR])
1200 os.mkdir(TMPDIR)
1201
1202 rcsf = GetTestRcsFiles()
1203 generator = rcsf.Generator()
1204
1205 #sample = SampleDataTest([SAMPLEDIR])
1206 #generator = sample.Generator()
1207
1208 rand = random.Random(135135135135135)
1209 RunTestLoop(rand, generator, TEST_ROUNDS)
1210
1211 #RunSpeedTest()
1212
1213 #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9']))
1214 #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-S', 'djw']))
1215 #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-T']))
1216
1217 #x1r = rcsf.AllPairsByDate(Xdelta1RunClass())
1218
1219 except CommandError:
1220 pass
1221 else:
1222 RunCommand(['rm', '-rf', TMPDIR])
1223 pass
1224 #end
1225#end
Note: See TracBrowser for help on using the repository browser.