00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081 #ifndef DOXYGEN_IGNORE
00082 #include <iostream>
00083 #include <sstream>
00084 #include <cstdlib>
00085 #include <list>
00086 #include <map>
00087 #include <vector>
00088 #include <cassert>
00089 #include <bitset>
00090 #include <algorithm>
00091 #include <string>
00092 #include <tr1/memory>
00093
00094 #include <stdint.h>
00095
00096 #include <tag/tuple.h>
00097 #include <tag/printf.h>
00098 #include <tag/stdpp.h>
00099 #include <tag/fn.h>
00100
00101 #include <cvd/image_ref.h>
00102
00103 #include <gvars3/instances.h>
00104 #endif
00105
00106 using namespace std;
00107 using namespace tr1;
00108 using namespace tag;
00109 using namespace CVD;
00110 using namespace GVars3;
00111
00112
00113
00114 enum Ternary
00115 {
00116 Brighter='b',
00117 Darker ='d',
00118 Similar ='s'
00119 };
00120
00121
00122
00123
00124
00125 #define fatal(E, S, ...) vfatal((E), (S), (tag::Fmt,## __VA_ARGS__))
00126
00127
00128
00129
00130
00131 template<class C> void vfatal(int err, const string& s, const C& list)
00132 {
00133 vfPrintf(cerr, s + "\n", list);
00134 exit(err);
00135 }
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146 template<int FEATURE_SIZE> struct datapoint
00147 {
00148
00149
00150
00151
00152 datapoint(const string& s, unsigned long c, bool is)
00153 :count(c),is_a_corner(is)
00154 {
00155 pack_trits(s);
00156 }
00157
00158
00159
00160 datapoint()
00161 {}
00162
00163 unsigned long count;
00164 bool is_a_corner;
00165
00166 static const unsigned int max_size = FEATURE_SIZE;
00167
00168
00169
00170
00171
00172 Ternary get_trit(unsigned int tnum) const
00173 {
00174 assert(tnum < size);
00175 if(tests[tnum] == 1)
00176 return Brighter;
00177 else if(tests[tnum + max_size] == 1)
00178 return Darker;
00179 else
00180 return Similar;
00181 }
00182
00183 private:
00184
00185 bitset<max_size*2> tests;
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203 void pack_trits(const string& unpacked)
00204 {
00205 tests = 0;
00206 for(unsigned int i=0;i < unpacked.size(); i++)
00207 {
00208 if(unpacked[i] == 'b')
00209 set_trit(i, Brighter);
00210 else if(unpacked[i] == 'd')
00211 set_trit(i, Darker);
00212 else if(unpacked[i] == 's')
00213 set_trit(i, Similar);
00214 else
00215 fatal(2, "Bad char while packing datapoint: %s", unpacked);
00216 }
00217 }
00218
00219
00220
00221
00222 void set_trit(unsigned int tnum, Ternary val)
00223 {
00224 assert(val == Brighter || val == Darker || val == Similar);
00225 assert(tnum < max_size);
00226
00227 if(val == Brighter)
00228 tests[tnum] = 1;
00229 else if(val == Darker)
00230 tests[tnum + max_size] = 1;
00231 }
00232 };
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246 template<int S> typename V_tuple<shared_ptr<vector<datapoint<S> > >, uint64_t >::type load_features(unsigned int nfeats)
00247 {
00248 shared_ptr<vector<datapoint<S> > > ret(new vector<datapoint<S> >);
00249
00250
00251 string unpacked_feature;
00252
00253 uint64_t total_num = 0;
00254
00255 uint64_t line_num=2;
00256
00257 for(;;)
00258 {
00259 uint64_t count;
00260 bool is;
00261
00262 cin >> unpacked_feature >> count >> is;
00263
00264 if(!cin)
00265 break;
00266
00267 line_num++;
00268
00269 if(unpacked_feature.size() != nfeats)
00270 fatal(1, "Feature string length is %i, not %i on line %i", unpacked_feature.size(), nfeats, line_num);
00271
00272 if(count == 0)
00273 fatal(4, "Zero count is invalid");
00274
00275 ret->push_back(datapoint<S>(unpacked_feature, count, is));
00276
00277 total_num += count;
00278 }
00279
00280 cerr << "Num features: " << total_num << endl
00281 << "Num distinct: " << ret->size() << endl;
00282
00283 return make_vtuple(ret, total_num);
00284 }
00285
00286
00287
00288
00289
00290
00291 double entropy(uint64_t n, uint64_t c1)
00292 {
00293 assert(c1 <= n);
00294
00295 if(n == 0)
00296 return 0;
00297 else if(c1 == 0 || c1 == n)
00298 return 0;
00299 else
00300 {
00301 double p1 = (double)c1 / n;
00302 double p2 = 1-p1;
00303
00304 return -(double)n*(p1*log(p1) + p2*log(p2)) / log(2.f);
00305 }
00306 }
00307
00308
00309
00310
00311
00312
00313 template<int S> int find_best_split(const vector<datapoint<S> >& fs, const vector<double>& weights, unsigned int nfeats)
00314 {
00315 assert(nfeats == weights.size());
00316 uint64_t num_total = 0, num_corners=0;
00317
00318 for(typename vector<datapoint<S> >::const_iterator i=fs.begin(); i != fs.end(); i++)
00319 {
00320 num_total += i->count;
00321 if(i->is_a_corner)
00322 num_corners += i->count;
00323 }
00324
00325 double total_entropy = entropy(num_total, num_corners);
00326
00327 double biggest_delta = 0;
00328 int feature_num = -1;
00329
00330 for(unsigned int i=0; i < nfeats; i++)
00331 {
00332 uint64_t num_bri = 0, num_dar = 0, num_sim = 0;
00333 uint64_t cor_bri = 0, cor_dar = 0, cor_sim = 0;
00334
00335 for(typename vector<datapoint<S> >::const_iterator f=fs.begin(); f != fs.end(); f++)
00336 {
00337 switch(f->get_trit(i))
00338 {
00339 case Brighter:
00340 num_bri += f->count;
00341 if(f->is_a_corner)
00342 cor_bri += f->count;
00343 break;
00344
00345 case Darker:
00346 num_dar += f->count;
00347 if(f->is_a_corner)
00348 cor_dar += f->count;
00349 break;
00350
00351 case Similar:
00352 num_sim += f->count;
00353 if(f->is_a_corner)
00354 cor_sim += f->count;
00355 break;
00356 }
00357 }
00358
00359 double delta_e = total_entropy - (entropy(num_bri, cor_bri) + entropy(num_dar, cor_dar) + entropy(num_sim, cor_sim));
00360
00361 delta_e *= weights[i];
00362
00363 if(delta_e > biggest_delta)
00364 {
00365 biggest_delta = delta_e;
00366 feature_num = i;
00367 }
00368 }
00369
00370 if(feature_num == -1)
00371 fatal(3, "Couldn't find a split.");
00372
00373 return feature_num;
00374 }
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387 struct tree{
00388
00389
00390
00391
00392 enum IsCorner
00393 {
00394 Corner,
00395 NonCorner,
00396 NonTerminal
00397 };
00398
00399 const shared_ptr<tree> brighter;
00400 const shared_ptr<tree> darker;
00401 const shared_ptr<tree> similar;
00402 const IsCorner is_a_corner;
00403 const int feature_to_test;
00404 const uint64_t num_datapoints;
00405
00406
00407
00408
00409
00410
00411 string stringify()
00412 {
00413 if(is_a_corner == NonTerminal)
00414 return "(" + brighter->stringify() + darker->stringify() + similar->stringify() + ")";
00415 else
00416 return string("(") + (is_a_corner == Corner?"1":"0") + ")";
00417 }
00418
00419
00420
00421
00422
00423 static shared_ptr<tree> CornerLeaf(uint64_t n)
00424 {
00425 return shared_ptr<tree>(new tree(Corner, n));
00426 }
00427
00428
00429
00430
00431
00432 static shared_ptr<tree> NonCornerLeaf(uint64_t n)
00433 {
00434 return shared_ptr<tree>(new tree(NonCorner, n));
00435 }
00436
00437
00438
00439
00440
00441
00442
00443 tree(shared_ptr<tree> b, shared_ptr<tree> d, shared_ptr<tree> s, int n, uint64_t num)
00444 :brighter(b), darker(d), similar(s), is_a_corner(NonTerminal), feature_to_test(n), num_datapoints(num)
00445 {}
00446
00447 private:
00448
00449
00450
00451
00452
00453 tree(IsCorner c, uint64_t n)
00454 :is_a_corner(c),feature_to_test(-1),num_datapoints(n)
00455 {}
00456 };
00457
00458
00459
00460
00461
00462
00463
00464
00465
00466
00467
00468 template<int S> shared_ptr<tree> build_tree(vector<datapoint<S> >& corners, const vector<double>& weights, int nfeats)
00469 {
00470
00471 int f = find_best_split<S>(corners, weights, nfeats);
00472
00473
00474
00475
00476
00477
00478 vector<datapoint<S> > brighter, darker, similar;
00479 uint64_t num_bri=0, cor_bri=0, num_dar=0, cor_dar=0, num_sim=0, cor_sim=0;
00480
00481 for(size_t i=0; i < corners.size(); i++)
00482 {
00483 switch(corners[i].get_trit(f))
00484 {
00485 case Brighter:
00486 brighter.push_back(corners[i]);
00487 num_bri += corners[i].count;
00488 if(corners[i].is_a_corner)
00489 cor_bri += corners[i].count;
00490 break;
00491
00492 case Darker:
00493 darker.push_back(corners[i]);
00494 num_dar += corners[i].count;
00495 if(corners[i].is_a_corner)
00496 cor_dar += corners[i].count;
00497 break;
00498
00499 case Similar:
00500 similar.push_back(corners[i]);
00501 num_sim += corners[i].count;
00502 if(corners[i].is_a_corner)
00503 cor_sim += corners[i].count;
00504 break;
00505 }
00506 }
00507
00508
00509 corners.clear();
00510
00511
00512
00513 uint64_t num_tests = num_bri + num_dar + num_sim;
00514
00515
00516
00517 shared_ptr<tree> b_tree, d_tree, s_tree;
00518
00519
00520
00521
00522 if(cor_bri == 0)
00523 b_tree = tree::NonCornerLeaf(num_bri);
00524 else if(cor_bri == num_bri)
00525 b_tree = tree::CornerLeaf(num_bri);
00526 else
00527 b_tree = build_tree<S>(brighter, weights, nfeats);
00528
00529
00530 if(cor_dar == 0)
00531 d_tree = tree::NonCornerLeaf(num_dar);
00532 else if(cor_dar == num_dar)
00533 d_tree = tree::CornerLeaf(num_dar);
00534 else
00535 d_tree = build_tree<S>(darker, weights, nfeats);
00536
00537
00538 if(cor_sim == 0)
00539 s_tree = tree::NonCornerLeaf(num_sim);
00540 else if(cor_sim == num_sim)
00541 s_tree = tree::CornerLeaf(num_sim);
00542 else
00543 s_tree = build_tree<S>(similar, weights, nfeats);
00544
00545 return shared_ptr<tree>(new tree(b_tree, d_tree, s_tree, f, num_tests));
00546 }
00547
00548
00549
00550
00551
00552
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562
00563
00564
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574
00575
00576
00577
00578
00579
00580
00581
00582
00583
00584
00585
00586
00587
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597
00598
00599
00600
00601 void print_tree(const tree* node, ostream& o, const string& i="")
00602 {
00603 if(node->is_a_corner == tree::Corner)
00604 o << i << "corner" << endl;
00605 else if(node->is_a_corner == tree::NonCorner)
00606 o << i << "background" << endl;
00607 else
00608 {
00609 string b = node->brighter->stringify();
00610 string d = node->darker->stringify();
00611 string s = node->similar->stringify();
00612
00613 const tree * bt = node->brighter.get();
00614 const tree * dt = node->darker.get();
00615 const tree * st = node->similar.get();
00616 string ii = i + " ";
00617
00618 int f = node->feature_to_test;
00619
00620 if(b == d && d == s)
00621 {
00622
00623 print_tree(st, o, i);
00624 }
00625 else if(d == s)
00626 {
00627 o << i << "if_brighter " << f << " " << bt->num_datapoints << " " << dt->num_datapoints+st->num_datapoints << endl;
00628 print_tree(bt, o, ii);
00629 o << i << "else" << endl;
00630 print_tree(st, o, ii);
00631 o << i << "end" << endl;
00632
00633 }
00634 else if(b == s)
00635 {
00636 o << i << "if_darker " << f << " " << dt->num_datapoints << " " << bt->num_datapoints + st->num_datapoints << endl;
00637 print_tree(dt, o, ii);
00638 o << i << "else" << endl;
00639 print_tree(st, o, ii);
00640 o << i << "end" << endl;
00641 }
00642 else if(b == d)
00643 {
00644 o << i << "if_either " << f << " " << bt->num_datapoints + dt->num_datapoints << " " << st->num_datapoints << endl;
00645 print_tree(bt, o, ii);
00646 o << i << "else" << endl;
00647 print_tree(st, o, ii);
00648 o << i << "end" << endl;
00649 }
00650 else
00651 {
00652 o << i << "if_brighter " << f << " " << bt->num_datapoints << " " << dt->num_datapoints << " " << st->num_datapoints << endl;
00653 print_tree(bt, o, ii);
00654 o << i << "elsf_darker " << f << endl;
00655 print_tree(dt, o, ii);
00656 o << i << "else" << endl;
00657 print_tree(st, o, ii);
00658 o << i << "end" << endl;
00659 }
00660 }
00661 }
00662
00663
00664
00665
00666
00667
00668 template<int S> V_tuple<shared_ptr<tree>, uint64_t>::type load_and_build_tree(unsigned int num_features, const vector<double>& weights)
00669 {
00670 assert(weights.size() == num_features);
00671
00672 shared_ptr<vector<datapoint<S> > > l;
00673 uint64_t num_datapoints;
00674
00675
00676 make_rtuple(l, num_datapoints) = load_features<S>(num_features);
00677
00678 cerr << "Loaded.\n";
00679
00680
00681 shared_ptr<tree> tree;
00682 tree = build_tree<S>(*l, weights, num_features);
00683
00684 return make_vtuple(tree, num_datapoints);
00685 }
00686
00687
00688
00689
00690
00691
00692 int main(int argc, char** argv)
00693 {
00694
00695 GUI.parseArguments(argc, argv);
00696
00697 cin.sync_with_stdio(false);
00698 cout.sync_with_stdio(false);
00699
00700
00701
00702
00703
00704
00705 unsigned int num_features;
00706 cin >> num_features;
00707 if(!cin.good() || cin.eof())
00708 fatal(6, "Error reading number of features.");
00709
00710
00711 vector<ImageRef> offsets(num_features);
00712 for(unsigned int i=0; i < num_features; i++)
00713 cin >> offsets[i];
00714 if(!cin.good() || cin.eof())
00715 fatal(7, "Error reading offset list.");
00716
00717
00718 vector<double> weights(offsets.size());
00719 for(unsigned int i=0; i < weights.size(); i++)
00720 weights[i] = GV3::get<double>(sPrintf("weights.%i", i), 1, 1);
00721
00722
00723 shared_ptr<tree> tree;
00724 uint64_t num_datapoints;
00725
00726
00727
00728
00729 if(num_features <= 16)
00730 make_rtuple(tree, num_datapoints) = load_and_build_tree<16>(num_features, weights);
00731 else if(num_features <= 32)
00732 make_rtuple(tree, num_datapoints) = load_and_build_tree<32>(num_features, weights);
00733 else if(num_features <= 48)
00734 make_rtuple(tree, num_datapoints) = load_and_build_tree<48>(num_features, weights);
00735 else if(num_features <= 64)
00736 make_rtuple(tree, num_datapoints) = load_and_build_tree<64>(num_features, weights);
00737 else
00738 fatal(8, "Too many feratures (%i). To learn from this, see %s, line %i.", num_features, __FILE__, __LINE__);
00739
00740
00741 cout << num_features << endl;
00742 copy(offsets.begin(), offsets.end(), ostream_iterator<ImageRef>(cout, " "));
00743 cout << endl;
00744 print_tree(tree.get(), cout);
00745 }