LILAC
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
hasher.cpp
Go to the documentation of this file.
1 #include "hasher.h"
2 #include <ctime>
3 #include <iostream>
4 #include <stdio.h>
5 hasher::hasher(size_t num_hash, size_t num_rand, vector<float> lens, size_t d_vals, float min_interest):hashmat(lens.size(), num_hash), tor_hashes(d_vals+2), rand_scores(num_rand, num_hash), rand_vecs(num_rand, lens.size()),d_test(d_vals, 0){
6  mnorm = 0;
7  for(size_t i = 0; i < lens.size(); i++){
8  mnorm += lens[i]*lens[i];
9  }
10  mnorm = sqrtf(mnorm);
11  //initialize eash individual hash table
12  tor_hashes[0]=vector<vector<size_t> >(num_hash, vector<size_t>(ceil(mnorm/(min_interest/2.0)), 0));
13  tor_hashes[1]=vector<vector<size_t> >(num_hash, vector<size_t>(ceil(mnorm/min_interest), 0));
14 
15  for(size_t i = 2; i < tor_hashes.size(); i++){
16  min_interest *= 2;
17  float gval = min_interest;
18  size_t num_buckets = ceil(mnorm / gval);
19  tor_hashes[i] = vector<vector<size_t> >(num_hash, vector<size_t>(num_buckets, 0));
20  }
21  size_t dim = lens.size();
22  // rmf rand_vecs(num_rand, dim);
23 
24  //initialize the random position vectors
25  for(size_t i = 0; i < num_rand; i++){
26  for(size_t j = 0; j < dim; j++){
27  rand_vecs(i, j) = (rand()*1.0/RAND_MAX)*lens[j];
28  }
29  }
30  //generate the random hash matrix
31  for(size_t j = 0; j < num_hash; j++){
32  float norm = 0;
33  for(size_t i = 0; i < dim; i++){
34  float rval = rand() * (1.0/RAND_MAX);
35  hashmat(i, j)= rval;
36  norm += rval*rval;
37  }
38  //normalize the vectors
39  //don't believe they need to be
40  norm = 1.0/sqrtf(norm);
41  for(size_t i = 0; i < dim; i++){
42  hashmat(i, j) *= norm;
43  }
44  }
45  rand_scores.noalias()=rand_vecs*hashmat;
46 }
47 
49 }
50 //returns the bin of the float, assuming it is between 0 and mnorm
51 inline size_t hasher::get_bin(float hval, size_t n_bins){
52  size_t stemp = floor(n_bins*hval/mnorm);
53  if(stemp >= n_bins){
54  cout << hval << " " << n_bins << " " << mnorm << endl;
55  }
56  return stemp;
57 }
58 void hasher::hash(const rmf& invecs, cmf& hashes){
59  hashes.noalias()=invecs*hashmat;
60 };
61 void hasher::add_vecs(const rmf& invecs){
62  //hash the input vectors
63  in_vecs.push_back(invecs);
64  cmf temp( invecs*hashmat);
65  add_scores(temp);
66 }
67 void hasher::add_scores(const cmf& hvals){
68  for(size_t g = 0; g < tor_hashes.size(); g++){
69  vector<vector<size_t> >& hvec = tor_hashes[g];
70  for(size_t j = 0; j < (size_t)hvals.cols(); j++){
71  for(size_t i = 0; i < (size_t)hvals.rows(); i++){
72  // increment corresponding hash table bin
73  hvec[j][get_bin(hvals(i, j), hvec[j].size())]++;
74  }
75  }
76  }
77 }
78 vector<float> hasher::get_cur_scores(){
79  const rmf& hvals = rand_scores;
80  vector<float> scores(tor_hashes.size()-2, 0);
81  for(size_t g = 2; g < tor_hashes.size(); g++){
82  vector<vector<size_t> >& hvec = tor_hashes[g];
83  //preceding has table gives probability that distance is more than
84  //current measure
85  vector<vector<size_t> >& hvecm2 = tor_hashes[g-2];
86  //gval hvec = dval[i]*2
87  //gval hvecm1 = dval[i]
88  //gval hvecm2 = dval[i]/2
89  //if(!in hvecm1), pr bad is for 2*gval = dval*2
90  //if(!in hvecm2), pr bad is for 2*gval = dval
91  //if(in hvec) pr good is for gval/2 = dval
92  //therefore need to be minus two hash funcs
93  for(size_t i = 0; i < (size_t)hvals.rows(); i++){
94  float pr_far;
95  size_t num_near = 0;
96  size_t num_far=0;
97  pr_far = 0;
98  size_t num_ave = 0;
99  for(size_t j = 0; j < (size_t)hvals.cols(); j++){
100  // increment corresponding hash table bin
101  // gamma/2, 2*gamma, 1/2, 1/3 sensitive
102  // for now we treat not being near as zero probability of begin near
103  // for now, hold two counts: number of vectors for which
104  // the hash function claims the rvec is near
105  // and number of times the rvec claims it is not near a vector
106  //
107  // if (n_near*((1/2)/(1/3) < n_far)) then
108  // say it is near a vector. Else, it is not
109  size_t num_near_tmp = hvec[j][get_bin(hvals(i, j), hvec[j].size())];
110  long num_far_tmp = (long)hvecm2[j][get_bin(hvals(i, j), hvec[j].size())];
111  num_near += num_near_tmp;
112  //if not zero, make zero. otherwise make 1
113  num_far_tmp = (num_far_tmp == 0);
114  num_far += num_far_tmp;
115  pr_far += (pow(0.5, num_near_tmp) + num_far_tmp);
116  num_ave += (1+num_far_tmp);
117  }
118  //cout << num_near*1.0/hvals.cols() << endl;
119  pr_far/= num_ave;
120  // if(pr_far < 0.2){
121  if(num_far < 1){
122  scores[g-2]++;
123  }
124  }
125  scores[g-2]/=rand_scores.rows();
126  }
127  size_t num_bel_min = 0;
128 
130  for(size_t r = 0; r < (size_t)rand_vecs.rows(); r++){
131  float mindis=mnorm;
132  list<rmf>::iterator lcur;
133  for(lcur=in_vecs.begin(); lcur != in_vecs.end(); lcur++){
134  for(size_t i = 0; i < (size_t)lcur->rows(); i++){
135  float cdist = (rand_vecs.row(r) - lcur->row(i)).norm();
136  if(cdist < mindis){
137  mindis = cdist;
138  }
139  }
140  }
141  if(mindis < 0.25){
142  num_bel_min++;
143  }
144  }
145  // float act_bel_min = (1.0*num_bel_min)/(1.0*rand_vecs.rows());
146  cout << scores[0]*rand_vecs.rows() << ", " << num_bel_min << endl;
147  return scores;
148 }
149 
150 int main()
151 {
152  srand(time(0));
153  size_t dim, num_vec, num_inc;
154  float in_inc, mul_val;
155  num_inc=1000;
156  in_inc=.1;
157  mul_val=3.14159*.7;
158  dim=6;
159  num_vec=500;
160  std::vector<float> lens(dim, 2*3.14159);
161  size_t d_vals = 5;
162  float min_int = 0.25;
163  hasher hh(15, 10000, lens, d_vals, min_int);
164  rmf vvals(rmf::Zero(num_vec, dim));
165  for(size_t g =0; g < num_inc; g++){
166  //copy most recent row into beginning
167  float cur_inc=in_inc;
168  for(size_t j = 0; j < dim; j++){
169  vvals(0, j) = fmod(vvals(num_vec-1, j)+cur_inc, lens[j]);
170  cur_inc *= mul_val;
171  }
172  //create the matrix of search vectors
173  for(size_t i = 1; i < num_vec; i++){
174  cur_inc = in_inc;
175  for(size_t j = 0; j < dim; j++){
176  vvals(i, j) = fmod(vvals(i-1, j)+cur_inc, lens[j]);
177  cur_inc *= mul_val;
178  }
179  }
180  hh.add_vecs(vvals);
181  cout << "getting_scores" << endl;
182  vector<float> scores = hh.get_cur_scores();
183  cout << scores[0] << endl;
184  }
185  hh.get_cur_scores();
186 }
187