Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • ads/learned-zindex
1 result
Show changes
Commits on Source (2)
.vscode/settings.json
LZC/.DS_Store
.DS_Store
*.out
*.o
\ No newline at end of file
...@@ -142,7 +142,7 @@ class FloodLite ...@@ -142,7 +142,7 @@ class FloodLite
std::vector<std::pair<bool,uint32_t>> candidate_configs = ConfigurationsFromQueries(queries); std::vector<std::pair<bool,uint32_t>> candidate_configs = ConfigurationsFromQueries(queries);
std::shuffle(candidate_configs.begin(),candidate_configs.end(),g); std::shuffle(candidate_configs.begin(),candidate_configs.end(),g);
std::cout<<" There are "<<candidate_configs.size()<<" candidate configs."<<std::endl; std::cout<<" There are "<<candidate_configs.size()<<" candidate configs."<<std::endl;
uint32_t num_configs_to_try = 30; uint32_t num_configs_to_try = 50;
std::vector<std::pair<Point,Point>> query_subsample(queries.begin(),queries.begin()+uint32_t(M*0.005)); std::vector<std::pair<Point,Point>> query_subsample(queries.begin(),queries.begin()+uint32_t(M*0.005));
for(std::pair<bool,uint32_t> &conf: candidate_configs){ for(std::pair<bool,uint32_t> &conf: candidate_configs){
......
...@@ -50,10 +50,8 @@ int main(int argc, char* argv[]) ...@@ -50,10 +50,8 @@ int main(int argc, char* argv[])
ifstream pointsfile(data_path+data_folder+"/DataPoints/"+point_class+"/"+point_file); ifstream pointsfile(data_path+data_folder+"/DataPoints/"+point_class+"/"+point_file);
while ( pointsfile >> a >> b) while ( pointsfile >> a >> b)
data.push_back(Point(a,b)); data.push_back(Point(a,b));
pointsfile.close(); pointsfile.close();
cout<<"Finished reading data "<<data.size()<<endl;
vector<BoundingRectangle> queries; vector<BoundingRectangle> queries;
ifstream queriesfile(data_path+data_folder+"/Queries/RangeQueries/"+query_file); ifstream queriesfile(data_path+data_folder+"/Queries/RangeQueries/"+query_file);
...@@ -61,6 +59,7 @@ int main(int argc, char* argv[]) ...@@ -61,6 +59,7 @@ int main(int argc, char* argv[])
queries.push_back(BoundingRectangle(Point(a,b),Point(c,d))); queries.push_back(BoundingRectangle(Point(a,b),Point(c,d)));
queriesfile.close(); queriesfile.close();
vector<Point> knn_queries; vector<Point> knn_queries;
ifstream knn_queriesfile(data_path+data_folder+"/Queries/KnnQueries/"+point_class); ifstream knn_queriesfile(data_path+data_folder+"/Queries/KnnQueries/"+point_class);
while (knn_queriesfile >> a >> b) while (knn_queriesfile >> a >> b)
...@@ -74,7 +73,6 @@ int main(int argc, char* argv[]) ...@@ -74,7 +73,6 @@ int main(int argc, char* argv[])
while (point_queriesfile >> a >> b) while (point_queriesfile >> a >> b)
point_queries.push_back(Point(a,b)); point_queries.push_back(Point(a,b));
point_queriesfile.close(); point_queriesfile.close();
cout<<"Point Query size"<<point_queries.size()<<endl;
uint32_t insert_rq_size = uint32_t(queries.size()*0.05); uint32_t insert_rq_size = uint32_t(queries.size()*0.05);
...@@ -487,8 +485,6 @@ int main(int argc, char* argv[]) ...@@ -487,8 +485,6 @@ int main(int argc, char* argv[])
for(auto &k : k_values){ for(auto &k : k_values){
waziplus_eval_obj.ClearMetric(); waziplus_eval_obj.ClearMetric();
waziplus_json["k"] = k; waziplus_json["k"] = k;
auto waziplus_eval_start = std::chrono::high_resolution_clock::now(); auto waziplus_eval_start = std::chrono::high_resolution_clock::now();
...@@ -509,7 +505,7 @@ int main(int argc, char* argv[]) ...@@ -509,7 +505,7 @@ int main(int argc, char* argv[])
} }
} }
cout<<"Done with WAZIPLUS knn queries."<<endl;
{ /** Point queries */ { /** Point queries */
json waziplus_json; json waziplus_json;
...@@ -532,7 +528,7 @@ int main(int argc, char* argv[]) ...@@ -532,7 +528,7 @@ int main(int argc, char* argv[])
o << waziplus_json << std::endl; o << waziplus_json << std::endl;
o.close(); o.close();
} }
cout<<"Done with WAZIPLUS point queries."<<endl;
{ /** Insert queries */ { /** Insert queries */
json waziplus_json; json waziplus_json;
...@@ -541,10 +537,8 @@ int main(int argc, char* argv[]) ...@@ -541,10 +537,8 @@ int main(int argc, char* argv[])
waziplus_json["point_file"] = point_file; waziplus_json["point_file"] = point_file;
waziplus_json["config_id"] = string(argv[1]); waziplus_json["config_id"] = string(argv[1]);
vector<uint64_t> insert_times; vector<uint64_t> insert_times;
vector<uint64_t> range_query_times; vector<uint64_t> range_query_times;
uint32_t ins_ix=0; uint32_t ins_ix=0;
for(int ins_epoch =0;ins_epoch<5;ins_epoch++){ for(int ins_epoch =0;ins_epoch<5;ins_epoch++){
...@@ -571,7 +565,6 @@ int main(int argc, char* argv[]) ...@@ -571,7 +565,6 @@ int main(int argc, char* argv[])
o << waziplus_json << std::endl; o << waziplus_json << std::endl;
o.close(); o.close();
} }
cout<<"Done with WAZIPLUS insert queries."<<endl;
} }
cout<<"WAZIPLUS DONE\n"; cout<<"WAZIPLUS DONE\n";
......
...@@ -34,12 +34,13 @@ cnt = 1 ...@@ -34,12 +34,13 @@ cnt = 1
confPath = experimentPath/'config' confPath = experimentPath/'config'
confPath.mkdir(parents=True,exist_ok=True) confPath.mkdir(parents=True,exist_ok=True)
point_class_list = ['CaliNev','Iberian','Japan','NewYork'] point_class_list = parsed_toml['datagen']['data_dist']
rsmi_build_list = []
for datasize in parsed_toml['datagen']['dataset_size']: for datasize in parsed_toml['datagen']['dataset_size']:
for selec in parsed_toml['datagen']['query_selectivity']: for point_class in point_class_list:
for point_class in point_class_list: rsmi_build_list.append(cnt)
for selec in parsed_toml['datagen']['query_selectivity']:
config_dict['point_class'] = point_class config_dict['point_class'] = point_class
config_dict['point_file'] = str(datasize) config_dict['point_file'] = str(datasize)
...@@ -51,6 +52,9 @@ for datasize in parsed_toml['datagen']['dataset_size']: ...@@ -51,6 +52,9 @@ for datasize in parsed_toml['datagen']['dataset_size']:
toml.dump(config_dict, f) toml.dump(config_dict, f)
cnt+=1 cnt+=1
rsmi_build_list.append('')
with open(experimentPath/'RSMI_build_list', 'w') as f:
f.write('\n'.join([str(a) for a in rsmi_build_list]))
sys.stdout.write(str(cnt-1)) sys.stdout.write(str(cnt-1))
......
...@@ -176,108 +176,100 @@ int main(int argc, char* argv[]) ...@@ -176,108 +176,100 @@ int main(int argc, char* argv[])
} }
cout<<"Finished Range query QUILTS"<<endl; cout<<"Finished Range query QUILTS"<<endl;
if(atoi(argv[4])){
cout<<"Starting Knn"<<endl; cout<<"Starting Knn"<<endl;
{ // KNN querys { // KNN querys
json quilts_json; json quilts_json;
quilts_json["model"]="QUILTS"; quilts_json["model"]="QUILTS";
quilts_json["point_class"] = point_class; quilts_json["point_class"] = point_class;
quilts_json["point_file"] = point_file; quilts_json["point_file"] = point_file;
quilts_json["config_id"] = string(argv[1]); quilts_json["config_id"] = string(argv[1]);
vector<Point> knn_query_result; vector<Point> knn_query_result;
for(auto &k : k_values){ for(auto &k : k_values){
quilts_json["k"] = k; quilts_json["k"] = k;
quilts_obj.ClearMetric(); quilts_obj.ClearMetric();
auto quilts_eval_start = std::chrono::high_resolution_clock::now();
for(auto &query: knn_queries){
// cout<<"KNN query "<<query.x_<<" "<<query.y_<<endl;
knn_query_result = quilts_obj.KNNQuery(query,k);
}
auto quilts_eval_end = std::chrono::high_resolution_clock::now();
quilts_json["knn_query_time"]=chrono::duration_cast<chrono::nanoseconds>(quilts_eval_end - quilts_eval_start).count()/knn_queries.size();
quilts_json["knn_query_scantime"] = quilts_obj.TimeSpentScanningPages()/knn_queries.size();
quilts_json["knn_query_page_accessed"]=quilts_obj.NumPagesAcessed()/knn_queries.size();
quilts_json["knn_query_points_scanned"]=quilts_obj.NumElementsScanned()/knn_queries.size();
std::cout<<"QUILTS knn k:"<<k<<std::endl;
ofstream o(data_path+data_folder+"/Experiments/result/KNN.json",ios_base::app);
o << quilts_json << std::endl;
o.close();
}
}
cout<<"KNN DOne"<<endl;
{ // Point queries
json quilts_json;
quilts_json["model"]="QUILTS";
quilts_json["point_class"] = point_class;
quilts_json["point_file"] = point_file;
quilts_json["config_id"] = string(argv[1]);
bool point_query_result;
auto quilts_eval_start = std::chrono::high_resolution_clock::now(); auto quilts_eval_start = std::chrono::high_resolution_clock::now();
for(auto &query: point_queries){ for(auto &query: knn_queries){
point_query_result = quilts_obj.PointQuery(query); knn_query_result = quilts_obj.KNNQuery(query,k);
} }
auto quilts_eval_end = std::chrono::high_resolution_clock::now(); auto quilts_eval_end = std::chrono::high_resolution_clock::now();
quilts_json["knn_query_time"]=chrono::duration_cast<chrono::nanoseconds>(quilts_eval_end - quilts_eval_start).count()/knn_queries.size();
quilts_json["knn_query_scantime"] = quilts_obj.TimeSpentScanningPages()/knn_queries.size();
quilts_json["knn_query_page_accessed"]=quilts_obj.NumPagesAcessed()/knn_queries.size();
quilts_json["knn_query_points_scanned"]=quilts_obj.NumElementsScanned()/knn_queries.size();
quilts_json["point_query_time"]=chrono::duration_cast<chrono::nanoseconds>(quilts_eval_end - quilts_eval_start).count()/point_queries.size(); ofstream o(data_path+data_folder+"/Experiments/result/KNN.json",ios_base::app);
ofstream o(data_path+data_folder+"/Experiments/result/Point.json",ios_base::app);
o << quilts_json << std::endl; o << quilts_json << std::endl;
o.close(); o.close();
} }
}
{ // Point queries
json quilts_json;
quilts_json["model"]="QUILTS";
quilts_json["point_class"] = point_class;
quilts_json["point_file"] = point_file;
quilts_json["config_id"] = string(argv[1]);
cout<<"Point Queries DOne"<<endl; bool point_query_result;
{ // Insert queries
json quilts_json;
quilts_json["model"]="QUILTS";
quilts_json["point_class"] = point_class;
quilts_json["point_file"] = point_file;
quilts_json["config_id"] = string(argv[1]);
vector<uint64_t> insert_times;
vector<uint64_t> range_query_times;
uint32_t ins_ix=0;
for(int ins_epoch =0;ins_epoch<5;ins_epoch++){ auto quilts_eval_start = std::chrono::high_resolution_clock::now();
for(auto &query: point_queries){
point_query_result = quilts_obj.PointQuery(query);
}
auto quilts_eval_end = std::chrono::high_resolution_clock::now();
quilts_json["point_query_time"]=chrono::duration_cast<chrono::nanoseconds>(quilts_eval_end - quilts_eval_start).count()/point_queries.size();
ofstream o(data_path+data_folder+"/Experiments/result/Point.json",ios_base::app);
o << quilts_json << std::endl;
o.close();
}
auto quilts_eval_start = std::chrono::high_resolution_clock::now();
for(int j=0;j<insert_increments;j++,ins_ix++){
quilts_obj.InsertElement(insert_queries[ins_ix]);
}
auto quilts_eval_end = std::chrono::high_resolution_clock::now();
insert_times.push_back(chrono::duration_cast<chrono::nanoseconds>(quilts_eval_end - quilts_eval_start).count()/insert_increments);
{ // Insert queries
json quilts_json;
quilts_json["model"]="QUILTS";
quilts_json["point_class"] = point_class;
quilts_json["point_file"] = point_file;
quilts_json["config_id"] = string(argv[1]);
vector<uint64_t> insert_times;
vector<uint64_t> range_query_times;
uint32_t ins_ix=0;
for(int ins_epoch =0;ins_epoch<5;ins_epoch++){
auto quilts_rq_eval_start = std::chrono::high_resolution_clock::now(); auto quilts_eval_start = std::chrono::high_resolution_clock::now();
for(int i=0;i<insert_rq_size;i++){ for(int j=0;j<insert_increments;j++,ins_ix++){
vector<Point> range_query_result = quilts_obj.RangeQuery(queries[i]); quilts_obj.InsertElement(insert_queries[ins_ix]);
}
auto quilts_rq_eval_end = std::chrono::high_resolution_clock::now();
range_query_times.push_back(chrono::duration_cast<chrono::nanoseconds>(quilts_rq_eval_end - quilts_rq_eval_start).count()/insert_rq_size);
} }
auto quilts_eval_end = std::chrono::high_resolution_clock::now();
insert_times.push_back(chrono::duration_cast<chrono::nanoseconds>(quilts_eval_end - quilts_eval_start).count()/insert_increments);
quilts_json["insert_query_time"]= insert_times;
quilts_json["range_query_times"]= range_query_times;
ofstream o(data_path+data_folder+"/Experiments/result/Insert.json",ios_base::app);
o << quilts_json << std::endl; auto quilts_rq_eval_start = std::chrono::high_resolution_clock::now();
o.close(); for(int i=0;i<insert_rq_size;i++){
vector<Point> range_query_result = quilts_obj.RangeQuery(queries[i]);
}
auto quilts_rq_eval_end = std::chrono::high_resolution_clock::now();
range_query_times.push_back(chrono::duration_cast<chrono::nanoseconds>(quilts_rq_eval_end - quilts_rq_eval_start).count()/insert_rq_size);
} }
cout<<"Insert DOne"<<endl;
quilts_json["insert_query_time"]= insert_times;
quilts_json["range_query_times"]= range_query_times;
ofstream o(data_path+data_folder+"/Experiments/result/Insert.json",ios_base::app);
o << quilts_json << std::endl;
o.close();
} }
cout<<"QUILTS DONE\n"; cout<<"QUILTS DONE\n";
......
...@@ -5,14 +5,16 @@ This repository hosts the code and extended version of the article: ...@@ -5,14 +5,16 @@ This repository hosts the code and extended version of the article:
The repository holds the C++ implementation for Z-Indices used for experimentation. The repository holds the C++ implementation for Z-Indices used for experimentation.
## Instruction for running experiments used in the paper. ## Instruction for running experiments used in the paper.
- Make changes to configuration file 'config.toml' file, if any. - Make changes to configuration file 'config.toml' file, if any.
- Provide a respective data ($\mathcal{D}$) and query distribution ($\mathcal{Q}$) in `Dataset` folder. - Provide a respective data ($\mathcal{D}$) and query distribution ($\mathcal{Q}$) in `Dataset` folder.
- A 4M version of $CaliNev$ dataset used in paper is provided as an example. - $CaliNev$ dataset used in paper is provided within repository for example.
- All experimental results associated could be generated by running the `endtoend.sh` script. - All experimental results associated could be generated by running the `endtoend.sh` script with the data folder (set in 'config.toml') as commandline argument
``` bash ``` bash
$ bash endtoend.sh test $ bash endtoend.sh RealWorld
``` ```
- Generates the necessary data and query files. - Generates the necessary data and query files.
...@@ -20,12 +22,22 @@ The repository holds the C++ implementation for Z-Indices used for experimentati ...@@ -20,12 +22,22 @@ The repository holds the C++ implementation for Z-Indices used for experimentati
- Creates the required files from $\mathcal{D}$ and $\mathcal{Q}$. - Creates the required files from $\mathcal{D}$ and $\mathcal{Q}$.
- Sets up config files for each of the configs. - Sets up config files for each of the configs.
- Builds and evaluates the respective models. - Builds and evaluates the respective models.
- The raw output of experiments are stored in `Dataset/result/heur.json` - The raw output of experiments are stored in `Dataset/RealWorld/Experiments/result/` as json files. Each json refers to a specific config.
- A plot representative to the figures in the paper are also plotted and stored in `Dataset/plots` - A plot representative to the figures in the paper are also plotted and stored in `Dataset/RealWorld/Experiments/plots`
## Reproducing the experiments
- For the sake of reproducibility, we provide the code used to generate all the results presented in our paper.
- We have also added the data distribution and query distribution for the CaliNev dataset as Git LFS files in the repo.
- For the other three data and query distributions, run:
``` bash
$ bash Preprocess/fetch_other_dataset.sh
```
## Authors and acknowledgment ## Authors and acknowledgment
The code was created and maintained by Sachith Pai. The code is created and maintained by Sachith Pai (sachith.pai@helsinki.fi).
The work was supported by the Academy of Finland. The work was supported by the Academy of Finland.
......
...@@ -3,17 +3,10 @@ SRCS=$(wildcard *.cpp */*.cpp) ...@@ -3,17 +3,10 @@ SRCS=$(wildcard *.cpp */*.cpp)
OBJS=$(patsubst %.cpp, %.o, $(SRCS)) OBJS=$(patsubst %.cpp, %.o, $(SRCS))
TYPE = ukko2 # Update the libtorch sources in the following lines
# for linux INCLUDE = -I/wrk-vakka/users/sachith/libtorch/include -I/wrk-vakka/users/sachith/libtorch/include/torch/csrc/api/include
ifeq ($(TYPE), puhti) LIB += -L/wrk-vakka/users/sachith/libtorch/lib -ltorch_cpu -lc10 -lpthread
INCLUDE = -I/Users/sachith/Work/libtorch1.4.0/include -I/Users/sachith/Work/libtorch1.4.0/include/torch/csrc/api/include FLAG = -Wl,-rpath=/wrk-vakka/users/sachith/libtorch/lib
LIB +=-L/Users/sachith/Work/libtorch1.4.0/lib -ltorch -lc10 -lpthread
FLAG = -Wl,-rpath,/Users/sachith/Work/libtorch1.4.0/lib
else
INCLUDE = -I/wrk-vakka/users/sachith/libtorch/include -I/wrk-vakka/users/sachith/libtorch/include/torch/csrc/api/include
LIB += -L/wrk-vakka/users/sachith/libtorch/lib -ltorch_cpu -lc10 -lpthread
FLAG = -Wl,-rpath=/wrk-vakka/users/sachith/libtorch/lib
endif
...@@ -27,8 +20,4 @@ $(TARGET):$(OBJS) ...@@ -27,8 +20,4 @@ $(TARGET):$(OBJS)
$(CC) -o $@ -c $< -g $(INCLUDE) $(CC) -o $@ -c $< -g $(INCLUDE)
clean: clean:
rm -rf $(TARGET) $(OBJS) rm -rf $(TARGET) $(OBJS)
\ No newline at end of file
# # g++ -std=c++11 Exp.cpp FileReader.o -ltensorflow -o Exp_tf
# # -I/wrk-vakka/users/sachith/boost_1_78_0
# # /wrk-vakka/group/ads-common/pytorch-install/
...@@ -39,7 +39,7 @@ void BuildRSMI(string model_path,vector<Point> &points,string dataFolder, string ...@@ -39,7 +39,7 @@ void BuildRSMI(string model_path,vector<Point> &points,string dataFolder, string
cout<<"Finished building RSMI"<<endl; cout<<"Finished building RSMI"<<endl;
double buildTime = chrono::duration_cast<chrono::seconds> (finish - start).count(); double buildTime = chrono::duration_cast<chrono::seconds> (finish - start).count();
string buildtime_root_path = "../Datasets/"+dataFolder+ "/RSMI/buildtime/"+point_class+"/"; string buildtime_root_path = "./Datasets/"+dataFolder+ "/RSMI/buildtime/"+point_class+"/";
file_utils::check_dir(buildtime_root_path); file_utils::check_dir(buildtime_root_path);
ofstream o(buildtime_root_path+point_file+".txt",ios_base::trunc); ofstream o(buildtime_root_path+point_file+".txt",ios_base::trunc);
o << buildTime << std::endl; o << buildTime << std::endl;
...@@ -47,7 +47,7 @@ void BuildRSMI(string model_path,vector<Point> &points,string dataFolder, string ...@@ -47,7 +47,7 @@ void BuildRSMI(string model_path,vector<Point> &points,string dataFolder, string
void RSMI_rangequery(string model_path,vector<Point> &points,vector<Mbr> &range_queries,json rsmi_json, string dataFolder, string experimentName){ void RSMI_rangequery(string model_path,vector<Point> &points,vector<Mbr> &range_queries,json rsmi_json, string dataFolder){
ExpRecorder exp_recorder; ExpRecorder exp_recorder;
RSMI::model_path_root = model_path; RSMI::model_path_root = model_path;
RSMI *partition = new RSMI(0, Constants::MAX_WIDTH); RSMI *partition = new RSMI(0, Constants::MAX_WIDTH);
...@@ -56,7 +56,7 @@ void RSMI_rangequery(string model_path,vector<Point> &points,vector<Mbr> &range_ ...@@ -56,7 +56,7 @@ void RSMI_rangequery(string model_path,vector<Point> &points,vector<Mbr> &range_
double buildTime; double buildTime;
ifstream inp("../Datasets/"+dataFolder+ "/RSMI/buildtime/"+rsmi_json["point_class"].get<std::string>()+"/"+rsmi_json["point_file"].get<std::string>()+".txt",ios_base::in); ifstream inp("./Datasets/"+dataFolder+ "/RSMI/buildtime/"+rsmi_json["point_class"].get<std::string>()+"/"+rsmi_json["point_file"].get<std::string>()+".txt",ios_base::in);
inp>>buildTime; inp>>buildTime;
inp.close(); inp.close();
rsmi_json["build_time"]=buildTime; rsmi_json["build_time"]=buildTime;
...@@ -86,7 +86,7 @@ void RSMI_rangequery(string model_path,vector<Point> &points,vector<Mbr> &range_ ...@@ -86,7 +86,7 @@ void RSMI_rangequery(string model_path,vector<Point> &points,vector<Mbr> &range_
rsmi_json["range_query_page_accessed"]=exp_recorder.acc_window_query_page_access; rsmi_json["range_query_page_accessed"]=exp_recorder.acc_window_query_page_access;
rsmi_json["range_result_size"] = exp_recorder.acc_window_query_result_size; rsmi_json["range_result_size"] = exp_recorder.acc_window_query_result_size;
ofstream o("../Datasets/"+dataFolder+"/Experiments/"+experimentName+"/result/Range.json",ios_base::app); ofstream o("./Datasets/"+dataFolder+"/Experiments/result/Range.json",ios_base::app);
o << rsmi_json << std::endl; o << rsmi_json << std::endl;
} }
...@@ -111,7 +111,7 @@ void RSMI_rangequery(string model_path,vector<Point> &points,vector<Mbr> &range_ ...@@ -111,7 +111,7 @@ void RSMI_rangequery(string model_path,vector<Point> &points,vector<Mbr> &range_
rsmi_json["range_query_page_accessed"]=exp_recorder.window_query_page_access; rsmi_json["range_query_page_accessed"]=exp_recorder.window_query_page_access;
rsmi_json["range_result_size"] = exp_recorder.window_query_result_size; rsmi_json["range_result_size"] = exp_recorder.window_query_result_size;
ofstream o("../Datasets/"+dataFolder+"/Experiments/"+experimentName+"/result/Range.json",ios_base::app); ofstream o("./Datasets/"+dataFolder+"/Experiments/result/Range.json",ios_base::app);
o << rsmi_json << std::endl; o << rsmi_json << std::endl;
} }
...@@ -119,7 +119,7 @@ void RSMI_rangequery(string model_path,vector<Point> &points,vector<Mbr> &range_ ...@@ -119,7 +119,7 @@ void RSMI_rangequery(string model_path,vector<Point> &points,vector<Mbr> &range_
} }
void RSMI_knn_point_insert(string model_path,vector<Point> &points,json jsonprefix, string dataFolder, string experimentName,vector<Point>& knn_queries,vector<Point>& point_queries,vector<Point>& insert_queries,vector<uint32_t> k_values,vector<Mbr> &range_queries){ void RSMI_knn_point_insert(string model_path,vector<Point> &points,json jsonprefix, string dataFolder,vector<Point>& knn_queries,vector<Point>& point_queries,vector<Point>& insert_queries,vector<uint32_t> k_values,vector<Mbr> &range_queries){
ExpRecorder exp_recorder; ExpRecorder exp_recorder;
RSMI::model_path_root = model_path; RSMI::model_path_root = model_path;
RSMI *partition = new RSMI(0, Constants::MAX_WIDTH); RSMI *partition = new RSMI(0, Constants::MAX_WIDTH);
...@@ -155,7 +155,7 @@ void RSMI_knn_point_insert(string model_path,vector<Point> &points,json jsonpref ...@@ -155,7 +155,7 @@ void RSMI_knn_point_insert(string model_path,vector<Point> &points,json jsonpref
// rsmi_json["knn_query_points_scanned"]=exp_recorder.acc_knn_query_scan_size; // rsmi_json["knn_query_points_scanned"]=exp_recorder.acc_knn_query_scan_size;
// ofstream o("../Datasets/"+dataFolder+"/Experiments/"+experimentName+"/result/KNN.json",ios_base::app); // ofstream o("./Datasets/"+dataFolder+"/Experiments/result/KNN.json",ios_base::app);
// o << rsmi_json << std::endl; // o << rsmi_json << std::endl;
// o.close(); // o.close();
...@@ -187,7 +187,7 @@ void RSMI_knn_point_insert(string model_path,vector<Point> &points,json jsonpref ...@@ -187,7 +187,7 @@ void RSMI_knn_point_insert(string model_path,vector<Point> &points,json jsonpref
rsmi_json["knn_query_points_scanned"]=exp_recorder.knn_query_scan_size; rsmi_json["knn_query_points_scanned"]=exp_recorder.knn_query_scan_size;
ofstream o("../Datasets/"+dataFolder+"/Experiments/"+experimentName+"/result/KNN.json",ios_base::app); ofstream o("./Datasets/"+dataFolder+"/Experiments/result/KNN.json",ios_base::app);
o << rsmi_json << std::endl; o << rsmi_json << std::endl;
o.close(); o.close();
...@@ -208,7 +208,7 @@ void RSMI_knn_point_insert(string model_path,vector<Point> &points,json jsonpref ...@@ -208,7 +208,7 @@ void RSMI_knn_point_insert(string model_path,vector<Point> &points,json jsonpref
rsmi_json["point_query_time"]=chrono::duration_cast<chrono::nanoseconds>(rsmia_point_eval_end - rsmia_point_eval_start).count()/point_queries.size(); rsmi_json["point_query_time"]=chrono::duration_cast<chrono::nanoseconds>(rsmia_point_eval_end - rsmia_point_eval_start).count()/point_queries.size();
ofstream o("../Datasets/"+dataFolder+"/Experiments/"+experimentName+"/result/Point.json",ios_base::app); ofstream o("./Datasets/"+dataFolder+"/Experiments/result/Point.json",ios_base::app);
o << rsmi_json << std::endl; o << rsmi_json << std::endl;
o.close(); o.close();
} }
...@@ -247,7 +247,7 @@ void RSMI_knn_point_insert(string model_path,vector<Point> &points,json jsonpref ...@@ -247,7 +247,7 @@ void RSMI_knn_point_insert(string model_path,vector<Point> &points,json jsonpref
rsmi_json["insert_query_time"]= insert_times; rsmi_json["insert_query_time"]= insert_times;
rsmi_json["range_query_times"]= range_query_times; rsmi_json["range_query_times"]= range_query_times;
ofstream o("../Datasets/"+dataFolder+"/Experiments/"+experimentName+"/result/Insert.json",ios_base::app); ofstream o("./Datasets/"+dataFolder+"/Experiments/result/Insert.json",ios_base::app);
o << rsmi_json << std::endl; o << rsmi_json << std::endl;
o.close(); o.close();
} }
...@@ -260,7 +260,7 @@ void RSMI_knn_point_insert(string model_path,vector<Point> &points,json jsonpref ...@@ -260,7 +260,7 @@ void RSMI_knn_point_insert(string model_path,vector<Point> &points,json jsonpref
void HRR_rangequery(string model_path,vector<Point> &points,vector<Mbr> &range_queries,json hrr_json, string dataFolder, string experimentName){ void HRR_rangequery(string model_path,vector<Point> &points,vector<Mbr> &range_queries,json hrr_json, string dataFolder){
ExpRecorder exp_recorder; ExpRecorder exp_recorder;
...@@ -288,7 +288,7 @@ void HRR_rangequery(string model_path,vector<Point> &points,vector<Mbr> &range_q ...@@ -288,7 +288,7 @@ void HRR_rangequery(string model_path,vector<Point> &points,vector<Mbr> &range_q
hrr_json["range_query_page_accessed"]=exp_recorder.hrr_window_query_page_access; hrr_json["range_query_page_accessed"]=exp_recorder.hrr_window_query_page_access;
hrr_json["range_result_size"] = exp_recorder.hrr_window_query_result_size; hrr_json["range_result_size"] = exp_recorder.hrr_window_query_result_size;
ofstream o("../Datasets/"+dataFolder+"/Experiments/"+experimentName+"/result/Range.json",ios_base::app); ofstream o("./Datasets/"+dataFolder+"/Experiments/result/Range.json",ios_base::app);
o << hrr_json << std::endl; o << hrr_json << std::endl;
...@@ -297,7 +297,7 @@ void HRR_rangequery(string model_path,vector<Point> &points,vector<Mbr> &range_q ...@@ -297,7 +297,7 @@ void HRR_rangequery(string model_path,vector<Point> &points,vector<Mbr> &range_q
void HRR_knn_point_insert(string model_path,vector<Point> &points,json jsonprefix, string dataFolder, string experimentName,vector<Point>& knn_queries,vector<Point>& point_queries,vector<Point>& insert_queries,vector<uint32_t> k_values,vector<Mbr> &range_queries){ void HRR_knn_point_insert(string model_path,vector<Point> &points,json jsonprefix, string dataFolder,vector<Point>& knn_queries,vector<Point>& point_queries,vector<Point>& insert_queries,vector<uint32_t> k_values,vector<Mbr> &range_queries){
ExpRecorder exp_recorder; ExpRecorder exp_recorder;
HRR *hrr = new HRR(0.7*Constants::PAGESIZE); HRR *hrr = new HRR(0.7*Constants::PAGESIZE);
...@@ -333,11 +333,11 @@ void HRR_knn_point_insert(string model_path,vector<Point> &points,json jsonprefi ...@@ -333,11 +333,11 @@ void HRR_knn_point_insert(string model_path,vector<Point> &points,json jsonprefi
hrr_json["knn_query_points_scanned"]=exp_recorder.hrr_knn_query_scan_size; hrr_json["knn_query_points_scanned"]=exp_recorder.hrr_knn_query_scan_size;
ofstream o("../Datasets/"+dataFolder+"/Experiments/"+experimentName+"/result/KNN.json",ios_base::app); ofstream o("./Datasets/"+dataFolder+"/Experiments/result/KNN.json",ios_base::app);
o << hrr_json << std::endl; o << hrr_json << std::endl;
o.close(); o.close();
cout<<"HRR knn "<<k<<endl; // cout<<"HRR knn "<<k<<endl;
} }
} }
...@@ -354,7 +354,7 @@ void HRR_knn_point_insert(string model_path,vector<Point> &points,json jsonprefi ...@@ -354,7 +354,7 @@ void HRR_knn_point_insert(string model_path,vector<Point> &points,json jsonprefi
hrr_json["point_query_time"]=chrono::duration_cast<chrono::nanoseconds>(hrr_point_eval_end - hrr_point_eval_start).count()/point_queries.size(); hrr_json["point_query_time"]=chrono::duration_cast<chrono::nanoseconds>(hrr_point_eval_end - hrr_point_eval_start).count()/point_queries.size();
ofstream o("../Datasets/"+dataFolder+"/Experiments/"+experimentName+"/result/Point.json",ios_base::app); ofstream o("./Datasets/"+dataFolder+"/Experiments/result/Point.json",ios_base::app);
o << hrr_json << std::endl; o << hrr_json << std::endl;
o.close(); o.close();
} }
...@@ -392,7 +392,7 @@ void HRR_knn_point_insert(string model_path,vector<Point> &points,json jsonprefi ...@@ -392,7 +392,7 @@ void HRR_knn_point_insert(string model_path,vector<Point> &points,json jsonprefi
hrr_json["insert_query_time"]= insert_times; hrr_json["insert_query_time"]= insert_times;
hrr_json["range_query_times"]= range_query_times; hrr_json["range_query_times"]= range_query_times;
ofstream o("../Datasets/"+dataFolder+"/Experiments/"+experimentName+"/result/Insert.json",ios_base::app); ofstream o("./Datasets/"+dataFolder+"/Experiments/result/Insert.json",ios_base::app);
o << hrr_json << std::endl; o << hrr_json << std::endl;
o.close(); o.close();
} }
...@@ -407,15 +407,9 @@ int main(int argc, char* argv[]) ...@@ -407,15 +407,9 @@ int main(int argc, char* argv[])
{ {
//fetching config from tomlfile variables. //fetching config from tomlfile variables.
const auto dataFolder = string(argv[2]); const auto dataFolder = string(argv[2]);
const auto experimentName = string(argv[3]);
// argv[4] tells what kind of queries to run.
// 0 = Build RSMI store model and build time
// 1 = Load RSMI perform range queries + build HRR range queries
// 2 = Load RSMI perform + range queries + KNN queries + Point Queries + Insert Queries +
// Build HRR perform + range queries + KNN queries + Point Queries + Insert Queries
const auto config = toml::parse("../Datasets/"+dataFolder+"/Experiments/"+experimentName+"/config/"+string(argv[1])+".toml"); const auto config = toml::parse("./Datasets/"+dataFolder+"/Experiments/config/"+string(argv[1])+".toml");
const auto point_class = toml::find<std::string>(config, "point_class"); const auto point_class = toml::find<std::string>(config, "point_class");
const auto point_file = toml::find<std::string>(config, "point_file"); const auto point_file = toml::find<std::string>(config, "point_file");
...@@ -424,26 +418,26 @@ int main(int argc, char* argv[]) ...@@ -424,26 +418,26 @@ int main(int argc, char* argv[])
cout<<"Starting loading data\n"; cout<<"Starting loading data\n";
cout.flush(); cout.flush();
string dataset_filename = "../Datasets/"+dataFolder+"/DataPoints/"+point_class+"/"+point_file; string dataset_filename = "./Datasets/"+dataFolder+"/DataPoints/"+point_class+"/"+point_file;
FileReader filereader(dataset_filename, " "); FileReader filereader(dataset_filename, " ");
vector<Point> points = filereader.get_points(); vector<Point> points = filereader.get_points();
cout<<"Finished points from "<<("../Datasets/"+dataFolder+"/DataPoints/"+point_class+"/"+point_file+".txt")<<endl; cout<<"Finished points from "<<("./Datasets/"+dataFolder+"/DataPoints/"+point_class+"/"+point_file+".txt")<<endl;
FileReader query_filereader; FileReader query_filereader;
string model_root_path = "../Datasets/"+dataFolder+ "/RSMI/torch_models/"+point_class+"/"+point_file; string model_root_path = "./Datasets/"+dataFolder+ "/Experiments/trees/RSMI/torch_models/"+point_class+"/"+point_file;
file_utils::check_dir(model_root_path); file_utils::check_dir(model_root_path);
string model_path = model_root_path + "/"; string model_path = model_root_path + "/";
FileWriter file_writer("../Datasets/"+dataFolder + "/RSMI/"); FileWriter file_writer("./Datasets/"+dataFolder + "/RSMI/");
cout<<"Finished loading data "<<points.size()<<endl; cout<<"Finished loading data "<<points.size()<<endl;
if(atoi(argv[4])==0){ if(atoi(argv[3])==0){
BuildRSMI(model_path,points,dataFolder,point_class,point_file); BuildRSMI(model_path,points,dataFolder,point_class,point_file);
return 0; return 0;
} }
...@@ -456,40 +450,36 @@ int main(int argc, char* argv[]) ...@@ -456,40 +450,36 @@ int main(int argc, char* argv[])
rsmi_json["config_id"] = string(argv[1]); rsmi_json["config_id"] = string(argv[1]);
string query_filename = "../Datasets/"+dataFolder+"/Queries/RangeQueries/"+query_file; string query_filename = "./Datasets/"+dataFolder+"/Queries/RangeQueries/"+query_file;
vector<Mbr> range_queries = query_filereader.get_mbrs(query_filename, " "); vector<Mbr> range_queries = query_filereader.get_mbrs(query_filename, " ");
bool RSMI_or_HRR = atoi(argv[5]); bool RSMI_or_HRR = atoi(argv[4]);
bool is_64 = (point_file!="64");
if(atoi(argv[4])>=1){ if(atoi(argv[3])>=1){
if(RSMI_or_HRR && is_64 ) RSMI_rangequery(model_path,points,range_queries,rsmi_json,dataFolder,experimentName); if(RSMI_or_HRR ) RSMI_rangequery(model_path,points,range_queries,rsmi_json,dataFolder);
else HRR_rangequery(model_path,points,range_queries,rsmi_json,dataFolder,experimentName); else HRR_rangequery(model_path,points,range_queries,rsmi_json,dataFolder);
}
string knn_filename = "./Datasets/"+dataFolder+"/Queries/KnnQueries/"+point_class;
cout<<"Done with Range queries.\n";
if(atoi(argv[4])==2){
string knn_filename = "../Datasets/"+dataFolder+"/Queries/KnnQueries/"+point_class;
// cout<<"knn_filename "<<knn_filename<<endl; // cout<<"knn_filename "<<knn_filename<<endl;
FileReader knnreader(knn_filename, " "); FileReader knnreader(knn_filename, " ");
vector<Point> knn_queries = knnreader.get_points(); vector<Point> knn_queries = knnreader.get_points();
string point_filename = "../Datasets/"+dataFolder+"/Queries/PointQueries/"+point_class; string point_filename = "./Datasets/"+dataFolder+"/Queries/PointQueries/"+point_class;
FileReader pointreader(point_filename, " "); FileReader pointreader(point_filename, " ");
vector<Point> point_queries = pointreader.get_points(); vector<Point> point_queries = pointreader.get_points();
string insert_filename = "../Datasets/"+dataFolder+"/Queries/PointQueries/"+point_class; string insert_filename = "./Datasets/"+dataFolder+"/Queries/PointQueries/"+point_class;
FileReader insertreader(insert_filename, " "); FileReader insertreader(insert_filename, " ");
vector<Point> insert_queries = insertreader.get_points(); vector<Point> insert_queries = insertreader.get_points();
vector<uint32_t> k_values = toml::find<std::vector<uint32_t>>(config, "knn_k_values"); vector<uint32_t> k_values = toml::find<std::vector<uint32_t>>(config, "knn_k_values");
if(RSMI_or_HRR && is_64) RSMI_knn_point_insert(model_path,points,rsmi_json,dataFolder,experimentName,knn_queries,point_queries,insert_queries,k_values,range_queries); if(RSMI_or_HRR) RSMI_knn_point_insert(model_path,points,rsmi_json,dataFolder,knn_queries,point_queries,insert_queries,k_values,range_queries);
else HRR_knn_point_insert(model_path,points,rsmi_json,dataFolder,experimentName,knn_queries,point_queries,insert_queries,k_values,range_queries); else HRR_knn_point_insert(model_path,points,rsmi_json,dataFolder,knn_queries,point_queries,insert_queries,k_values,range_queries);
} }
......
#!/bin/bash #!/bin/bash
# echo "Creating a dataset and query workload..." echo "Creating a dataset and query workload..."
# python Preprocess/sample_datapoints_queries.py python Preprocess/sample_datapoints_queries.py
echo "Creating all experiment settings" echo "Creating all experiment settings"
config_cnt=`python Preprocess/generate_experiment_configs.py` config_cnt=`python Preprocess/generate_experiment_configs.py`
echo "Compiling.." echo "Compiling.."
bash Preprocess/compile.sh bash Preprocess/compile.sh
cd RSMI
make
cd ..
echo "Building RSMI"
# Build step: run the RSMI/HRR evaluator once per line of RSMI_build_list.
# Each line read from the list is forwarded as the first argument and the
# script's own first argument ($1) as the second; the trailing "0 1" are the
# mode arguments (the evaluator's main treats a third argument of 0 as
# "build only" -- NOTE(review): confirm against evaluate_rsmi_hrr's main).
#
# The loop variable is `p`; the previous body passed `$i`, which is not set
# at this point in the script, so the entry was dropped and every remaining
# argument shifted one position to the left.
# `IFS= read -r` keeps each line verbatim (no backslash processing, no
# trimming), and the quotes keep every argument in its intended position
# even when a value is empty or contains whitespace.
while IFS= read -r p; do
./RSMI/evaluate_rsmi_hrr.out "$p" "$1" 0 1
done <./Datasets/RealWorld/Experiments/RSMI_build_list
echo "Executing all configs ($config_cnt)" echo "Executing all configs ($config_cnt)"
...@@ -19,9 +28,12 @@ do ...@@ -19,9 +28,12 @@ do
./FLOOD/evaluate_flood.out $i $1 ./FLOOD/evaluate_flood.out $i $1
./QUILTS/evaluate_quilts.out $i $1 ./QUILTS/evaluate_quilts.out $i $1
./ZPGM/evaluate_zpgm.out $i $1 ./ZPGM/evaluate_zpgm.out $i $1
./RSMI/evaluate_rsmi_hrr.out $i $1 2 0 # HRR
# ./RSMI/evaluate_rsmi_hrr.out $i $1 2 1 # RSMI_UNCOMMENT
done done
echo "Finished all Experiments" echo "Finished all Experiments"
# echo "Plotting..." # echo "Plotting..."
# python plot.py $1 # python plot.py $1
# echo "Finished Plotting" # echo "Finished Plotting"
\ No newline at end of file