1
+ #include < iostream>
2
+ #include < vector>
3
+ #include < tuple>
4
+ #include < set>
5
+ #include < algorithm>
6
+ #include " ../xdelta3/xdelta3.h"
7
+ #include " ../xxhash.h"
8
+ #include " ../lz4.h"
9
+ #define BLOCK_SIZE 4096  // size in bytes of one block read from the input file
10
+ #define MAX_THREAD 256  // first dimension of buf/out and size of the tid array in main
11
+ #define INF 987654321  // not referenced anywhere in the visible code
12
+ using namespace std ;
13
+
14
+ vector<char *> trace;  // heap-allocated BLOCK_SIZE-byte blocks, appended in file order by read_file
15
+ vector<bool > unique_block;  // unique_block[i] is set in main when block i's XXH64 hash was not seen at a lower index
16
+ int N;  // number of blocks held in trace (incremented by read_file)
17
+
18
+ char buf[MAX_THREAD][2 * BLOCK_SIZE];  // not referenced anywhere in the visible code
19
+ char out[MAX_THREAD][2 * BLOCK_SIZE];  // row `id` is the output buffer worker `id` passes to LZ4/xdelta3 in func
20
+
21
+ char file_name_temp[100 ];  // checkpoint file name, built in main from argv[1] plus a "_bf_temp" suffix
22
+ char file_name_result[100 ];  // result file name, built in main from argv[1] plus a "_bf_result" suffix
23
+ FILE* fp_temp;  // checkpoint file opened for append in main; workers write one line per finished block
24
+
25
+ typedef tuple<int , int , int > i3;  // (block index, reference block index or -1, size) as pushed by func
26
+
27
+ set<int > todo;  // indices of unique blocks still waiting to be processed
28
+ pthread_mutex_t mutex;  // locked in func around accesses to todo, result and fp_temp
29
+ vector<i3> result;  // one record per processed block; sorted and written out by print_result
30
+
31
+ void read_file (char * name) {
32
+ FILE* f = fopen (name, " rb" );
33
+ while (1 ) {
34
+ char * ptr = new char [BLOCK_SIZE];
35
+ trace.push_back (ptr);
36
+ int now = fread (trace[N++], 1 , BLOCK_SIZE, f);
37
+ if (!now) {
38
+ delete[] trace.back ();
39
+ trace.pop_back ();
40
+ N--;
41
+ break ;
42
+ }
43
+ }
44
+ fclose (f);
45
+ }
46
+
47
+ void restore_result (char * name) {
48
+ FILE* f = fopen (file_name_temp, " rt" );
49
+ if (f == NULL ) return ;
50
+
51
+ int num, ref, size;
52
+ while (fscanf (f, " %d %d %d" , &num, &ref, &size) == 3 ) {
53
+ result.push_back ({num, ref, size});
54
+ todo.erase (num);
55
+ }
56
+ fclose (f);
57
+ }
58
+
59
+ void print_result (char * name) {
60
+ long long total = 0 ;
61
+ sort (result.begin (), result.end ());
62
+
63
+ for (i3 u: result) {
64
+ total += get<2 >(u);
65
+ }
66
+
67
+ FILE* f = fopen (file_name_result, " wt" );
68
+ fprintf (f, " %llu %.2lf\n " , total, (double )total / N / BLOCK_SIZE * 100 );
69
+ for (i3 u: result) {
70
+ fprintf (f, " %d %d %d\n " , get<0 >(u), get<1 >(u), get<2 >(u));
71
+ }
72
+ fclose (f);
73
+ }
74
+
75
+ void * func (void * arg) {
76
+ int id = (long long )arg;
77
+
78
+ while (1 ) {
79
+ pthread_mutex_lock (&mutex);
80
+ if (todo.empty ()) {
81
+ pthread_mutex_unlock (&mutex);
82
+ break ;
83
+ }
84
+ int i = *todo.begin ();
85
+ todo.erase (i);
86
+ pthread_mutex_unlock (&mutex);
87
+
88
+ int size = LZ4_compress_default (trace[i], out[id], BLOCK_SIZE, 2 * BLOCK_SIZE);
89
+ int ref = -1 ;
90
+
91
+ for (int j = 0 ; j < i; ++j) {
92
+ if (!unique_block[j]) continue ;
93
+ int now = xdelta3_compress (trace[i], 4096 , trace[j], 4096 , out[id], 1 );
94
+ if (now < size) {
95
+ size = now;
96
+ ref = j;
97
+ }
98
+ }
99
+
100
+ pthread_mutex_lock (&mutex);
101
+ result.push_back ({i, ref, size});
102
+ fprintf (fp_temp, " %d %d %d\n " , i, ref, size);
103
+ if (i % 100 == 0 ) {
104
+ fprintf (stderr, " %d/%d\r " , i, N);
105
+ }
106
+ pthread_mutex_unlock (&mutex);
107
+ }
108
+ return NULL ;
109
+ }
110
+
111
+ int main (int argc, char * argv[]) {
112
+ if (argc != 3 ) {
113
+ printf (" usage: ./bf [file_name] [num_thread]\n " );
114
+ exit (0 );
115
+ }
116
+ sprintf (file_name_temp, " %s_bf_temp" , argv[1 ]);
117
+ sprintf (file_name_result, " %s_bf_result" , argv[1 ]);
118
+
119
+ int NUM_THREAD = atoi (argv[2 ]);
120
+
121
+ read_file (argv[1 ]);
122
+ unique_block.resize (N, 0 );
123
+
124
+ set<XXH64_hash_t> dedup;
125
+ for (int i = 0 ; i < N; ++i) {
126
+ XXH64_hash_t h = XXH64 (trace[i], BLOCK_SIZE, 0 );
127
+ if (!dedup.count (h)) {
128
+ todo.insert (i);
129
+ dedup.insert (h);
130
+ unique_block[i] = 1 ;
131
+ }
132
+ }
133
+
134
+ restore_result (argv[1 ]);
135
+
136
+ pthread_t tid[MAX_THREAD];
137
+ pthread_mutex_init (&mutex, NULL );
138
+ fp_temp = fopen (file_name_temp, " at" );
139
+
140
+ for (int i = 0 ; i < NUM_THREAD; ++i) {
141
+ pthread_create (&tid[i], NULL , func, (void *)i);
142
+ }
143
+
144
+ for (int i = 0 ; i < NUM_THREAD; ++i) {
145
+ pthread_join (tid[i], NULL );
146
+ }
147
+ fclose (fp_temp);
148
+
149
+ print_result (argv[1 ]);
150
+ }
0 commit comments