forked from skeskinen/bert.cpp
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserver.cpp
More file actions
88 lines (73 loc) · 2.52 KB
/
server.cpp
File metadata and controls
88 lines (73 loc) · 2.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#include "bert.h"
#include "ggml.h"

#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

#include <cerrno>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
/**
 * Reads one chunk of bytes from a file descriptor.
 *
 * Issues a single read() of at most 32 KiB (retried only when the call is
 * interrupted by a signal) and returns whatever that call delivered. No
 * message framing is applied: the result is exactly the bytes one read()
 * produced.
 *
 * @param socket  Descriptor to read from.
 * @return The bytes received, or an empty string when the peer closed the
 *         connection (read() returned 0) or the read failed.
 */
std::string receive_string(int socket) {
    // Local buffer instead of a function-level static, so calls do not share state.
    char buffer[1 << 15];

    ssize_t bytes_received;
    do {
        bytes_received = read(socket, buffer, sizeof(buffer));
    } while (bytes_received < 0 && errno == EINTR);

    // A negative count must never reach the std::string constructor: it would
    // be converted to an enormous size_t length.
    if (bytes_received <= 0) {
        return std::string();
    }
    return std::string(buffer, static_cast<std::size_t>(bytes_received));
}
/**
 * Sends the contents of a float vector over a socket as raw bytes.
 *
 * The floats are written exactly as they are laid out in memory: no length
 * prefix and no byte-order conversion. send() may accept fewer bytes than
 * requested, so the call is repeated until the whole payload has been handed
 * to the kernel.
 *
 * @param socket  Connected socket descriptor to write to.
 * @param floats  Values to transmit; an empty vector sends nothing.
 *
 * The function returns void, so a failed send is not reported; transmission
 * simply stops at the first unrecoverable error.
 */
void send_floats(int socket, const std::vector<float> & floats) {
    const char * cursor = reinterpret_cast<const char *>(floats.data());
    std::size_t remaining = floats.size() * sizeof(float);

    while (remaining > 0) {
        const ssize_t sent = send(socket, cursor, remaining, 0);
        if (sent < 0) {
            if (errno == EINTR) {
                continue; // interrupted before any data was sent: retry
            }
            return;
        }
        if (sent == 0) {
            return; // no progress possible; avoid spinning forever
        }
        cursor += sent;
        remaining -= static_cast<std::size_t>(sent);
    }
}
/**
 * Entry point of the embedding server.
 *
 * Parses the command line into bert_params, loads the model, then listens on
 * params.port (all interfaces, TCP) and serves one client at a time:
 *   1. on connect, the value of bert_n_embd() is sent as a raw `int`;
 *   2. every chunk returned by receive_string() is passed to bert_encode()
 *      together with an n_embd-sized float buffer, and that buffer is sent
 *      back with send_floats();
 *   3. an empty chunk ends the session and the server waits for the next
 *      client.
 *
 * @return 1 when argument parsing or model loading fails, -1 when the
 *         listening socket cannot be set up or accept() fails.
 *
 * NOTE(review): the model context is never released on any exit path here;
 * if bert.h offers a matching free function it should be called — confirm.
 */
int main(int argc, char ** argv) {
    bert_params params;
    params.model = "../../models/all-MiniLM-L6-v2/ggml-model-q4_0.bin";

    if (bert_params_parse(argc, argv, params) == false) {
        return 1;
    }

    // load the model
    bert_ctx * bctx = bert_load_from_file(params.model);
    if (bctx == nullptr) {
        fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model);
        return 1;
    }

    // socket() signals failure with -1; 0 is a valid descriptor, so test for < 0.
    const int server_fd = socket(AF_INET, SOCK_STREAM, 0);
    if (server_fd < 0) {
        std::cerr << "Socket creation failed" << std::endl;
        return -1;
    }

    // Zero-initialise so padding fields do not carry stack garbage into bind().
    struct sockaddr_in address = {};
    address.sin_family = AF_INET;
    address.sin_addr.s_addr = INADDR_ANY;
    address.sin_port = htons(params.port);

    if (bind(server_fd, reinterpret_cast<struct sockaddr *>(&address), sizeof(address)) < 0) {
        std::cerr << "Bind failed" << std::endl;
        close(server_fd);
        return -1;
    }

    if (listen(server_fd, 1) < 0) {
        std::cerr << "Listen failed" << std::endl;
        close(server_fd);
        return -1;
    }

    std::cout << "Server running on port " << params.port << " with " << params.n_threads << " threads" << std::endl;

    int n_embd = bert_n_embd(bctx);

    while (true) {
        std::cout << "Waiting for a client" << std::endl;

        // accept() overwrites the length argument, so reset it for every call.
        socklen_t addrlen = sizeof(address);
        const int new_socket = accept(server_fd, reinterpret_cast<struct sockaddr *>(&address), &addrlen);
        if (new_socket < 0) {
            std::cerr << "Accept failed" << std::endl;
            close(server_fd);
            return -1;
        }
        std::cout << "New connection" << std::endl;

        // Handshake: tell the client how many floats each reply will contain.
        // If it cannot be delivered the session is useless, so drop the client.
        if (send(new_socket, &n_embd, sizeof(int), 0) != static_cast<ssize_t>(sizeof(int))) {
            close(new_socket);
            continue;
        }

        while (true) {
            std::string string_in = receive_string(new_socket);
            if (string_in.empty()) {
                // Empty chunk: the client is done (or the read failed).
                break;
            }
            std::vector<float> embeddings(n_embd);
            bert_encode(bctx, params.n_threads, string_in.data(), embeddings.data());
            send_floats(new_socket, embeddings);
        }
        close(new_socket);
    }

    // Not reached: the accept loop above only leaves through a return.
    close(server_fd);
    return 0;
}