#define _sugar_
extern "C" {
#include
#include
#include
#include
#include "cl.h"
#include "cqp.h"
#ifndef PCRE2_CODE_UNIT_WIDTH
#define PCRE2_CODE_UNIT_WIDTH 8
#endif
#include
#include "server.h"
#include "cwb/cqp/corpmanag.h"
#include "_globalvars.h"
#include "_eval.h"
/* includes for utils */
#include
}
#include
/* do not use namespace - would create conflict with Range type */
/* using namespace Rcpp; */
// [[Rcpp::interfaces(r, cpp)]]
int cqp_initialization_status = 0;
CorpusList *corpuslist = NULL;
// [[Rcpp::export(name=".init_cqp")]]
void init_cqp() {
int ac = 1;
char * av[1];
av[0] = (char *)"RcppCWB";
which_app = cqp;
silent = 0;
verbose_parser = 0;
paging = 0;
autoshow = 0;
auto_save = 0;
server_log = 0;
enable_macros = 0;
initialize_cqp(ac, av);
cqp_initialization_status = 1;
make_attribute_hash(16384);
}
// [[Rcpp::export(name=".cqp_verbosity")]]
void cqp_verbosity(int quietly, int verbose) {
silent = quietly;
verbose_parser = verbose;
return;
}
// [[Rcpp::export(name=".cqp_get_registry")]]
Rcpp::StringVector cqp_get_registry(){
Rcpp::StringVector result(1);
/* result(0) = cl_standard_registry(); */
result(0) = registry;
return result;
}
// [[Rcpp::export(name=".cqp_get_status")]]
int cqp_get_status(){
return cqp_initialization_status;
}
// [[Rcpp::export(name=".cqp_set_registry")]]
SEXP cqp_set_registry(SEXP registry_dir){
registry = strdup(Rcpp::as<:string>(registry_dir).c_str());
int ac = 1;
char * av[1];
av[0] = (char *)"RcppCWB";
set_current_corpus(NULL, 0); /* required to avoid crash! */
initialize_cqp(ac, av);
make_attribute_hash(16384);
SEXP result = R_NilValue;
return result;
}
// [[Rcpp::export(name=".cqp_list_corpora")]]
Rcpp::StringVector cqp_list_corpora(){
CorpusList * cl;
int i = 0, n = 0;
/* First count corpora */
for (cl = FirstCorpusFromList(); cl != NULL; cl = NextCorpusFromList(cl)) {
if (cl->type == SYSTEM) n++;
}
Rcpp::StringVector result(n);
/* Then build list of names */
for (cl = FirstCorpusFromList(); cl != NULL; cl = NextCorpusFromList(cl)) {
if (cl->type == SYSTEM) {
result(i) = cl->name;
i++;
}
}
return result;
}
// [[Rcpp::export(name=".cqp_query")]]
SEXP cqp_query(SEXP corpus, SEXP subcorpus, SEXP query){
char * mother = (char*)CHAR(STRING_ELT(corpus,0));
char * child = (char*)CHAR(STRING_ELT(subcorpus,0));
char * q = (char*)CHAR(STRING_ELT(query,0));
char * cqp_query;
CorpusList *cl;
SEXP result;
/* is this necessary */
char *c, *sc;
if (!split_subcorpus_spec(mother, &c, &sc)) {
Rprintf("ERROR (function: split_subcorpus_spec)");
}
cl = cqi_find_corpus(mother);
set_current_corpus(cl, 0);
int len = strlen(child) + strlen(q) + 10;
cqp_query = (char *) cl_malloc(len);
snprintf(cqp_query, len, "%s = %s", child, q);
if (!cqi_activate_corpus(mother)){
Rprintf("activation failed");
}
if (!check_subcorpus_name(child)){
Rprintf("checking subcorpus name failed \n");
}
if (!cqp_parse_string(cqp_query)){
Rprintf("ERROR: Cannot parse the CQP query.\n");
result = R_NilValue;
} else {
char * full_child;
CorpusList * childcl;
if (strlen(c) > 0){
full_child = combine_subcorpus_spec(c, child);
} else {
full_child = combine_subcorpus_spec(mother, child);
}
childcl = cqi_find_corpus(full_child);
if ((childcl) == NULL) {
Rprintf("subcorpus not found\n");
result = R_NilValue;
} else {
result = R_MakeExternalPtr(childcl, R_NilValue, R_NilValue);
}
}
return result;
}
// [[Rcpp::export(name=".cqp_subcorpus_size")]]
int cqp_subcorpus_size(SEXP scorpus)
{
int result;
char * subcorpus;
CorpusList * cl;
subcorpus = (char*)CHAR(STRING_ELT(scorpus,0));
cl = cqi_find_corpus(subcorpus);
if (cl == NULL) {
result = 0;
} else {
result = cl->size;
}
return result;
}
// [[Rcpp::export(name=".cqp_list_subcorpora")]]
Rcpp::StringVector cqp_list_subcorpora(SEXP inCorpus)
{
char * corpus;
CorpusList *cl, *mother;
int i = 0, n = 0;
corpus = (char*)CHAR(STRING_ELT(inCorpus,0));
mother = cqi_find_corpus(corpus);
if (!check_corpus_name(corpus) || mother == NULL) {
Rcpp::StringVector result;
return result;
} else {
/* First count subcorpora */
for (cl = FirstCorpusFromList(); cl != NULL; cl = NextCorpusFromList(cl)) {
if (cl->type == SUB && cl->corpus == mother->corpus){
n++;
}
}
Rcpp::StringVector result(n);
/* Then build list of names */
for (cl = FirstCorpusFromList(); cl != NULL; cl = NextCorpusFromList(cl)) {
if (cl->type == SUB && cl->corpus == mother->corpus) {
result(i) = cl->name;
i++;
}
}
return result;
}
}
// [[Rcpp::export(name=".cqp_dump_subcorpus")]]
Rcpp::IntegerMatrix cqp_dump_subcorpus(SEXP inSubcorpus)
{
char * subcorpus;
CorpusList * cl;
int i;
int nrows = cqp_subcorpus_size(inSubcorpus);
subcorpus = (char*)CHAR(STRING_ELT(inSubcorpus,0));
cl = cqi_find_corpus(subcorpus);
if (cl == NULL) {
Rprintf("subcorpus not found\n");
}
Rcpp::IntegerMatrix result(nrows,2);
for (i = 0; i < nrows; i++) {
result(i,0) = cl->range[i].start;
result(i,1) = cl->range[i].end;
}
return result;
}
// [[Rcpp::export(name=".cqp_subcorpus_regions")]]
Rcpp::IntegerMatrix cqp_subcorpus_regions(SEXP subcorpus)
{
CorpusList * cl = (CorpusList*)R_ExternalPtrAddr(subcorpus);
int nrows = cl->size;
int i;
Rcpp::IntegerMatrix result(nrows,2);
for (i = 0; i < nrows; i++) {
result(i,0) = cl->range[i].start;
result(i,1) = cl->range[i].end;
}
return result;
}
// [[Rcpp::export(name=".cqp_drop_subcorpus")]]
SEXP cqp_drop_subcorpus(SEXP inSubcorpus)
{
SEXP result = R_NilValue;
char * subcorpus;
char *c, *sc;
CorpusList * cl;
PROTECT(inSubcorpus);
subcorpus = (char*)CHAR(STRING_ELT(inSubcorpus,0));
if (!split_subcorpus_spec(subcorpus, &c, &sc)) {
UNPROTECT(1);
} else if (sc == NULL) {
free(c);
UNPROTECT(1);
} else {
free(c); free(sc);
cl = cqi_find_corpus(subcorpus);
if (cl == NULL) {
UNPROTECT(1);
} else {
dropcorpus(cl, corpuslist);
}
}
UNPROTECT(1);
return result;
}
// [[Rcpp::export(name=".check_corpus")]]
int check_corpus(SEXP corpus){
char * c;
CorpusList * cl;
c = (char*)CHAR(STRING_ELT(corpus,0));
cl = findcorpus(c, SYSTEM, 0);
if (cl == NULL || !access_corpus(cl)) {
return 0;
} else {
return 1;
}
}
// [[Rcpp::export(name=".cqp_load_corpus")]]
int cqp_load_corpus(SEXP corpus, SEXP registry){
char *entry;
char *dirname;
CorpusList * cl;
entry = strdup(Rcpp::as<:string>(corpus).c_str());
dirname = strdup(Rcpp::as<:string>(registry).c_str());
cl = ensure_syscorpus(dirname, entry);
if (cl == NULL) return 0;
return 1;
}
// [[Rcpp::export(name=".region_matrix_to_subcorpus")]]
SEXP region_matrix_to_subcorpus(Rcpp::IntegerMatrix region_matrix, SEXP corpus, SEXP subcorpus){
int i, n, corpus_size;
SEXP sc;
char* id;
Corpus* c;
Attribute *attr;
CorpusList *cl;
c = (Corpus*)R_ExternalPtrAddr(corpus);
id = strdup(Rcpp::as<:string>(subcorpus).c_str());
n = region_matrix.nrow();
cl = (CorpusList *)cl_malloc(sizeof(CorpusList));
cl->name = id;
char* mum = cl_strdup(c->registry_name);
cl_id_toupper(mum);
cl->mother_name = mum;
attr = cl_new_attribute(c, CWB_DEFAULT_ATT_NAME, ATT_POS);
corpus_size = cl_max_cpos(attr);
cl->mother_size = corpus_size;
cl->registry = c->registry_dir;
cl->abs_fn = NULL;
cl->type = SUB;
cl->local_dir = NULL;
cl->query_corpus = NULL;
cl->query_text = NULL;
cl->saved = False;
cl->loaded = True;
cl->needs_update = False;
cl->corpus = c;
cl->range = (Range *)cl_malloc(sizeof(Range) * n);
for (i = 0; i < n; i++) {
cl->range[i].start = region_matrix(i,0);
cl->range[i].end = region_matrix(i,1);
}
cl->size = n;
cl->sortidx = NULL;
cl->targets = NULL;
cl->keywords = NULL;
cl->cd = NULL;
cl->next = NULL;
cl->next = corpuslist;
corpuslist = cl;
sc = R_MakeExternalPtr(cl, R_NilValue, R_NilValue);
return(sc);
}