forked from PolMine/RcppCWB
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_count_vector.Rd
More file actions
37 lines (35 loc) · 1.04 KB
/
get_count_vector.Rd
File metadata and controls
37 lines (35 loc) · 1.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/count.R
\name{get_count_vector}
\alias{get_count_vector}
\title{Get Vector with Counts for Positional Attribute.}
\usage{
get_count_vector(corpus, p_attribute, registry = Sys.getenv("CORPUS_REGISTRY"))
}
\arguments{
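\item{corpus}{Name of a CWB corpus, given as a character string (e.g. \code{"REUTERS"}).}
\item{p_attribute}{Name of a positional attribute, given as a character string (e.g. \code{"word"}).}
\item{registry}{Path of the registry directory. Defaults to the value of the \code{CORPUS_REGISTRY} environment variable.}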
}
\value{
An integer vector of counts, with one element per unique token id.
}
\description{
The return value is an integer vector. Its length is the number of unique
tokens in the corpus, i.e. the number of unique ids. The counts are ordered by
id: the first element is the count for id 0, the second for id 1, and so on.
}
\examples{
# Counts for every id of the "word" attribute of the REUTERS corpus.
registry_dir <- get_tmp_registry()
counts <- get_count_vector(
  corpus = "REUTERS", p_attribute = "word",
  registry = registry_dir
)

# Element i of the count vector is paired with id i - 1 (ids start at 0).
# seq_along() keeps this correct even for an empty vector, where
# 0:(length(counts) - 1) would yield c(0, -1).
token_ids <- seq_along(counts) - 1L

# Assemble a frequency table and look up the token string for each id.
freq <- data.frame(token_id = token_ids, count = counts)
freq[["token"]] <- cl_id2str(
  "REUTERS", p_attribute = "word",
  id = freq[["token_id"]], registry = registry_dir
)

# Put the token column first and sort by descending count.
by_count <- order(freq[["count"]], decreasing = TRUE)
freq <- freq[by_count, c("token", "token_id", "count")]
head(freq)
}