forked from PolMine/RcppCWB
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathregion_matrix_ops.html
More file actions
502 lines (462 loc) · 45.8 KB
/
region_matrix_ops.html
File metadata and controls
502 lines (462 loc) · 45.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
<!DOCTYPE html>
<!-- Generated by pkgdown: do not edit by hand --><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>Get IDs and Counts for Region Matrices. — region_matrix_ops • RcppCWB</title><!-- jquery --><script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script><!-- Bootstrap --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha256-bZLfwXAP04zRMK2BjiO8iu9pf4FbLqX6zitd+tIvLhE=" crossorigin="anonymous"><script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script><!-- bootstrap-toc --><link rel="stylesheet" href="../bootstrap-toc.css"><script src="../bootstrap-toc.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous"><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous"><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script 
src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- pkgdown --><link href="../pkgdown.css" rel="stylesheet"><script src="../pkgdown.js"></script><meta property="og:title" content="Get IDs and Counts for Region Matrices. — region_matrix_ops"><meta property="og:description" content="Get IDs and Counts for Region Matrices."><!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]--></head><body data-spy="scroll" data-target="#toc">
<div class="container template-reference-topic">
<header><div class="navbar navbar-default navbar-fixed-top" role="navigation">
<div class="container">
<!-- Navbar header: collapse toggle for #navbar, brand link to the site index, and the package version label. -->
<div class="navbar-header">
  <!-- aria-controls names the element addressed by data-target so assistive technology can relate the toggle to the region it expands. -->
  <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
    <span class="sr-only">Toggle navigation</span>
    <span class="icon-bar"></span>
    <span class="icon-bar"></span>
    <span class="icon-bar"></span>
  </button>
  <span class="navbar-brand">
    <a class="navbar-link" href="../index.html">RcppCWB</a>
    <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="">0.6.0</span>
  </span>
</div>
<!-- Collapsible primary navigation: Reference, Articles (dropdown), Changelog, and a right-aligned link to the GitHub repository. -->
<div id="navbar" class="navbar-collapse collapse">
  <ul class="nav navbar-nav">
    <li>
      <a href="../reference/index.html">Reference</a>
    </li>
    <li class="dropdown">
      <!-- aria-haspopup announces that activating this control opens a menu. -->
      <!-- NOTE(review): href="#" is kept as generated; Bootstrap 3 dropdown markup appears to expect an anchor here - confirm before switching to a <button>. -->
      <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
        Articles
        <span class="caret"></span>
      </a>
      <ul class="dropdown-menu" role="menu">
        <li>
          <a href="../articles/vignette.html">Writing performance code with RcppCWB</a>
        </li>
      </ul>
    </li>
    <li>
      <a href="../news/index.html">Changelog</a>
    </li>
  </ul>
  <ul class="nav navbar-nav navbar-right">
    <li>
      <!-- Icon-only link: aria-label supplies the accessible name, and the icon glyph is hidden from assistive technology. -->
      <a href="https://github.com/PolMine/RcppCWB/" class="external-link" aria-label="RcppCWB on GitHub">
        <span class="fab fa-github fa-lg" aria-hidden="true"></span>
      </a>
    </li>
  </ul>
</div><!--/.nav-collapse -->
</div><!--/.container -->
</div><!--/.navbar -->
</header><div class="row">
<div class="col-md-9 contents">
<!-- Page title block: topic heading, link to the R source file, and a hidden element carrying the Rd file name. -->
<div class="page-header">
  <h1>Get IDs and Counts for Region Matrices.</h1>
  <small class="dont-index">Source: <a href="https://github.com/PolMine/RcppCWB/blob/HEAD/R/region_matrix.R" class="external-link"><code>R/region_matrix.R</code></a></small>
  <div class="hidden name"><code>region_matrix_ops.Rd</code></div>
</div>
<!-- One-paragraph description of the help topic. -->
<div class="ref-description">
  <p>Get IDs and Counts for Region Matrices.</p>
</div>
<!-- Usage: syntax-highlighted R call signatures for region_matrix_to_ids(), region_matrix_to_count_matrix(), region_matrix_context() and ranges_to_cpos(). -->
<!-- The signatures live inside <pre>, where whitespace is rendered literally; do not re-indent the lines below. -->
<div id="ref-usage">
<div class="sourceCode"><pre class="sourceCode r"><code><span><span class="fu">region_matrix_to_ids</span><span class="op">(</span></span>
<span> <span class="va">corpus</span>,</span>
<span> <span class="va">p_attribute</span>,</span>
<span> registry <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/Sys.getenv.html" class="external-link">Sys.getenv</a></span><span class="op">(</span><span class="st">"CORPUS_REGISTRY"</span><span class="op">)</span>,</span>
<span> <span class="va">matrix</span></span>
<span><span class="op">)</span></span>
<span></span>
<span><span class="fu">region_matrix_to_count_matrix</span><span class="op">(</span></span>
<span> <span class="va">corpus</span>,</span>
<span> <span class="va">p_attribute</span>,</span>
<span> registry <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/Sys.getenv.html" class="external-link">Sys.getenv</a></span><span class="op">(</span><span class="st">"CORPUS_REGISTRY"</span><span class="op">)</span>,</span>
<span> <span class="va">matrix</span></span>
<span><span class="op">)</span></span>
<span></span>
<span><span class="fu">region_matrix_context</span><span class="op">(</span></span>
<span> <span class="va">corpus</span>,</span>
<span> registry <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/Sys.getenv.html" class="external-link">Sys.getenv</a></span><span class="op">(</span><span class="st">"CORPUS_REGISTRY"</span><span class="op">)</span>,</span>
<span> <span class="va">matrix</span>,</span>
<span> <span class="va">p_attribute</span>,</span>
<span> <span class="va">s_attribute</span>,</span>
<span> <span class="va">boundary</span>,</span>
<span> <span class="va">left</span>,</span>
<span> <span class="va">right</span></span>
<span><span class="op">)</span></span>
<span></span>
<span><span class="fu">ranges_to_cpos</span><span class="op">(</span><span class="va">ranges</span><span class="op">)</span></span></code></pre></div>
</div>
<!-- Arguments: a description list with one term/description pair per function parameter. -->
<div id="arguments">
  <h2>Arguments</h2>
  <dl>
    <dt>corpus</dt>
    <dd><p>a CWB corpus</p></dd>

    <dt>p_attribute</dt>
    <dd><p>a positional attribute</p></dd>

    <dt>registry</dt>
    <dd><p>registry directory</p></dd>

    <dt>matrix</dt>
    <dd><p>a regions matrix</p></dd>

    <dt>s_attribute</dt>
    <dd><p>If not <code>NULL</code>, a structural attribute (length-one <code>character</code> vector), typically indicating a sentence ("s").</p></dd>

    <dt>boundary</dt>
    <dd><p>Structural attribute (length-one <code>character</code> vector) that serves as a boundary and that shall not be transgressed.</p></dd>

    <dt>left</dt>
    <dd><p>An <code>integer</code> value, number of strucs to move to the left.</p></dd>

    <dt>right</dt>
    <dd><p>An <code>integer</code> value, number of strucs to move to the right.</p></dd>

    <dt>ranges</dt>
    <dd><p>A two-column integer <code>matrix</code> of ranges (left and right corpus positions in first and second column, respectively).</p></dd>
  </dl>
</div>
<!-- Details: explanatory note on ranges_to_cpos(). -->
<div id="details">
  <h2>Details</h2>
  <p><code>ranges_to_cpos()</code> will turn a <code>matrix</code> of ranges into an <code>integer</code> vector with the individual corpus positions covered by the ranges.</p>
</div>
<div id="ref-examples">
<h2>Examples</h2>
<div class="sourceCode"><pre class="sourceCode r"><code><span class="r-in"><span><span class="co"># Scenario 1: Get full text for a subcorpus defined by regions</span></span></span>
<span class="r-in"><span><span class="va">m</span> <span class="op">&lt;-</span> <span class="fu"><a href="get_region_matrix.html">get_region_matrix</a></span><span class="op">(</span></span></span>
<span class="r-in"><span> corpus <span class="op">=</span> <span class="st">"REUTERS"</span>, s_attribute <span class="op">=</span> <span class="st">"places"</span>,</span></span>
<span class="r-in"><span> strucs <span class="op">=</span> <span class="fl">4L</span><span class="op">:</span><span class="fl">5L</span>, registry <span class="op">=</span> <span class="fu"><a href="tmp_registry.html">get_tmp_registry</a></span><span class="op">(</span><span class="op">)</span></span></span>
<span class="r-in"><span> <span class="op">)</span></span></span>
<span class="r-in"><span><span class="va">ids</span> <span class="op">&lt;-</span> <span class="fu">region_matrix_to_ids</span><span class="op">(</span></span></span>
<span class="r-in"><span> corpus <span class="op">=</span> <span class="st">"REUTERS"</span>, p_attribute <span class="op">=</span> <span class="st">"word"</span>,</span></span>
<span class="r-in"><span> registry <span class="op">=</span> <span class="fu"><a href="tmp_registry.html">get_tmp_registry</a></span><span class="op">(</span><span class="op">)</span>, matrix <span class="op">=</span> <span class="va">m</span></span></span>
<span class="r-in"><span> <span class="op">)</span></span></span>
<span class="r-in"><span><span class="va">tokenstream</span> <span class="op">&lt;-</span> <span class="fu"><a href="p_attributes.html">cl_id2str</a></span><span class="op">(</span></span></span>
<span class="r-in"><span> corpus <span class="op">=</span> <span class="st">"REUTERS"</span>, p_attribute <span class="op">=</span> <span class="st">"word"</span>,</span></span>
<span class="r-in"><span> registry <span class="op">=</span> <span class="fu"><a href="tmp_registry.html">get_tmp_registry</a></span><span class="op">(</span><span class="op">)</span>, id <span class="op">=</span> <span class="va">ids</span></span></span>
<span class="r-in"><span> <span class="op">)</span></span></span>
<span class="r-in"><span><span class="va">txt</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/paste.html" class="external-link">paste</a></span><span class="op">(</span><span class="va">tokenstream</span>, collapse <span class="op">=</span> <span class="st">" "</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="va">txt</span></span></span>
<span class="r-out co"><span class="r-pr">#></span> [1] "Houston Oil Trust said that independent petroleum engineers completed an annual study that estimates the trust's future net revenues from total proved reserves at 88 mln dlrs and its discounted present value of the reserves at 64 mln dlrs Based on the estimate the trust said there may be no money available for cash distributions to unitholders for the remainder of the year It said the estimates reflect a decrease of about 44 pct in net reserve revenues and 39 pct in discounted present value compared with the study made in 1985 Reuter Kuwait s Oil Minister in remarks published today said there were no plans for an emergency OPEC meeting to review oil policies after recent weakness in world oil prices Sheikh Ali al Khalifa al Sabah was quoted by the local daily al Qabas as saying None of the OPEC members has asked for such a meeting He denied Kuwait was pumping above its quota of 948,000 barrels of crude daily bpd set under self imposed production limits of the 13 nation organisation Traders and analysts in international oil markets estimate OPEC is producing up to one mln bpd above a ceiling of 15.8 mln bpd agreed in Geneva last December They named Kuwait and the United Arab Emirates along with the much smaller producer Ecuador among those producing above quota Kuwait they said was pumping 1.2 mln bpd This rumour is baseless It is based on reports which said Kuwait has the ability to exceed its share They suppose that because Kuwait has the ability it will do so the minister said Sheikh Ali has said before that Kuwait had the ability to produce up to 4.0 mln bpd If we can sell more than our quota at official prices while some countries are suffering difficulties marketing their share it means we in Kuwait are unusually clever he said He was referring apparently to the Gulf state of qatar which industry sources said was selling less than 180,000 bpd of its 285,000 bpd quota because buyers were resisting 
official prices restored by OPEC last month pegged to a marker of 18 dlrs per barrel Prices in New York last week dropped to their lowest levels this year and almost three dollars below a three month high of 19 dollars a barrel Sheikh Ali also delivered a challenge to any international oil company that declared Kuwait sold below official prices Because it was charging its official price of 16.67 dlrs a barrel it had lost custom he said but did not elaborate However Kuwait had guaranteed markets for its oil because of its local and international refining facilities and its own distribution network abroad he added He reaffirmed that the planned meeting March 7 of OPEC s differentials committee has been postponed until the start of April at the request of certain of the body s members Ecuador s deputy energy minister Fernando Santos Alvite said last Wednesday his debt burdened country wanted OPEC to assign a lower official price for its crude and was to seek this at talks this month of opec s pricing committee Referring to pressure by oil companies on OPEC members in apparent reference to difficulties faced by Qatar he said We expected such pressure It will continue through March and April But he expected the situation would later improve REUTER"</span>
<span class="r-in"><span></span></span>
<span class="r-in"><span><span class="co"># Scenario 2: Get data.frame with counts for region matrix</span></span></span>
<span class="r-in"><span><span class="va">y</span> <span class="op">&lt;-</span> <span class="fu">region_matrix_to_count_matrix</span><span class="op">(</span></span></span>
<span class="r-in"><span> corpus <span class="op">=</span> <span class="st">"REUTERS"</span>, p_attribute <span class="op">=</span> <span class="st">"word"</span>,</span></span>
<span class="r-in"><span> registry <span class="op">=</span> <span class="fu"><a href="tmp_registry.html">get_tmp_registry</a></span><span class="op">(</span><span class="op">)</span>, matrix <span class="op">=</span> <span class="va">m</span></span></span>
<span class="r-in"><span> <span class="op">)</span></span></span>
<span class="r-in"><span><span class="va">df</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/as.data.frame.html" class="external-link">as.data.frame</a></span><span class="op">(</span><span class="va">y</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="fu"><a href="https://rdrr.io/r/base/colnames.html" class="external-link">colnames</a></span><span class="op">(</span><span class="va">df</span><span class="op">)</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"token_id"</span>, <span class="st">"count"</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="va">df</span><span class="op">[[</span><span class="st">"token"</span><span class="op">]</span><span class="op">]</span> <span class="op">&lt;-</span> <span class="fu"><a href="p_attributes.html">cl_id2str</a></span><span class="op">(</span></span></span>
<span class="r-in"><span> <span class="st">"REUTERS"</span>, p_attribute <span class="op">=</span> <span class="st">"word"</span>,</span></span>
<span class="r-in"><span> registry <span class="op">=</span> <span class="fu"><a href="tmp_registry.html">get_tmp_registry</a></span><span class="op">(</span><span class="op">)</span>, id <span class="op">=</span> <span class="va">df</span><span class="op">[[</span><span class="st">"token_id"</span><span class="op">]</span><span class="op">]</span></span></span>
<span class="r-in"><span> <span class="op">)</span></span></span>
<span class="r-in"><span><span class="va">df</span><span class="op">[</span><span class="fu"><a href="https://rdrr.io/r/base/order.html" class="external-link">order</a></span><span class="op">(</span><span class="va">df</span><span class="op">[[</span><span class="st">"count"</span><span class="op">]</span><span class="op">]</span>, decreasing <span class="op">=</span> <span class="cn">TRUE</span><span class="op">)</span>,<span class="op">]</span></span></span>
<span class="r-out co"><span class="r-pr">#></span> token_id count token</span>
<span class="r-out co"><span class="r-pr">#></span> 17 31 23 the</span>
<span class="r-out co"><span class="r-pr">#></span> 21 37 19 of</span>
<span class="r-out co"><span class="r-pr">#></span> 16 29 14 to</span>
<span class="r-out co"><span class="r-pr">#></span> 1 3 13 said</span>
<span class="r-out co"><span class="r-pr">#></span> 20 35 10 in</span>
<span class="r-out co"><span class="r-pr">#></span> 135 347 10 Kuwait</span>
<span class="r-out co"><span class="r-pr">#></span> 6 10 9 its</span>
<span class="r-out co"><span class="r-pr">#></span> 13 19 9 a</span>
<span class="r-out co"><span class="r-pr">#></span> 22 40 9 and</span>
<span class="r-out co"><span class="r-pr">#></span> 18 33 7 was</span>
<span class="r-out co"><span class="r-pr">#></span> 29 59 7 OPEC</span>
<span class="r-out co"><span class="r-pr">#></span> 57 150 7 bpd</span>
<span class="r-out co"><span class="r-pr">#></span> 2 4 6 that</span>
<span class="r-out co"><span class="r-pr">#></span> 8 13 6 for</span>
<span class="r-out co"><span class="r-pr">#></span> 10 15 6 oil</span>
<span class="r-out co"><span class="r-pr">#></span> 56 149 6 mln</span>
<span class="r-out co"><span class="r-pr">#></span> 61 154 5 has</span>
<span class="r-out co"><span class="r-pr">#></span> 66 171 5 he</span>
<span class="r-out co"><span class="r-pr">#></span> 81 214 5 at</span>
<span class="r-out co"><span class="r-pr">#></span> 136 348 5 s</span>
<span class="r-out co"><span class="r-pr">#></span> 202 414 5 official</span>
<span class="r-out co"><span class="r-pr">#></span> 4 7 4 it</span>
<span class="r-out co"><span class="r-pr">#></span> 7 12 4 prices</span>
<span class="r-out co"><span class="r-pr">#></span> 11 16 4 by</span>
<span class="r-out co"><span class="r-pr">#></span> 12 18 4 dlrs</span>
<span class="r-out co"><span class="r-pr">#></span> 26 53 4 last</span>
<span class="r-out co"><span class="r-pr">#></span> 58 151 4 quota</span>
<span class="r-out co"><span class="r-pr">#></span> 5 8 3 had</span>
<span class="r-out co"><span class="r-pr">#></span> 14 20 3 barrel</span>
<span class="r-out co"><span class="r-pr">#></span> 24 45 3 is</span>
<span class="r-out co"><span class="r-pr">#></span> 40 90 3 meeting</span>
<span class="r-out co"><span class="r-pr">#></span> 46 123 3 He</span>
<span class="r-out co"><span class="r-pr">#></span> 48 128 3 ability</span>
<span class="r-out co"><span class="r-pr">#></span> 54 147 3 above</span>
<span class="r-out co"><span class="r-pr">#></span> 65 169 3 on</span>
<span class="r-out co"><span class="r-pr">#></span> 86 249 3 this</span>
<span class="r-out co"><span class="r-pr">#></span> 127 339 3 It</span>
<span class="r-out co"><span class="r-pr">#></span> 146 358 3 Sheikh</span>
<span class="r-out co"><span class="r-pr">#></span> 147 359 3 Ali</span>
<span class="r-out co"><span class="r-pr">#></span> 148 360 3 al</span>
<span class="r-out co"><span class="r-pr">#></span> 157 369 3 members</span>
<span class="r-out co"><span class="r-pr">#></span> 171 383 3 international</span>
<span class="r-out co"><span class="r-pr">#></span> 196 408 3 because</span>
<span class="r-out co"><span class="r-pr">#></span> 222 434 3 month</span>
<span class="r-out co"><span class="r-pr">#></span> 9 14 2 crude</span>
<span class="r-out co"><span class="r-pr">#></span> 15 25 2 price</span>
<span class="r-out co"><span class="r-pr">#></span> 27 57 2 markets</span>
<span class="r-out co"><span class="r-pr">#></span> 37 86 2 They</span>
<span class="r-out co"><span class="r-pr">#></span> 38 88 2 an</span>
<span class="r-out co"><span class="r-pr">#></span> 45 119 2 with</span>
<span class="r-out co"><span class="r-pr">#></span> 52 143 2 were</span>
<span class="r-out co"><span class="r-pr">#></span> 72 193 2 will</span>
<span class="r-out co"><span class="r-pr">#></span> 75 201 2 their</span>
<span class="r-out co"><span class="r-pr">#></span> 76 203 2 March</span>
<span class="r-out co"><span class="r-pr">#></span> 77 204 2 April</span>
<span class="r-out co"><span class="r-pr">#></span> 80 211 2 than</span>
<span class="r-out co"><span class="r-pr">#></span> 88 259 2 we</span>
<span class="r-out co"><span class="r-pr">#></span> 89 260 2 are</span>
<span class="r-out co"><span class="r-pr">#></span> 94 306 2 Oil</span>
<span class="r-out co"><span class="r-pr">#></span> 101 313 2 study</span>
<span class="r-out co"><span class="r-pr">#></span> 102 314 2 estimates</span>
<span class="r-out co"><span class="r-pr">#></span> 105 317 2 net</span>
<span class="r-out co"><span class="r-pr">#></span> 106 318 2 revenues</span>
<span class="r-out co"><span class="r-pr">#></span> 110 322 2 reserves</span>
<span class="r-out co"><span class="r-pr">#></span> 112 324 2 discounted</span>
<span class="r-out co"><span class="r-pr">#></span> 113 325 2 present</span>
<span class="r-out co"><span class="r-pr">#></span> 114 326 2 value</span>
<span class="r-out co"><span class="r-pr">#></span> 116 328 2 estimate</span>
<span class="r-out co"><span class="r-pr">#></span> 118 330 2 there</span>
<span class="r-out co"><span class="r-pr">#></span> 119 331 2 no</span>
<span class="r-out co"><span class="r-pr">#></span> 126 338 2 year</span>
<span class="r-out co"><span class="r-pr">#></span> 130 342 2 pct</span>
<span class="r-out co"><span class="r-pr">#></span> 152 364 2 local</span>
<span class="r-out co"><span class="r-pr">#></span> 153 365 2 daily</span>
<span class="r-out co"><span class="r-pr">#></span> 159 371 2 such</span>
<span class="r-out co"><span class="r-pr">#></span> 161 373 2 pumping</span>
<span class="r-out co"><span class="r-pr">#></span> 172 384 2 producing</span>
<span class="r-out co"><span class="r-pr">#></span> 173 385 2 up</span>
<span class="r-out co"><span class="r-pr">#></span> 184 396 2 Ecuador</span>
<span class="r-out co"><span class="r-pr">#></span> 192 404 2 which</span>
<span class="r-out co"><span class="r-pr">#></span> 194 406 2 share</span>
<span class="r-out co"><span class="r-pr">#></span> 197 409 2 minister</span>
<span class="r-out co"><span class="r-pr">#></span> 206 418 2 difficulties</span>
<span class="r-out co"><span class="r-pr">#></span> 233 445 2 three</span>
<span class="r-out co"><span class="r-pr">#></span> 234 446 2 dollars</span>
<span class="r-out co"><span class="r-pr">#></span> 235 447 2 below</span>
<span class="r-out co"><span class="r-pr">#></span> 260 472 2 committee</span>
<span class="r-out co"><span class="r-pr">#></span> 285 497 2 pressure</span>
<span class="r-out co"><span class="r-pr">#></span> 291 503 2 expected</span>
<span class="r-out co"><span class="r-pr">#></span> 3 6 1 today</span>
<span class="r-out co"><span class="r-pr">#></span> 19 34 1 made</span>
<span class="r-out co"><span class="r-pr">#></span> 23 43 1 company</span>
<span class="r-out co"><span class="r-pr">#></span> 25 49 1 companies</span>
<span class="r-out co"><span class="r-pr">#></span> 28 58 1 Reuter</span>
<span class="r-out co"><span class="r-pr">#></span> 30 60 1 may</span>
<span class="r-out co"><span class="r-pr">#></span> 31 61 1 be</span>
<span class="r-out co"><span class="r-pr">#></span> 32 64 1 before</span>
<span class="r-out co"><span class="r-pr">#></span> 33 69 1 production</span>
<span class="r-out co"><span class="r-pr">#></span> 34 78 1 industry</span>
<span class="r-out co"><span class="r-pr">#></span> 35 79 1 analysts</span>
<span class="r-out co"><span class="r-pr">#></span> 36 83 1 as</span>
<span class="r-out co"><span class="r-pr">#></span> 39 89 1 emergency</span>
<span class="r-out co"><span class="r-pr">#></span> 41 103 1 sources</span>
<span class="r-out co"><span class="r-pr">#></span> 42 108 1 world</span>
<span class="r-out co"><span class="r-pr">#></span> 43 110 1 not</span>
<span class="r-out co"><span class="r-pr">#></span> 44 111 1 but</span>
<span class="r-out co"><span class="r-pr">#></span> 47 127 1 about</span>
<span class="r-out co"><span class="r-pr">#></span> 49 130 1 under</span>
<span class="r-out co"><span class="r-pr">#></span> 50 140 1 But</span>
<span class="r-out co"><span class="r-pr">#></span> 51 141 1 some</span>
<span class="r-out co"><span class="r-pr">#></span> 53 146 1 would</span>
<span class="r-out co"><span class="r-pr">#></span> 55 148 1 15.8</span>
<span class="r-out co"><span class="r-pr">#></span> 59 152 1 set</span>
<span class="r-out co"><span class="r-pr">#></span> 60 153 1 December</span>
<span class="r-out co"><span class="r-pr">#></span> 62 156 1 buyers</span>
<span class="r-out co"><span class="r-pr">#></span> 63 162 1 differentials</span>
<span class="r-out co"><span class="r-pr">#></span> 64 165 1 one</span>
<span class="r-out co"><span class="r-pr">#></span> 67 172 1 named</span>
<span class="r-out co"><span class="r-pr">#></span> 68 179 1 added</span>
<span class="r-out co"><span class="r-pr">#></span> 69 185 1 reports</span>
<span class="r-out co"><span class="r-pr">#></span> 70 189 1 However</span>
<span class="r-out co"><span class="r-pr">#></span> 71 191 1 can</span>
<span class="r-out co"><span class="r-pr">#></span> 73 194 1 they</span>
<span class="r-out co"><span class="r-pr">#></span> 74 200 1 sell</span>
<span class="r-out co"><span class="r-pr">#></span> 78 208 1 do</span>
<span class="r-out co"><span class="r-pr">#></span> 79 210 1 more</span>
<span class="r-out co"><span class="r-pr">#></span> 82 234 1 New</span>
<span class="r-out co"><span class="r-pr">#></span> 83 235 1 York</span>
<span class="r-out co"><span class="r-pr">#></span> 84 244 1 been</span>
<span class="r-out co"><span class="r-pr">#></span> 85 246 1 through</span>
<span class="r-out co"><span class="r-pr">#></span> 87 257 1 so</span>
<span class="r-out co"><span class="r-pr">#></span> 90 272 1 64</span>
<span class="r-out co"><span class="r-pr">#></span> 91 275 1 decrease</span>
<span class="r-out co"><span class="r-pr">#></span> 92 289 1 19</span>
<span class="r-out co"><span class="r-pr">#></span> 93 305 1 Houston</span>
<span class="r-out co"><span class="r-pr">#></span> 95 307 1 Trust</span>
<span class="r-out co"><span class="r-pr">#></span> 96 308 1 independent</span>
<span class="r-out co"><span class="r-pr">#></span> 97 309 1 petroleum</span>
<span class="r-out co"><span class="r-pr">#></span> 98 310 1 engineers</span>
<span class="r-out co"><span class="r-pr">#></span> 99 311 1 completed</span>
<span class="r-out co"><span class="r-pr">#></span> 100 312 1 annual</span>
<span class="r-out co"><span class="r-pr">#></span> 103 315 1 trust's</span>
<span class="r-out co"><span class="r-pr">#></span> 104 316 1 future</span>
<span class="r-out co"><span class="r-pr">#></span> 107 319 1 from</span>
<span class="r-out co"><span class="r-pr">#></span> 108 320 1 total</span>
<span class="r-out co"><span class="r-pr">#></span> 109 321 1 proved</span>
<span class="r-out co"><span class="r-pr">#></span> 111 323 1 88</span>
<span class="r-out co"><span class="r-pr">#></span> 115 327 1 Based</span>
<span class="r-out co"><span class="r-pr">#></span> 117 329 1 trust</span>
<span class="r-out co"><span class="r-pr">#></span> 120 332 1 money</span>
<span class="r-out co"><span class="r-pr">#></span> 121 333 1 available</span>
<span class="r-out co"><span class="r-pr">#></span> 122 334 1 cash</span>
<span class="r-out co"><span class="r-pr">#></span> 123 335 1 distributions</span>
<span class="r-out co"><span class="r-pr">#></span> 124 336 1 unitholders</span>
<span class="r-out co"><span class="r-pr">#></span> 125 337 1 remainder</span>
<span class="r-out co"><span class="r-pr">#></span> 128 340 1 reflect</span>
<span class="r-out co"><span class="r-pr">#></span> 129 341 1 44</span>
<span class="r-out co"><span class="r-pr">#></span> 131 343 1 reserve</span>
<span class="r-out co"><span class="r-pr">#></span> 132 344 1 39</span>
<span class="r-out co"><span class="r-pr">#></span> 133 345 1 compared</span>
<span class="r-out co"><span class="r-pr">#></span> 134 346 1 1985</span>
<span class="r-out co"><span class="r-pr">#></span> 137 349 1 Minister</span>
<span class="r-out co"><span class="r-pr">#></span> 138 350 1 remarks</span>
<span class="r-out co"><span class="r-pr">#></span> 139 351 1 published</span>
<span class="r-out co"><span class="r-pr">#></span> 140 352 1 plans</span>
<span class="r-out co"><span class="r-pr">#></span> 141 353 1 review</span>
<span class="r-out co"><span class="r-pr">#></span> 142 354 1 policies</span>
<span class="r-out co"><span class="r-pr">#></span> 143 355 1 after</span>
<span class="r-out co"><span class="r-pr">#></span> 144 356 1 recent</span>
<span class="r-out co"><span class="r-pr">#></span> 145 357 1 weakness</span>
<span class="r-out co"><span class="r-pr">#></span> 149 361 1 Khalifa</span>
<span class="r-out co"><span class="r-pr">#></span> 150 362 1 Sabah</span>
<span class="r-out co"><span class="r-pr">#></span> 151 363 1 quoted</span>
<span class="r-out co"><span class="r-pr">#></span> 154 366 1 Qabas</span>
<span class="r-out co"><span class="r-pr">#></span> 155 367 1 saying</span>
<span class="r-out co"><span class="r-pr">#></span> 156 368 1 None</span>
<span class="r-out co"><span class="r-pr">#></span> 158 370 1 asked</span>
<span class="r-out co"><span class="r-pr">#></span> 160 372 1 denied</span>
<span class="r-out co"><span class="r-pr">#></span> 162 374 1 948,000</span>
<span class="r-out co"><span class="r-pr">#></span> 163 375 1 barrels</span>
<span class="r-out co"><span class="r-pr">#></span> 164 376 1 self</span>
<span class="r-out co"><span class="r-pr">#></span> 165 377 1 imposed</span>
<span class="r-out co"><span class="r-pr">#></span> 166 378 1 limits</span>
<span class="r-out co"><span class="r-pr">#></span> 167 379 1 13</span>
<span class="r-out co"><span class="r-pr">#></span> 168 380 1 nation</span>
<span class="r-out co"><span class="r-pr">#></span> 169 381 1 organisation</span>
<span class="r-out co"><span class="r-pr">#></span> 170 382 1 Traders</span>
<span class="r-out co"><span class="r-pr">#></span> 174 386 1 ceiling</span>
<span class="r-out co"><span class="r-pr">#></span> 175 387 1 agreed</span>
<span class="r-out co"><span class="r-pr">#></span> 176 388 1 Geneva</span>
<span class="r-out co"><span class="r-pr">#></span> 177 389 1 United</span>
<span class="r-out co"><span class="r-pr">#></span> 178 390 1 Arab</span>
<span class="r-out co"><span class="r-pr">#></span> 179 391 1 Emirates</span>
<span class="r-out co"><span class="r-pr">#></span> 180 392 1 along</span>
<span class="r-out co"><span class="r-pr">#></span> 181 393 1 much</span>
<span class="r-out co"><span class="r-pr">#></span> 182 394 1 smaller</span>
<span class="r-out co"><span class="r-pr">#></span> 183 395 1 producer</span>
<span class="r-out co"><span class="r-pr">#></span> 185 397 1 among</span>
<span class="r-out co"><span class="r-pr">#></span> 186 398 1 those</span>
<span class="r-out co"><span class="r-pr">#></span> 187 399 1 1.2</span>
<span class="r-out co"><span class="r-pr">#></span> 188 400 1 This</span>
<span class="r-out co"><span class="r-pr">#></span> 189 401 1 rumour</span>
<span class="r-out co"><span class="r-pr">#></span> 190 402 1 baseless</span>
<span class="r-out co"><span class="r-pr">#></span> 191 403 1 based</span>
<span class="r-out co"><span class="r-pr">#></span> 193 405 1 exceed</span>
<span class="r-out co"><span class="r-pr">#></span> 195 407 1 suppose</span>
<span class="r-out co"><span class="r-pr">#></span> 198 410 1 produce</span>
<span class="r-out co"><span class="r-pr">#></span> 199 411 1 4.0</span>
<span class="r-out co"><span class="r-pr">#></span> 200 412 1 If</span>
<span class="r-out co"><span class="r-pr">#></span> 201 413 1 our</span>
<span class="r-out co"><span class="r-pr">#></span> 203 415 1 while</span>
<span class="r-out co"><span class="r-pr">#></span> 204 416 1 countries</span>
<span class="r-out co"><span class="r-pr">#></span> 205 417 1 suffering</span>
<span class="r-out co"><span class="r-pr">#></span> 207 419 1 marketing</span>
<span class="r-out co"><span class="r-pr">#></span> 208 420 1 means</span>
<span class="r-out co"><span class="r-pr">#></span> 209 421 1 unusually</span>
<span class="r-out co"><span class="r-pr">#></span> 210 422 1 clever</span>
<span class="r-out co"><span class="r-pr">#></span> 211 423 1 referring</span>
<span class="r-out co"><span class="r-pr">#></span> 212 424 1 apparently</span>
<span class="r-out co"><span class="r-pr">#></span> 213 425 1 Gulf</span>
<span class="r-out co"><span class="r-pr">#></span> 214 426 1 state</span>
<span class="r-out co"><span class="r-pr">#></span> 215 427 1 qatar</span>
<span class="r-out co"><span class="r-pr">#></span> 216 428 1 selling</span>
<span class="r-out co"><span class="r-pr">#></span> 217 429 1 less</span>
<span class="r-out co"><span class="r-pr">#></span> 218 430 1 180,000</span>
<span class="r-out co"><span class="r-pr">#></span> 219 431 1 285,000</span>
<span class="r-out co"><span class="r-pr">#></span> 220 432 1 resisting</span>
<span class="r-out co"><span class="r-pr">#></span> 221 433 1 restored</span>
<span class="r-out co"><span class="r-pr">#></span> 223 435 1 pegged</span>
<span class="r-out co"><span class="r-pr">#></span> 224 436 1 marker</span>
<span class="r-out co"><span class="r-pr">#></span> 225 437 1 18</span>
<span class="r-out co"><span class="r-pr">#></span> 226 438 1 per</span>
<span class="r-out co"><span class="r-pr">#></span> 227 439 1 Prices</span>
<span class="r-out co"><span class="r-pr">#></span> 228 440 1 week</span>
<span class="r-out co"><span class="r-pr">#></span> 229 441 1 dropped</span>
<span class="r-out co"><span class="r-pr">#></span> 230 442 1 lowest</span>
<span class="r-out co"><span class="r-pr">#></span> 231 443 1 levels</span>
<span class="r-out co"><span class="r-pr">#></span> 232 444 1 almost</span>
<span class="r-out co"><span class="r-pr">#></span> 236 448 1 high</span>
<span class="r-out co"><span class="r-pr">#></span> 237 449 1 also</span>
<span class="r-out co"><span class="r-pr">#></span> 238 450 1 delivered</span>
<span class="r-out co"><span class="r-pr">#></span> 239 451 1 challenge</span>
<span class="r-out co"><span class="r-pr">#></span> 240 452 1 any</span>
<span class="r-out co"><span class="r-pr">#></span> 241 453 1 declared</span>
<span class="r-out co"><span class="r-pr">#></span> 242 454 1 sold</span>
<span class="r-out co"><span class="r-pr">#></span> 243 455 1 Because</span>
<span class="r-out co"><span class="r-pr">#></span> 244 456 1 charging</span>
<span class="r-out co"><span class="r-pr">#></span> 245 457 1 16.67</span>
<span class="r-out co"><span class="r-pr">#></span> 246 458 1 lost</span>
<span class="r-out co"><span class="r-pr">#></span> 247 459 1 custom</span>
<span class="r-out co"><span class="r-pr">#></span> 248 460 1 did</span>
<span class="r-out co"><span class="r-pr">#></span> 249 461 1 elaborate</span>
<span class="r-out co"><span class="r-pr">#></span> 250 462 1 guaranteed</span>
<span class="r-out co"><span class="r-pr">#></span> 251 463 1 refining</span>
<span class="r-out co"><span class="r-pr">#></span> 252 464 1 facilities</span>
<span class="r-out co"><span class="r-pr">#></span> 253 465 1 own</span>
<span class="r-out co"><span class="r-pr">#></span> 254 466 1 distribution</span>
<span class="r-out co"><span class="r-pr">#></span> 255 467 1 network</span>
<span class="r-out co"><span class="r-pr">#></span> 256 468 1 abroad</span>
<span class="r-out co"><span class="r-pr">#></span> 257 469 1 reaffirmed</span>
<span class="r-out co"><span class="r-pr">#></span> 258 470 1 planned</span>
<span class="r-out co"><span class="r-pr">#></span> 259 471 1 7</span>
<span class="r-out co"><span class="r-pr">#></span> 261 473 1 postponed</span>
<span class="r-out co"><span class="r-pr">#></span> 262 474 1 until</span>
<span class="r-out co"><span class="r-pr">#></span> 263 475 1 start</span>
<span class="r-out co"><span class="r-pr">#></span> 264 476 1 request</span>
<span class="r-out co"><span class="r-pr">#></span> 265 477 1 certain</span>
<span class="r-out co"><span class="r-pr">#></span> 266 478 1 body</span>
<span class="r-out co"><span class="r-pr">#></span> 267 479 1 deputy</span>
<span class="r-out co"><span class="r-pr">#></span> 268 480 1 energy</span>
<span class="r-out co"><span class="r-pr">#></span> 269 481 1 Fernando</span>
<span class="r-out co"><span class="r-pr">#></span> 270 482 1 Santos</span>
<span class="r-out co"><span class="r-pr">#></span> 271 483 1 Alvite</span>
<span class="r-out co"><span class="r-pr">#></span> 272 484 1 Wednesday</span>
<span class="r-out co"><span class="r-pr">#></span> 273 485 1 his</span>
<span class="r-out co"><span class="r-pr">#></span> 274 486 1 debt</span>
<span class="r-out co"><span class="r-pr">#></span> 275 487 1 burdened</span>
<span class="r-out co"><span class="r-pr">#></span> 276 488 1 country</span>
<span class="r-out co"><span class="r-pr">#></span> 277 489 1 wanted</span>
<span class="r-out co"><span class="r-pr">#></span> 278 490 1 assign</span>
<span class="r-out co"><span class="r-pr">#></span> 279 491 1 lower</span>
<span class="r-out co"><span class="r-pr">#></span> 280 492 1 seek</span>
<span class="r-out co"><span class="r-pr">#></span> 281 493 1 talks</span>
<span class="r-out co"><span class="r-pr">#></span> 282 494 1 opec</span>
<span class="r-out co"><span class="r-pr">#></span> 283 495 1 pricing</span>
<span class="r-out co"><span class="r-pr">#></span> 284 496 1 Referring</span>
<span class="r-out co"><span class="r-pr">#></span> 286 498 1 apparent</span>
<span class="r-out co"><span class="r-pr">#></span> 287 499 1 reference</span>
<span class="r-out co"><span class="r-pr">#></span> 288 500 1 faced</span>
<span class="r-out co"><span class="r-pr">#></span> 289 501 1 Qatar</span>
<span class="r-out co"><span class="r-pr">#></span> 290 502 1 We</span>
<span class="r-out co"><span class="r-pr">#></span> 292 504 1 continue</span>
<span class="r-out co"><span class="r-pr">#></span> 293 505 1 situation</span>
<span class="r-out co"><span class="r-pr">#></span> 294 506 1 later</span>
<span class="r-out co"><span class="r-pr">#></span> 295 507 1 improve</span>
<span class="r-out co"><span class="r-pr">#></span> 296 508 1 REUTER</span>
<span class="r-in"><span><span class="fu"><a href="https://rdrr.io/r/utils/head.html" class="external-link">head</a></span><span class="op">(</span><span class="va">df</span><span class="op">)</span></span></span>
<span class="r-out co"><span class="r-pr">#></span> token_id count token</span>
<span class="r-out co"><span class="r-pr">#></span> 1 3 13 said</span>
<span class="r-out co"><span class="r-pr">#></span> 2 4 6 that</span>
<span class="r-out co"><span class="r-pr">#></span> 3 6 1 today</span>
<span class="r-out co"><span class="r-pr">#></span> 4 7 4 it</span>
<span class="r-out co"><span class="r-pr">#></span> 5 8 3 had</span>
<span class="r-out co"><span class="r-pr">#></span> 6 10 9 its</span>
</code></pre></div>
</div>
</div>
<div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
<nav id="toc" data-toggle="toc" class="sticky-top"><h2 data-toc-skip>Contents</h2>
</nav></div>
</div>
<footer><div class="copyright">
<p>Developed by Andreas Blaette, Bernard Desgraupes, Sylvain Loiseau.</p>
</div>
<div class="pkgdown">
<p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.0.6.</p>
</div>
</footer></div>
</body></html>