#include "common.h"
//------------------------
// ALWAYS AVAILABLE
//------------------------
// File-scope state of the script compiler. Non-static names are visible to
// other translation units; static names are private to this file.

#define PATTERNDEPTH 1000
bool nowarndupconcept = false;
static unsigned char mathphrase[20];
unsigned int supplementalColumn = 0;            // added to currentLineColumn when error/warning positions are logged
static HEAPREF undefinedCallThreadList = NULL;
static HEAPREF undefinedConceptThreadList = NULL;
unsigned int currentBuild = 0;
static bool nospellcheck = false;
static bool noPatternOptimization = true;
static unsigned int conceptID = 0;              // name of concept set
char* patternStarter = NULL;                    // with patternEnder, bounds the pattern text quoted by AddError
char* patternEnder = NULL;
const char* linestartpoint = NULL;
static bool isConcept = false;                  // when set, ReadSystemToken reads a token up to the next blank
static WORDP currentFunctionDefinition;         // current macro defining or executing
HEAPREF deadfactsList = 0;
HEAPREF languageadjustedfactsList = 0;
static unsigned int complexity = 0;
static bool livecall = false;                   // set to !normal by StartScriptCompiler; suppresses AddMapOutput
bool echorulepattern = false;
static unsigned int priorLine = 0;
static char* currentTopicBots = NULL;
bool autoset = false;
static char macroName[MAX_WORD_SIZE];
static uint64 macroid;
char* dataBase = NULL;
static char* dataChunk = NULL;
static char* outputStart = NULL;
static char* lineStart = NULL;
static bool globalBotScope = false;
static HEAPREF beenHereThreadList = NULL;       // words marked BEEN_HERE by AddBeenHere, cleared by UnbindBeenHere
char* newScriptBuffer = NULL;
char* oldScriptBuffer = NULL;
static char display[MAX_DISPLAY][100];          // variable names collected by AddDisplay
static int displayIndex = 0;                    // number of used entries in display[]
static char* incomingPtrSys = 0;                // cache AFTER token find ptr when peeking.
static char lookaheadSys[MAX_WORD_SIZE];        // cache token found when peeking
static unsigned int hasWarnings;                // number of warnings generated
unsigned int hasErrors;                         // incremented by ScriptError
uint64 grade = 0;                               // vocabulary warning
char* lastDeprecation = 0;                      // set by FindDeprecated to the deprecated name it found
CompileStatus compiling = NOT_COMPILING;        // script compiler in progress
bool patternContext = false;                    // current compiling a pattern
unsigned int buildId;                           // current build
static int callingSystem = 0;
static bool chunking = false;
static unsigned int substitutes;                // warning-category counters, incremented by AddWarning
static unsigned int cases;
static unsigned int badword;
static unsigned int functionCall;
static bool isDescribe = false;
char* tableinput = NULL;
static char functionArguments[MAX_ARG_LIMIT + 1][500]; // searched (1-based) by GetFunctionArgument
static int functionArgumentCount = 0;
char warnings[MAX_WARNINGS][MAX_WORD_SIZE];     // filled by AddWarning
unsigned int warnIndex = 0;
static char baseName[SMALL_WORD_SIZE];
char errors[MAX_ERRORS][MAX_WORD_SIZE];         // filled by AddError
unsigned int errorIndex = 0;
char scopeBotName[MAX_WORD_SIZE];               // when non-empty, included in error/warning log prefixes
static bool renameInProgress = false;
static bool endtopicSeen = false;               // needed when ending a plan
static char* nextToken;                         // current lookahead token
unsigned int buildID = 0;

// Base names of the per-build topic files; the list is 0-terminated and is
// walked by EraseTopicBin and EraseTopicFiles.
static char* topicFiles[] = // files created by a topic refresh from scratch
{
    (char*)"describe",      // document variables functions concepts topics etc
    (char*)"variables",     // hold variables
    (char*)"facts",         // hold facts
    (char*)"allfacts",      // hold all binary fast facts
    (char*)"allwords",      // hold all binary fast words
    (char*)"keywords",      // holds topic and concepts keywords
    (char*)"macros",        // holds macro definitions
    (char*)"map",           // where things are defined (will also do map.json)
    (char*)"script",        // hold topic definitions
    (char*)"plans",         // hold plan definitions
    (char*)"dict",          // dictionary changes
    (char*)"canon",         // private canonical values
    0
};
static void WriteKey(char* word);
static FILE* mapFile = NULL;        // for IDE
static FILE* mapFileJson = NULL;    // easier to parse
static char* ReadMacro(char* ptr, FILE* in, char* kind, unsigned int build,char* data);
// storage class of the declaration that continues on the following source line
static
unsigned int mapTopicFileCount = 0; static unsigned int mapItemCount = 0; static unsigned int mapRuleCount = 0; void EraseTopicBin(unsigned int build, char* name) { int i = -1; int result; char file[SMALL_WORD_SIZE]; while (topicFiles[++i]) { sprintf(file, (char*)"%s/BUILD%s/%s%s.bin", topicfolder, name, topicFiles[i], name); // new style result = remove(file); } sprintf(file, (char*)"%s/BUILD%s/%sallfacts.bin", topicfolder, name, name); // new style result = remove(file); sprintf(file, (char*)"%s/BUILD%s/%sallwords.bin", topicfolder, name, name); // new style result = remove(file); } void InitScriptSystem() { compiling = NOT_COMPILING; undefinedCallThreadList = NULL; undefinedConceptThreadList = NULL; oldScriptBuffer = NULL; newScriptBuffer = NULL; mapFile = NULL; mapFileJson = NULL; outputStart = NULL; } void AddWarning(char* buffer) { char c = buffer[MAX_WORD_SIZE - 300]; size_t len = strlen(buffer); if (len > (MAX_WORD_SIZE - 300)) buffer[MAX_WORD_SIZE - 300] = 0; bool chop = false; if (buffer[len - 2] == '\r') buffer[len - 2] = 0; sprintf(warnings[warnIndex], (char*)"%s ending line %u col %u", buffer, currentFileLine, currentLineColumn); char* ptr = warnings[warnIndex] + strlen(warnings[warnIndex]); if (*currentFilename) sprintf(ptr, " of %s", currentFilename); warnIndex++; buffer[MAX_WORD_SIZE - 300] = c; strcat(buffer, "\r\n"); if (strstr(warnings[warnIndex-1],(char*)"is not a known word")) {++badword;} else if (strstr(warnings[warnIndex-1],(char*)" changes ")) {++substitutes;} else if (strstr(warnings[warnIndex-1],(char*)"is unknown as a word")) {++badword;} else if (strstr(warnings[warnIndex-1],(char*)"in opposite case")){++cases;} else if (strstr(warnings[warnIndex - 1], (char*)"multiple spellings")) { ++cases; } else if (strstr(warnings[warnIndex-1],(char*)"a function call")){++functionCall;} if (warnIndex >= MAX_WARNINGS) --warnIndex; } bool StartScriptCompiler(bool normal) { #ifndef DISCARDSCRIPTCOMPILER if (nextToken && normal) return false; // we are 
already in a build if (oldScriptBuffer) return false; // already running one conceptID = 0; beenHereThreadList = NULL; livecall = !normal; oldScriptBuffer = newScriptBuffer; // so we can nest calls to script compiler warnIndex = errorIndex = 0; newScriptBuffer = AllocateStack(NULL, maxBufferSize); nextToken = AllocateStack(NULL, maxBufferSize); // able to swallow big token #endif return true; } void EndScriptCompiler() { #ifndef DISCARDSCRIPTCOMPILER if (newScriptBuffer) { ReleaseStack(newScriptBuffer); newScriptBuffer = oldScriptBuffer; oldScriptBuffer = NULL; nextToken = NULL; } #endif } void ScriptError() { #ifndef DISCARDSCRIPTCOMPILER callingSystem = 0; chunking = false; outputStart = NULL; renameInProgress = false; if (compiling || csapicall == TEST_OUTPUT || csapicall == TEST_PATTERN) { ++hasErrors; patternContext = false; if (*scopeBotName) Log(USERLOG,"*** Error- line %d col %d of %s bot:%s : ", currentFileLine, currentLineColumn + supplementalColumn, currentFilename, scopeBotName); else Log(USERLOG,"*** Error- line %d col %d of %s: ", currentFileLine, currentLineColumn + supplementalColumn, currentFilename); } #endif } #ifndef DISCARDSCRIPTCOMPILER void ScriptWarn() { if (compiling) { ++hasWarnings; if (*currentFilename) { if (*scopeBotName) Log(USERLOG,"*** Warning- line %d col %d of %s bot:%s : ", currentFileLine, currentLineColumn + supplementalColumn,currentFilename, scopeBotName); else Log(USERLOG,"*** Warning- line %d col %d of %s: ", currentFileLine, currentLineColumn+supplementalColumn,currentFilename); } else Log(USERLOG,"*** Warning- "); } } static void AddBeenHere(WORDP D) { D->internalBits |= BEEN_HERE; beenHereThreadList = AllocateHeapval(HV1_WORDP,beenHereThreadList, (uint64)D);// save name } void UnbindBeenHere() { while (beenHereThreadList) { uint64 D; beenHereThreadList = UnpackHeapval(beenHereThreadList, D,discard); ((WORDP)D)->internalBits &= -1 ^ BEEN_HERE; } } #endif void AddError(char* buffer) { char seen[MAX_WORD_SIZE]; *seen = 0; 
char* at = seen; if (patternStarter && patternEnder && csapicall != COMPILE_PATTERN && csapicall != COMPILE_OUTPUT) { strcpy(at, "--> "); at += 4; size_t len = patternEnder - patternStarter; *patternEnder = 0; if (len < 100) strcpy(at, patternStarter); else { strncpy(at, patternStarter,50); at += 50; sprintf(at, "%s"," ... "); sprintf(at + 5, "%s", patternEnder - 50); } strcat(at, " <--"); } char message[MAX_WORD_SIZE]; if (*buffer == '\r') ++buffer; if (*buffer == '\n') ++buffer; size_t len = strlen(buffer); char* ptr = buffer + ( len > MAX_WORD_SIZE - 300 ? MAX_WORD_SIZE - 300 : len) - 1; while (ptr > buffer && (*ptr == '\n' || *ptr == '\r')) --ptr; char c = *(++ptr); *ptr = 0; if (!*currentFilename) // dse compilepattern { sprintf(message, "%s ended %s line %u col %u ", buffer, seen, currentFileLine, currentLineColumn+supplementalColumn); strcat(message, "\r\n"); } else sprintf(message, "%s ended line %u col %u of %s %s\r\n", buffer, currentFileLine, currentLineColumn+supplementalColumn, currentFilename, scopeBotName); *ptr = c; sprintf(errors[errorIndex++], (char*)"%s\r\n", message); if (errorIndex >= MAX_ERRORS) --errorIndex; } static char* FindComparison(char* word) { if (!*word || !word[1] || !word[2] || *word == '"') return NULL; // if token is short, we cannot do the below word+1 scans if (*word == '.') return NULL; // .<_3 is not a comparison if (*word == '\\') return NULL; // escaped is not a comparison if (*word == '!' && word[1] == '?' && word[2] == '$') return NULL; if (*word == '_' && word[1] == '?' && word[2] == '$') return NULL; if (*word == '?' && word[1] == '$') return NULL; char* at = strchr(word + 1, '!'); if (at && *word == '!') at = NULL; // ignore !! 
if (!at) { at = strchr(word + 1, '<'); if (at && at[1] == '<') return NULL; // << is not a comparison } if (!at) { at = strchr(word + 1, '>'); if (at && at[1] == '>') return NULL; // >> is not a comparison } if (!at) { at = strchr(word + 1, '&'); if (at && (at[1] == '_' || at[1] == ' ' || !(IsDigit(at[1]) || at[1] == USERVAR_PREFIX || at[1] == '#'))) at = 0; // ignore & as part of a name, e.g. AT&T } if (!at) at = strchr(word + 1, '='); if (!at) at = strchr(word + 1, '?'); // member of set if (!at) { at = strchr(word + 1, '!'); // negation if (at && (at[1] == '=' || at[1] == '?')); else at = NULL; } return at; } static void InsureAppropriateCase(char* word) { char c; char* at = FindComparison(word); // force to lower case various standard things // topcs/sets/classes/user vars/ functions and function vars are always lower case if (at) // a comparison has 2 sides { c = *at; *at = 0; InsureAppropriateCase(word); if (at[1] == '=' || at[1] == '?') InsureAppropriateCase(at + 2); // == or >= or such else InsureAppropriateCase(at + 1); *at = c; } else if (*word == '_' || *word == '\'') InsureAppropriateCase(word + 1); else if (*word == USERVAR_PREFIX) { char* dot = strchr(word, '.'); if (dot) *dot = 0; MakeLowerCase(word); if (dot) *dot = '.'; } else if ((*word == '^' && word[1] != '"') || *word == '~' || *word == SYSVAR_PREFIX || *word == '|') MakeLowerCase(word); else if (*word == '@' && IsDigit(word[1])) MakeLowerCase(word); // potential factref like @2subject } static int GetFunctionArgument(char* arg) // get index of argument (1-based) if it is value, else -1 { for (int i = 1; i <= functionArgumentCount; ++i) { if (!stricmp(arg, functionArguments[i])) return i; } return -1; // failed } static void FindDeprecated(char* ptr, char* value, char* message) { char* comment = strstr(ptr, (char*)"# "); char* at = ptr; size_t len = strlen(value); while (at) { at = strstr(at, value); if (!at) break; if (*(at - 1) == USERVAR_PREFIX) // $$xxx should be ignored { at += 2; 
continue; // (tail of FindDeprecated) resume scanning after a skipped $$xxx occurrence
        }
        if (comment && at > comment) return; // inside a comment
        char word[MAX_WORD_SIZE];
        ReadCompiledWord(at, word);
        if (!stricmp(value, word)) // the whole token is the deprecated name
        {
            lastDeprecation = at;
            // NOTE(review): message is passed as the first BADSCRIPT argument; if that is a printf-style format, messages must not contain '%' - confirm
            BADSCRIPT(message);
        }
        at += len;
    }
}

/*
 * AddDisplay - remember a variable name in display[] (lowercased, no duplicates).
 * word is lowercased in place. Raises BADSCRIPT once MAX_DISPLAY entries are in use.
 * NOTE(review): each display[] slot is 100 bytes and the copy is unbounded; a
 * name of 100 or more characters would overflow the slot.
 */
static void AddDisplay(char* word)
{
    MakeLowerCase(word);
    for (int i = 0; i < displayIndex; ++i)
    {
        if (!strcmp(word, display[i])) return; // no duplicates needed
    }
    strcpy(display[displayIndex], word);
    if (++displayIndex >= MAX_DISPLAY) BADSCRIPT("Display argument limited to %d: %s\r\n", MAX_DISPLAY, word)
}

/*
 * ReadDisplay - read an explicit list of display variables up to ')'.
 * The first token read is discarded; every following token must start with
 * the user variable prefix and must not contain '.', and is passed to
 * AddDisplay. Returns the read position after the closing ')'.
 */
static char* ReadDisplay(FILE* in, char* ptr)
{
    char word[SMALL_WORD_SIZE];
    ptr = ReadNextSystemToken(in, ptr, word, false); // first token is not examined
    while (1)
    {
        ptr = ReadNextSystemToken(in, ptr, word, false);
        if (*word == ')') break;
        if (*word != USERVAR_PREFIX) BADSCRIPT("Display argument must be uservar of $$ $ or $_: %s\r\n", word)
        if (strchr(word, '.')) BADSCRIPT("Display argument cannot be dot-selected %s\r\n", word)
        AddDisplay(word); // explicit display
    }
    return ptr;
}

/*
 * ReadSystemToken - read the next script token from ptr into word.
 *
 * ptr:  current read position (NULL yields a 0 return with an empty word).
 * word: output buffer for the token; it is rewritten in place by the
 *       remapping steps below.
 * separateUnderscore: in the code visible here it is only forwarded to the
 *       recursive call at the end.
 * Returns the position after the consumed input; several steps move it back
 * so that split-off characters are read again as the next token.
 *
 * Order of processing:
 *  1. skip whitespace (and literal \n sequences during PIECE_COMPILE, which
 *     advance currentFileLine), then check the rest of the buffer for
 *     deprecated variable names;
 *  2. quoted strings and ^"..." / ^'...' function strings, including
 *     remapping of ^arguments, renamed _vars, @sets and # constants inside
 *     function strings;
 *  3. concept mode (isConcept): take everything up to the next blank;
 *  4. the normal composite token scan;
 *  5. post-processing of the token: # constants, renames, splitting off
 *     trailing punctuation and < >, comparison operands, function argument
 *     remapping, variable arithmetic, word~meaning, display variables, case.
 */
char* ReadSystemToken(char* ptr, char* word, bool separateUnderscore) // how we tokenize system stuff (rules and topic system) words -remaps & to AND
{
    *word = 0;
    if (!ptr) return 0;
    char tmp[MAX_WORD_SIZE];
    char* start = word;
    *start = 0;
    ptr = SkipWhitespace(ptr);
    while (compiling == PIECE_COMPILE && *ptr == '\\' && ptr[1] == 'n') // api calls
    {
        ptr = SkipWhitespace(ptr + 2);
        currentFileLine += 1;
        currentLineColumn = 0;
    }

    // each call scans the remaining buffer text starting at ptr
    FindDeprecated(ptr, (char*)"$login", (char*)"Deprecated $login needs to be $cs_login\r\n");
    FindDeprecated(ptr, (char*)"$userfactlimit", (char*)"Deprecated $userfactlimit needs to be $cs_userfactlimit\r\n");
    FindDeprecated(ptr, (char*)"$crashmsg", (char*)"Deprecated $crashmsg needs to be $cs_crashmsg\r\n");
    FindDeprecated(ptr, (char*)"$randindex", (char*)"Deprecated $randindex needs to be $cs_randindex\r\n");
    FindDeprecated(ptr, (char*)"$wildcardseparator", (char*)"Deprecated $wildcardseparator needs to be $cs_wildcardseparator\r\n");
    FindDeprecated(ptr, (char*)"$abstract", (char*)"Deprecated $abstract needs to be $cs_abstract\r\n");
    FindDeprecated(ptr, (char*)"$prepass", (char*)"Deprecated $prepass needs to be $cs_prepass\r\n");
    FindDeprecated(ptr, (char*)"$control_main", (char*)"Deprecated $control_main needs to be $cs_control_main\r\n");
    FindDeprecated(ptr, (char*)"$control_pre", (char*)"Deprecated $control_pre needs to be $cs_control_pre\r\n");
    FindDeprecated(ptr, (char*)"$control_post", (char*)"Deprecated $control_post needs to be $cs_control_post\r\n");

#ifdef INFORMATION
    /***
    A token is nominally a contiguous collection of characters broken off by tab or space (since return and newline are stripped off).
    Tokens to include whitespace are encased in doublequotes.

    Characters with reserved status automatically also break into individual tokens and to include them you must put \ before them. These include:
    []() {} always and separate into individual tokens except for _( _[ _{

    < > and << >> are reserved, but only when at start or end of token. Allowed comparisons embedded. As is <= and >=
    Tokens ending with ' or 's break off (possessive) in patterns.

    Tokens starting with prefix characters ' or ! or _ keep together, except per reserved tokens. '$junk is one token.
    Variables ending with punctuation separate the punctuation. $hello. is two tokens as is _0.

    Reserved characters in a composite token with _ before or after are kept. E.g. This_(_story_is_)_done
    You can include a reserved tokens by putting \ in front of them.

    Some tokens revise their start, like the pattern tokens representing comparison. They do this in the script compiler.
    ***/
#endif

    // strings
    if (*ptr == '\'' && ptr[1] == '"') // leading quote mark before a string is copied through
    {
        *word++ = *ptr++;
    }
    if (*ptr == '"' || (*ptr == '^' && ptr[1] == '"') || (*ptr == '^' && ptr[1] == '\'') || (*ptr == '\\' && ptr[1] == '"')) // doublequote maybe with functional heading
    {
        // simple \"
        if (*ptr == '\\' && (!ptr[2] || ptr[2] == ' ' || ptr[2] == '\t' || ptr[2] == '}' || ptr[2] == ENDUNIT)) // legal
        {
            *word = '\\';
            word[1] = '"';
            word[2] = 0;
            return ptr + 2;
        }
        bool backslash = false;
        bool noblank = true;
        bool functionString = false;
        if (*ptr == '^')
        {
            *word++ = *ptr++; // ^"script" swallows ^
            noblank = false; // allowed blanks at start or rear
            functionString = true;
        }
        else if (*ptr == '\\') // \"string is this"
        {
            backslash = true;
            ++ptr;
        }
        char* end = ReadQuote(ptr,word,backslash,noblank,MAX_WORD_SIZE); // swallow ending marker and points past
        if (end)
        {
            if (*word == '"' && word[1] != FUNCTIONSTRING && !functionString) return end; // all legal within

            // NOW WE SEE A FUNCTION STRING
            // when seeing ^, see if it remaps as a function argument
            // check for internal ^ also...
            char* hat = word - 1;
            if ((*word == '"' || *word == '\'') && functionString) hat = word; // came before
            else if (*word == '"' && word[1] == FUNCTIONSTRING) hat = word + 1;
            else if ((word[1] == '"' || word[1] == '\'') && *word == FUNCTIONSTRING) hat = word;

            // locate any local variable references in active strings
            char* at = word;
            while ((at = strchr(at,USERVAR_PREFIX)))
            {
                if (at[1] == LOCALVAR_PREFIX)
                {
                    char* start = at; // shadows the outer start: beginning of this variable name
                    while (++at)
                    {
                        if (!IsAlphaUTF8OrDigit(*at) && *at != '_' && *at != '-') // first char that cannot be part of the name
                        {
                            char c = *at;
                            *at = 0;
                            AddDisplay(start);
                            *at = c;
                            break;
                        }
                    }
                }
                else ++at;
            }
            // the loop above only ends when strchr fails, so at is NULL from here on

            while ((hat = strchr(hat + 1,'^'))) // find a hat within
            {
                if (IsDigit(hat[1])) continue; // normal internal
                if (*(hat - 1) == '\\') continue; // escaped
                char* atx = hat;
                while (*++atx && (IsAlphaUTF8OrDigit(*atx) || *atx == '_')) { ; }
                char c = *atx;
                *atx = 0; // isolate the ^name for the lookups
                int index = GetFunctionArgument(hat);
                WORDP D = FindWord(hat); // in case its a function name
                *atx = c;
                if (index >= 0) // was a function argument
                {
                    strcpy(tmp,atx); // protect chunk
                    sprintf(hat,(char*)"^%d%s",index,tmp);
                }
                else if (D && D->internalBits & FUNCTION_NAME) { ; }
                else if (!renameInProgress && !(hat[1] == USERVAR_PREFIX || hat[1] == MATCHVAR_PREFIX))
                {
                    *atx = 0;
                    WARNSCRIPT((char*)"%s is not a recognized function argument. Is it intended to be?",hat)
                    *atx = c;
                }
            }

            hat = word - 1;
            while ((hat = strchr(hat + 1,'_'))) // rename _var?
            {
                if (*(hat - 1) == '$') continue;
                if (IsAlphaUTF8OrDigit(*(hat - 1)) || *(hat - 1) == '_' || *(hat - 1) == '-') continue; // not a starter
                if (IsDigit(hat[1])) continue; // normal _ var
                if (*(hat - 1) == '\\' || *(hat - 1) == '"') continue; // escaped or quoted
                char* atx = hat;
                while (*++atx && (IsAlphaUTF8OrDigit(*atx))) { ; } // find end
                WORDP D = FindWord(hat,atx - hat,LOWERCASE_LOOKUP);
                if (D && D->internalBits & RENAMED) // remap matchvar inside string
                {
                    strcpy(tmp,atx); // protect chunk
                    sprintf(hat + 1,(char*)"%u%s",(unsigned int)D->properties,tmp);
                }
            }

            hat = word - 1;
            while ((hat = strchr(hat + 1,'@'))) // rename @set?
            {
                if (IsAlphaUTF8OrDigit(*(hat - 1))) continue; // not a starter
                if (IsDigit(hat[1]) || hat[1] == '_') continue; // normal @ var or @_marker
                if (*(hat - 1) == '\\') continue; // escaped
                char* atx = GetSetEnd(hat);
                WORDP D = FindWord(hat,atx - hat,LOWERCASE_LOOKUP);
                if (D && D->internalBits & RENAMED) // rename @set inside string
                {
                    strcpy(tmp,atx); // protect chunk
                    sprintf(hat + 1,(char*)"%u%s",(unsigned int)D->properties,tmp);
                }
                else if (!renameInProgress) // can do anything safely in a simple quoted string
                {
                    char c = 0;
                    // NOTE(review): at is NULL here (see above), so this never truncates and the
                    // warning prints the rest of the string starting at hat
                    if (at && *at)
                    {
                        c = *at;
                        *at = 0;
                    }
                    WARNSCRIPT((char*)"%s is not a recognized @rename. Is it intended to be?\r\n",hat)
                    if (c) *at = c;
                }
            }

            hat = word - 1;
            if (strstr(readBuffer, "rename:")) // accept rename of existing constant twice in a row
                hat = " "; // points at a literal without '#', so the loop below finds nothing
            while ((hat = strchr(hat + 1,'#'))) // rename #constant or ##constant
            {
                if (*(hat - 1) == '\\') continue; // escaped
                if (IsAlphaUTF8OrDigit(*(hat - 1)) || IsDigit(hat[1]) || *(hat - 1) == '&') continue; // not a starter, maybe #533; constant stuff
                char* at = hat;
                if (at[1] == '#') ++at; // user constant
                while (*++at && (IsAlphaUTF8OrDigit(*at) || *at == '_')) { ; } // find end
                strcpy(tmp,at); // protect chunk
                *at = 0;
                uint64 n;
                if (hat[1] == '#' && IsAlphaUTF8(hat[2])) // user constant
                {
                    WORDP D = FindWord(hat,at - hat,LOWERCASE_LOOKUP);
                    if (D && D->internalBits & RENAMED) // remap #constant inside string
                    {
                        n = D->properties;
                        if (D->internalBits & CONSTANT_IS_NEGATIVE)
                        {
                            int64 x = (int64)n;
                            x = -x;
                            sprintf(hat,(char*)"%lld%s",(long long int) x,tmp);
                        }
                        else
                        {
                            sprintf(hat,(char*)"%lld%s",(long long int) n,tmp);
                        }
                    }
                }
                else // system constant
                {
                    // try each constant table in turn until one yields a non-zero value
                    n = FindPropertyValueByName(hat + 1);
                    if (!n) n = FindSystemValueByName(hat + 1);
                    if (!n) n = FindParseValueByName(hat + 1);
                    if (!n) n = FindMiscValueByName(hat + 1);
                    if (n)
                    {
                        sprintf(hat,(char*)"%lld%s",(long long int) n,tmp);
                    }
                }
                if (!*hat)
                {
                    *hat = '#';
                    BADSCRIPT((char*)"Bad # constant %s\r\n",hat)
                }
            }
            return end; // if we did swallow a string
        }
        if (*ptr == '\\') // was this \"xxx with NO closing
        {
            memmove(word + 1,word,strlen(word) + 1);
            *word = '\\';
        }
        else
        {
            word = start;
            if (*start == '^') --ptr;
        }
    }

    // reading concept, careful handling for emoticons emoji
    if (isConcept)
    {
        if (!*ptr) return ptr;
        while (*ptr != ' ' && *ptr) *word++ = *ptr++;
        *word = 0;
        if (*(word - 1) == ')') // closing ) for concept, not part of emoticon?
        {
            if (( *start == '~' || IsAlphaUTF8(*start)) && IsAlphaUTF8(start[1]) && IsAlphaUTF8(start[2])) // clear word
            {
                --ptr; // leave the ) to be read as its own token
                --word;
            }
        }
        *word = 0;
        if (start) return ptr; // test always true, but makes debug visible on start
    }

    // the normal composite token
    bool quote = false;
    char* xxorig = ptr; // not used in the code visible here
    bool var = (*ptr == '$');
    int brackets = 0;
    char quotechar = '"';
    bool activestring = false;
    while (*ptr)
    {
        if (*ptr == ENDUNIT) break;
        if (patternContext && quote) {} // allow stuff in comparison quote
        else if (*ptr == ' ' || (*ptr == '\t' && convertTabs) || (*ptr == '\\' && ptr[1] == 'n' && compiling == PIECE_COMPILE)) break; // legal
        if (patternContext && activestring == false && *ptr == '^' && (ptr[1] == '"' || ptr[1] == '\''))
        {
            quotechar = ptr[1];
            activestring = true;
        }
        if (patternContext && *ptr == quotechar) quote = !quote;
        char c = *ptr++;
        if (c == '\t' && !convertTabs && word != start)
        {
            --ptr;
            break; // end word with tab
        }
        // not for output, but for patterns, track line numbers
        if (csapicall == COMPILE_PATTERN && c == '\\' && (*ptr == 'n' || *ptr == '\t' || *ptr == '\r')) // break off manual new line
        {
            --ptr;
            break;
        }
        *word++ = c;
        *word = 0;
        if (*start == '\t' && !convertTabs ) break; // return tab as unique word
        if ((word - start) > (MAX_WORD_SIZE - 2)) break; // avoid overflow
        if (c == '\\') *word++ = *ptr++; //escaped
        // want to leave array json notation alone but react to [...] touching a variable - $var]
        else if (var && c == '[') // ANY variable should be separated by space from a [ if not json array
        {
            ++brackets; // this MUST then be a json array and brackets will balance
            if (brackets > 1) BADSCRIPT("$var MUST be separated from [ unless you intend json array reference\r\n")
        }
        else if (var && c == ']')
        {
            if (--brackets < 0) // if brackets is set, we must be in json array
            {
                --ptr;
                --word;
                break;
            }
        }
        else if (GetNestingData(c) && !quote) // break off nesting attached to a started token unless its an escaped token
        {
            size_t len = word - start;
            if (len == 1) break; // automatically token by itself
            if (len == 2)
            {
                if ((*start == '_' || *start == '!') && (c == '[' || c == '(' || c == '{')) break; // one token as _( or !(
                if (*start == '\\') break; // one token escaped
            }
            // split off into two tokens
            --ptr;
            --word;
            break;
        }
    }
    *word = 0;
    word = start;
    size_t len = strlen(word);
    if (len == 0) return ptr;
    if (patternContext && word[len - 1] == '"' && word[len - 2] != '\\')
    {
        char* quote = strchr(word, '"');
        if (quote == word + len - 1) BADSCRIPT("Tailing quote without start: %s\r\n", word)
    }
    if (*word == '#' && !strstr(readBuffer,"rename:")) // is this a constant from dictionary.h? or user constant
    {
        uint64 n;
        if (word[1] == '#' && IsAlphaUTF8(word[2])) // user constant
        {
            WORDP D = FindWord(word,0,LOWERCASE_LOOKUP);
            if (D && D->internalBits & RENAMED) // remap #constant
            {
                n = D->properties;
                if (D->internalBits & CONSTANT_IS_NEGATIVE)
                {
                    int64 x = (int64)n;
                    x = -x;
                    sprintf(word,(char*)"%lld",(long long int) x);
                }
                else
                {
                    sprintf(word,(char*)"%lld",(long long int) n);
                }
            }
            else if (renameInProgress) { ; } // leave token alone, defining
            else BADSCRIPT((char*)"Bad user constant %s\r\n",word)
        }
        else // system constant
        {
            n = FindPropertyValueByName(word + 1);
            if (!n) n = FindSystemValueByName(word + 1);
            if (!n) n = FindParseValueByName(word + 1);
            if (!n) n = FindMiscValueByName(word + 1);
            if (n)
            {
                sprintf(word,(char*)"%lld",(long long int) n);
            }
            else if (!IsDigit(word[1]) && word[1] != '!') //treat rest as a comment line (except if has number after it, which is user text OR internal arg reference for function
            {
                if (!stricmp(word, "#Tokens:")) { ; }
                else if (IsAlphaUTF8(word[1])) BADSCRIPT((char*)"Bad numeric # constant %s\r\n",word)
                *ptr = 0; // discard the rest of the line and return an empty token
                *word = 0;
            }
        }
    }
    if (*word == '_' && (IsAlphaUTF8(word[1]))) // is this a rename _
    {
        WORDP D = FindWord(word);
        if (D && D->internalBits & RENAMED) sprintf(word + 1,(char*)"%u",(unsigned int)D->properties); // remap match var convert to number
        // patterns can underscore ANYTHING
    }
    if (*word == '\'' && word[1] == '_' && (IsAlphaUTF8(word[2]))) // is this a rename _ with '
    {
        WORDP D = FindWord(word + 1);
        if (D && D->internalBits & RENAMED) sprintf(word + 2,(char*)"%u",(unsigned int)D->properties); // remap match var convert to number
        else if (!renameInProgress && !patternContext) // patterns can underscore ANYTHING
            WARNSCRIPT((char*)"%s is not a recognized _rename. Should it be?\r\n",word + 1)
    }
    if (*word == '@' && IsAlphaUTF8(word[1])) // is this a rename @
    {
        char* at = GetSetEnd(word);
        WORDP D = FindWord(word,at - word);
        if (D && D->internalBits & RENAMED) // remap @set in string
        {
            strcpy(tmp,at); // protect chunk
            sprintf(word + 1,(char*)"%u%s",(unsigned int)D->properties,tmp);
        }
        else if (!renameInProgress) WARNSCRIPT((char*)"%s is not a recognized @rename. Is it intended to be?\r\n",word)
    }
    if (*word == '@' && word[1] == '_' && IsAlphaUTF8(word[2])) // is this a rename @_0+
    {
        size_t lenx = strlen(word);
        WORDP D = FindWord(word + 1, lenx - 1); // @_data marker
        char c = 0;
        if (!D) // retry without the last character, which is kept in c
        {
            c = word[lenx - 1];
            word[lenx - 1] = 0;
            D = FindWord(word + 1, lenx - 2);
            word[lenx - 1] = c;
        }
        if (D && D->internalBits & RENAMED)
        {
            if (c) sprintf(word + 2,(char*)"%u%c",(unsigned int)D->properties,c); // remap @set in string
            else sprintf(word + 2, (char*)"%u", (unsigned int)D->properties); // remap @set in string
        }
        else if (!renameInProgress) WARNSCRIPT((char*)"%s is not a recognized @rename. Is it intended to be?\r\n",word)
    }

    // some tokens require special splitting

    // break off a trailing >> from the token (e.g. hello>>); a 3-character token
    // whose first two characters are equal and which ends in '=' is left alone
    if (len > 2 && word[len - 1] == '>')
    {
        if (len == 3 && *word == word[1] && word[2] == '=') { ; }
        else if (word[len - 2] == '>')
        {
            ptr -= 2;
            word[len - 2] = 0;
            len -= 2;
        }
    }

    // break off punctuation from variable end
    if (len > 2 && ((*word == USERVAR_PREFIX && !IsDigit(word[1])) || *word == '^' || (*word == '@' && IsDigit(word[1])) || *word == SYSVAR_PREFIX || (*word == MATCHVAR_PREFIX && IsDigit(word[1])) || (*word == '\'' && word[1] == '_'))) // not currency
    {
        if (word[len - 2] == '\\') {} // not escaped ending punctuation
        else if (!patternContext || word[len - 1] != '?') // BUT NOT $$xxx? in pattern context
        {
            while (IsRealPunctuation(word[len - 1])) // one would be enough, but $hello... needs to be addressed
            {
                --len;
                --ptr;
            }
            word[len] = 0;
        }
    }

    // break off opening < in pattern
    if (patternContext && *word == '<' && word[1] != '<')
    {
        ptr -= len - 1;
        len = 1;
        word[1] = 0;
    }

    // break off closing > in pattern unless escaped or notted
    if (len == 2 && (*word == '!' || *word == '\\')) { ; }
    else if (patternContext && len > 1 && word[len - 1] == '>' && word[len - 2] != '>' && word[len - 2] != '_' && word[len - 2] != '!')
    {
        ptr -= 1;
        --len;
        word[len] = 0;
    }

    // find internal comparison op if any
    char* at = (patternContext) ? FindComparison(word) : 0;
    if (at && *word == '*' && !IsDigit(word[1]))
    {
        if (compiling) BADSCRIPT((char*)"TOKENS-1 Cannot do comparison on variable gap %s . Memorize and compare against _# instead later.\r\n",word)
    }
    if (at && *at == '!' && at[1] == '$') { ; } // allow !$xxx
    else if (at) // revise comparison operators
    {
        if (*at == '!') ++at;
        ++at; // at now addresses the text after the first operator character

        if (*at == '^' && at[1]) // remap function arg on right side.
        {
            int index = GetFunctionArgument(at);
            if (index >= 0) sprintf(at,(char*)"^%d",index);
        }
        if (*at == '_' && IsAlphaUTF8(word[1])) // remap rename matchvar arg on right side.
        {
            WORDP D = FindWord(at);
            if (D && D->internalBits & RENAMED) sprintf(at,(char*)"_%u",(unsigned int)D->properties);
        }
        if (*at == '@' && IsAlphaUTF8(word[1])) // remap @set arg on right side.
        {
            char* at1 = GetSetEnd(at);
            WORDP D = FindWord(at,at1 - at);
            if (D && D->internalBits & RENAMED) // remap @set on right side
            {
                strcpy(tmp,at1); // protect chunk
                sprintf(at + 1,(char*)"%u%s",(unsigned int)D->properties,tmp);
            }
        }

        // check for remap on LHS
        if (*word == '^')
        {
            char c = *--at; // step back onto the operator character and cut the token there
            *at = 0;
            int index = GetFunctionArgument(word);
            *at = c;
            if (index >= 0)
            {
                sprintf(tmp,(char*)"^%d%s",index,at);
                strcpy(word,tmp);
            }
        }
        // check for rename on LHS
        if (*word == '_' && IsAlphaUTF8(word[1]))
        {
            char* atx = word;
            while (IsAlphaUTF8OrDigit(*++atx)) { ; }
            WORDP D = FindWord(word,atx - word);
            if (D && D->internalBits & RENAMED) // remap match var
            {
                sprintf(tmp,(char*)"%u%s",(unsigned int)D->properties,atx);
                strcpy(word + 1,tmp);
            }
        }
        // check for rename on LHS
        if (*word == '@' && IsAlphaUTF8(word[1]))
        {
            char* atx = GetSetEnd(word);
            WORDP D = FindWord(word,atx - word);
            if (D && D->internalBits & RENAMED) // remap @set in string
            {
                strcpy(tmp,atx); // protect chunk
                sprintf(word + 1,(char*)"%u%s",(unsigned int)D->properties,tmp);
            }
        }
    }

    // when seeing ^, see if it remaps as a function argument
    // check for internal ^ also...
    char* hat = word - 1;
    if (*word == '$' && word[1] == '^' && strstr(word, ":=")) { return ptr; } // fn define
    else while ((hat = strchr(hat + 1,'^'))) // find a hat within
    {
        char* at = hat;
        while (*++at && (IsAlphaUTF8(*at) || *at == '_' || IsDigit(*at))) { ; }
        char c = *at;
        *at = 0; // terminate it so internal ^ is recognized uniquely
        strcpy(tmp,hat);
        *at = c;
        while (*tmp) // runs at most once: either remaps or clears tmp
        {
            int index = GetFunctionArgument(tmp);
            if (index >= 0)
            {
                char remainder[MAX_WORD_SIZE];
                strcpy(remainder,at); // protect chunk AFTER this
                sprintf(hat,(char*)"^%d%s",index,remainder);
                break;
            }
            else tmp[0] = 0; // just abort it for now shrink it smaller, to handle @9subject kinds of behaviors
        }
    }

    // same for quoted function arg
    if (*word == '\'' && word[1] == '^' && word[2])
    {
        int index = GetFunctionArgument(word + 1);
        if (index >= 0) sprintf(word,(char*)"'^%d",index);
    }

    // break apart math on variables eg $value+2 as a service to the user
    if ((*word == '%' || *word == '$') && word[1]) // cannot use _ here as that will break memorization pattern tokens
    {
        char* atx = word + 1;
        if (atx[1] == '$' || atx[1] == '_') ++atx; // skip over 2ndary marker
        --atx;
        while (LegalVarChar(*++atx) || (*atx == '\\' && atx[1] == '$')); // find end of initial word - allowing \ for json $
        if (*word == '$' && (*atx == '.' || *atx == '[' || *atx == ']') && (LegalVarChar(atx[1]) || atx[1] == '$' || atx[1] == '[' || atx[1] == ']' || (atx[1] == '\\' && atx[2] == '$')))// allow $x.y as a complete name
        {
            while (LegalVarChar(*++atx) || *atx == '.' || *atx == '$' || (*atx == '[' || *atx == ']' || (*atx == '\\' && atx[1] == '$'))); // find end of field name sequence
            if (*(atx - 1) == '.') --atx; // tailing period cannot be part of it
        }
        if (*atx && IsPunctuation(*atx) & ARITHMETICS && *atx != '=')
        {
            if (*atx == '.' && atx[1] == '_' && IsDigit(atx[2])) {} // json field reference indirection
            else if (*atx == '.' && atx[1] == '\'' && atx[2] == '_' && IsDigit(atx[3])) {} // json field reference indirection
            // - is legal in a var or word token
            else if (*atx != '-' || (!IsAlphaUTF8OrDigit(atx[1]) && atx[1] != '_'))
            {
                ptr -= strlen(atx); // give the operator and what follows back to the input
                *atx = 0;
                len = atx - start;
            }
        }
    }
    char* tilde = (IsAlphaUTF8(*word)) ? strchr(word + 1,'~') : 0;
    if (tilde && *word != '$') // has specific meaning like African-american~1n or African-american~1 - we compile to master to detect meaning
    {
        if (IsDigit(*++tilde)) // we know the meaning, removing any POS marker since that is redundant
        {
            if (IsDigit(*++tilde)) ++tilde;
            if (*tilde && !tilde[1]) *tilde = 0; // trim off pos marker
            // now force meaning to master
            MEANING M = ReadMeaning(word,true,false);
            if (M)
            {
                M = GetMaster(M);
                sprintf(word,(char*)"%s~%u",Meaning2Word(M)->word,Meaning2Index(M));
            }
        }
    }

    // universal cover of simple use - complex tokens require processing elsewhere
    if (*word == USERVAR_PREFIX && word[1] == LOCALVAR_PREFIX)
    {
        char* at = word + 1;
        while (*++at)
        {
            if (!IsAlphaUTF8OrDigit(*at) && *at != '-' && *at != '_') break;
        }
        if (*at == '.') // root of a dotted variable
        {
            *at = 0;
            AddDisplay(word);
            *at = '.';
        }
        else if (!*at) AddDisplay(word);
    }

    InsureAppropriateCase(word);
    if (csapicall != NO_API_CALL && !stricmp(word, "#!")) // compilepattern api comment
    {
        ptr = strstr(ptr, "!#"); // skip to the end of the #! ... !# comment
        if (!ptr) return ptr; // failed somehow
        ptr += 2;
        if (*ptr) ptr = ReadSystemToken(ptr, word, separateUnderscore);
    }
    return ptr;
}

/*
 * EraseTopicFiles - delete the text (and map .json) topic files of a build,
 * in both the old flat layout and the BUILD<name> directory layout.
 * The results of remove() are stored in `result` but not examined.
 */
void EraseTopicFiles(unsigned int build, char* name)
{
    int i = -1;
    while (topicFiles[++i])
    {
        char file[SMALL_WORD_SIZE];
        sprintf(file, (char*)"%s/%s%s.txt", topicfolder, topicFiles[i], name); // old style
        int result = remove(file);
        sprintf(file, (char*)"%s/BUILD%s/%s%s.txt", topicfolder, name, topicFiles[i], name); // new style
        result = remove(file);
        if (!strcmp(topicFiles[i],(char*)"map")) // the map also exists as a .json file
        {
            sprintf(file, (char*)"%s/BUILD%s/%s%s.json", topicfolder, name, topicFiles[i], name); // new style
            result = remove(file);
        }
    }
EraseTopicBin(build, name); } #ifndef DISCARDSCRIPTCOMPILER static char* WriteDisplay(char* pack) { *pack++ = '('; *pack++ = ' '; if (displayIndex) // show and merge in the new stuff { for (int i = 0; i < displayIndex; ++i) { strcpy(pack,display[i]); pack += strlen(pack); *pack++ = ' '; } displayIndex = 0; } *pack++ = ')'; *pack++ = ' '; *pack = 0; return pack; } static char* FindAssignment(char* word) { char* assign = strchr(word + 1, ':'); if (!assign || (assign[1] != '=' && assign[2] != '=')) return NULL; return assign; } static void AddMapOutput(int line) { if (livecall) return; // if we are mapping (:build) and have started output and some data storage change has happened if (mapFile && dataBase && lineStart != dataChunk && strnicmp(macroName, "^tbl:", 5)) { *dataChunk = 0; char src[MAX_WORD_SIZE]; strncpy(src, lineStart, 30); src[30] = 0; fprintf(mapFile, (char*)" line: %d %d # %s\r\n", line, (int)(lineStart - dataBase),src); // readBuffer } lineStart = dataChunk; // used to detect new line needs tracking } char* ReadNextSystemToken(FILE* in,char* ptr, char* word, bool separateUnderscore, bool peek) { #ifdef INFORMATION The outside can ask for the next real token or merely peek ahead one token.And sometimes the outside after peeking, decides it wants to back up a real token(passing it to some other processor). To support backing up a real token, the system must keep the current readBuffer filled with the data that led to that token(to allow a ptr - strlen(word) backup). To support peeking, the system may have to read a bunch of lines in to find a token.It is going to need to track that buffer separately, so when it needs a real token which was the peek, it can both get the peek value and be using contents of the new buffer thereafter. So peeks must never touch the real readBuffer.And real reads must know whether the last token was peeked and from which buffer it was peeked. 
And, if someone wants to back up to allow the old token to be reread, they have to CANCEL any peek data, so the token comes from the old buffer.Meanwhile the newbuffer continues to have content for when the old buffer runs out.
#endif
	// Overview of the code below:
	//  - in == NULL && ptr == NULL is a "forget any peeked token" request; returns NULL.
	//  - a previously peeked token (incomingPtrSys set) is served from lookaheadSys.
	//  - otherwise a token is read from ptr, pulling further lines into newScriptBuffer as needed.
	// Returns a pointer just past the token, (char*)1 for any peek, or NULL at end of input.
	if (!peek) currentFileLine = maxFileLine; // return to context real
	int line = currentFileLine;		// remembered so a peek can restore the position it started from
	int column = currentLineColumn;

	// clear peek cache
	if (!in && !ptr) // clear cache request, next get will be from main buffer (though secondary buffer may still have peek read data)
	{
		if (word) *word = 0;
		incomingPtrSys = NULL; // no longer holding a PEEK value.
		return NULL;
	}

	char* result = NULL;
	if (incomingPtrSys ) // had a prior PEEK, now in cache. use up cached value, unless duplicate peeking
	{
		result = incomingPtrSys; // caller who is peeking will likely ignore this
		if (!peek)
		{
			currentFileLine = maxFileLine; // revert to highest read
			// he wants reality now...
			if (newScriptBuffer && *newScriptBuffer) // prior peek was from this buffer, make it real data in real buffer
			{
				strcpy(readBuffer,newScriptBuffer);
				result = (result - newScriptBuffer) + readBuffer; // adjust pointer to current buffer
				*newScriptBuffer = 0;
			}
			strcpy(word,lookaheadSys);
			incomingPtrSys = 0;
		}
		else
		{
			strcpy(word,lookaheadSys); // duplicate peek
			result = (char*)1; // NO ONE SHOULD KEEP A PEEKed PTR
		}
		// column is measured from linestartpoint for the pattern/output compile api calls, else from readBuffer
		if (result == (char*)1) { }
		else if ((csapicall == COMPILE_PATTERN || csapicall == COMPILE_OUTPUT)) currentLineColumn = (result - linestartpoint);
		else currentLineColumn = (result - readBuffer);
		// any token containing "$_" is rejected while a pattern macro is being defined
		if (currentFunctionDefinition && (currentFunctionDefinition->internalBits & FUNCTION_BITS) == IS_PATTERN_MACRO)
		{
			char* bad = strstr(word, "$_");
			if (bad) BADSCRIPT("Not allowed to use local variables %s in a pattern macro %s", word, currentFunctionDefinition->word)
		}
		return result;
	}

	*word = 0;
	if (ptr)
	{
		result = ReadSystemToken(ptr, word, separateUnderscore);
	}
	bool newln = false;	// set once a token has been read out of newScriptBuffer
	while (!*word) // found no token left in existing buffer - we have to juggle buffers now unless running overwrite
	{
		if (!newln && newScriptBuffer && *newScriptBuffer) // use pre-read buffer per normal, it will have a token
		{
			strcpy(readBuffer,newScriptBuffer);
			*newScriptBuffer = 0;
			result = ReadSystemToken(readBuffer,word,separateUnderscore);
			break;
		}
		else // read new line into hypothetical buffer, not destroying old actual buffer yet
		{
			if (!in || ReadALine(newScriptBuffer,in, maxBufferSize,false, convertTabs) < 0) return NULL; // end of file
			if (!strnicmp(newScriptBuffer,(char*)"#ignore",7)) // hit an ignore zone
			{
				// skip lines until the matching #endignore; ignoreCount tracks nested #ignore lines
				unsigned int ignoreCount = 1;
				while (ReadALine(newScriptBuffer,in) >= 0)
				{
					if (!strnicmp(newScriptBuffer,(char*)"#ignore",7)) ++ignoreCount;
					else if (!strnicmp(newScriptBuffer,(char*)"#endignore",10))
					{
						if (--ignoreCount == 0)
						{
							if (ReadALine(newScriptBuffer,in) < 0) return NULL; // EOF
							break;
						}
					}
				}
				if (ignoreCount) return NULL; //EOF before finding closure
			}
			result = ReadSystemToken(newScriptBuffer,word,separateUnderscore); // result is ptr into NEWBUFFER
			newln = true;
		}
	}

	if (peek) // save request - newScriptBuffer has implied newln if any
	{
		incomingPtrSys = result; // next location in whatever buffer
		strcpy(lookaheadSys,word); // save next token peeked
		result = (char*)1; // NO ONE SHOULD KEEP A PEEKed PTR
		currentFileLine = line; // claim old value
		currentLineColumn = column;
	}
	else if (newln && newScriptBuffer) // live token from new buffer, adjust pointers and buffers to be fully up to date
	{
		strcpy(readBuffer,newScriptBuffer);
		result = (result - newScriptBuffer) + readBuffer; // ptr into current readBuffer now
		*newScriptBuffer = 0;
	}

	// same column bookkeeping and pattern-macro check as in the cached-peek path above
	if (result == (char*)1 ) { }
	else if ((csapicall == COMPILE_PATTERN || csapicall == COMPILE_OUTPUT)) currentLineColumn = (result - linestartpoint);
	else currentLineColumn = (result - readBuffer);
	if (currentFunctionDefinition && (currentFunctionDefinition->internalBits & FUNCTION_BITS) == IS_PATTERN_MACRO)
	{
		char* bad = strstr(word, "$_");
		if (bad) BADSCRIPT("Not allowed to use local variables %s in a pattern macro %s", word, currentFunctionDefinition->word)
	}
	return result; // ptr into READBUFFER or 1 if from peek zone
}

// Copies the next displayable fragment of compiled output from ptr into buffer.
// Tokens are copied with a single trailing space each. The fragment is a run of simple
// words, a ':' command with the rest of the unit, a function call with its argument list,
// a variable assignment with its op/value pairs, or a run of [ ] choices.
// Returns the position to continue from, or NULL after a ':' command consumed the rest.
// NOTE(review): buffer is assumed large enough for the fragment; nothing here bounds the copies.
char* ReadDisplayOutput(char* ptr,char* buffer) // locate next output fragment to display (that will be executed)
{
	char next[MAX_WORD_SIZE];
	char* hold;
	*buffer = 0;
	char* out = buffer;
	while (*ptr != ENDUNIT) // not end of data
	{
		char* before = ptr;
		ptr = ReadCompiledWord(ptr,out); // move token
		if (*out && out[1] && out[2] && (out[3] == '{' || out[3] == '(') && !out[4]) // accellerator + opening?
		{
			ptr = before + ACCELLSIZE; // ignore accel
			continue;
		}
		if (*out && out[1] && out[2] && !out[3] && ptr[0] == '{') // accellerator + opening?
		{
			continue; // accel before else final code
		}
		if (!strnicmp(ptr, "else", 4) && !out[3])
		{
			continue; // skip accel before else
		}
		char* copied = out;		// start of the token just copied
		out += strlen(out);
		strcpy(out,(char*)" ");
		++out;
		*out = 0;
		hold = ReadCompiledWord(ptr,next); // and the token after that?
		if (IsAlphaUTF8OrDigit(*copied) ) // simple output word was copied
		{
			if (!*next || !IsAlphaUTF8OrDigit(*next)) break; // followed by something else simple
		}
		else if (*buffer == ':' && buffer[1]) // testing command occupies the rest always
		{
			char* end = strchr(ptr,ENDUNIT);
			if (end)
			{
				strncpy(out,ptr,end-ptr);
				out += end-ptr;
				*out = 0;
			}
			ptr = NULL;
			break;
		}
		else if (*buffer == '^' && *next == '(') // function call
		{
			// NOTE(review): the result of BalanceParen is used without a NULL check - confirm it cannot fail here
			char* end = BalanceParen(ptr+1,true,false); // function call args
			strncpy(out,ptr,end-ptr);
			out += end-ptr;
			*out = 0;
			ptr = end;
			break;
		}
		else if ((*buffer == USERVAR_PREFIX && (buffer[1] == LOCALVAR_PREFIX || buffer[1] == TRANSIENTVAR_PREFIX || IsAlphaUTF8(buffer[1]) )) || (*buffer == SYSVAR_PREFIX && IsAlphaUTF8(buffer[1])) || (*buffer == '@' && IsDigit(buffer[1])) || (*buffer == '_' && IsDigit(buffer[1])) ) // user or system variable or factset or match variable
		{
			if (*next != '=' && next[1] != '=') break; // not an assignment statement
			while (hold) // read op, value pairs
			{
				strcpy(out,next); // transfer assignment op or arithmetic op
				out += strlen(out);
				strcpy(out,(char*)" ");
				++out;
				ptr = ReadCompiledWord(hold,next); // read value
				strcpy(out,next); // transfer value
				out += strlen(out);
				// if value is a function call, get the whole call
				if (*next == '^' && *ptr == '(')
				{
					char* end = BalanceParen(ptr+1,true,false); // function call args
					strncpy(out,ptr,end-ptr);
					out += end-ptr;
					*out = 0;
					ptr = end;
				}
				strcpy(out,(char*)" ");
				++out;
				if (*ptr != ENDUNIT) // more to rule
				{
					hold = ReadCompiledWord(ptr,next); // is there more to assign
					if (IsArithmeticOperator(next)) continue; // need to swallow op and value pair
				}
				break;
			}
			break;
		}
		else if (*buffer == '[') // choice area
		{
			// find closing ]
			char* end = ptr-1;
			while (ALWAYS)
			{
				end = strchr(end+1,']'); // find a closing ]
				if (!end) break; // failed
				if (*(end-1) != '\\') break; // ignore literal \[
			}
			if (end) // found end of a [] pair
			{
				++end;
				strncpy(out,ptr,end-ptr);
				out += end-ptr;
				*out = 0;
				ptr = end + 1;
				if (*ptr != '[') break; // end of choice zone
			}
		}
		else break;
	}
	if (!stricmp(buffer,(char*)"^^loop ( -1 ) ")) strcpy(buffer,(char*)"^^loop ( ) "); // shorter notation
	return ptr;
}

////////////////// CAN BE COMPILED AWAY
#ifndef DISCARDSCRIPTCOMPILER

#define MAX_TOPIC_SIZE 500000
#define MAX_TOPIC_RULES 32767
#define MAX_TABLE_ARGS 20

static unsigned int hasPlans; // how many plans did we read
static int missingFiles; // how many files of topics could not be found
static int spellCheck = 0; // what do we spell check
static int topicCount = 0; // how many topics did we compile
static char duplicateTopicName[MAX_WORD_SIZE]; // potential topic name repeated
static char assignKind[MAX_WORD_SIZE]; // what we are assigning from in an assignment call
static char currentTopicName[MAX_WORD_SIZE]; // current topic being read
static char lowercaseForm[MAX_WORD_SIZE]; // a place to put a lower case copy of a token
static char verifyLines[100][MAX_WORD_SIZE]; // verification lines for a rule to dump after seeing a rule
static unsigned int verifyIndex = 0; //
index of how many verify lines seen static char* ReadLoop(char* word, char* ptr, FILE* in, char* &data,char* rejoinders,bool json); #ifdef INFORMATION Script compilation validates raw topic data files amd converts them into efficient-to-execute forms. This means creating a uniform spacing of tokens and adding annotations as appropriate. Reading a topic file (on the pattern side) often has tokens jammed together. For example all grouping characters like () [ ] { } should be independent tokens. Possessive forms like cat's and cats' should return as two tokens. Just as all contractions will get expanded to the full word. Some tokens can be prefixed with ! or single-quote or _ . In order to be able to read special characters (including prefix characters) literally, one can prefix it with \ as in \[ . The token returned includes the \. \! means the exclamation mark at end of sentence. You are not required to do \? because it is directly a legal token, but you can. You CANNOT test for . because it is the default and is subsumed automatically. #endif static void AddMap(char* kind,char* msg,unsigned int *itemCount) { if (!mapFile) return; if (kind) { char value[MAX_WORD_SIZE]; strcpy(value, kind); char* at = strchr(value, ':'); if (at) { sprintf(at + 1, " %u ", currentFileLine); at = strchr(kind, ':'); strcat(value, at + 1); } fprintf(mapFile,(char*)"%s %s",value, (msg) ? 
msg : ((char*)"")); if (myBot && !strstr(value,"rule:") && !strstr(value, "complexity of")) { fprintf(mapFile, (char*)" %s", PrintU64(myBot)); } fprintf(mapFile, (char*)"\r\n"); if (mapFileJson) { char keyword[MAX_WORD_SIZE]; strcpy(keyword, kind); char* key = TrimSpaces(keyword, true); at = strchr(key, ':'); if (at && *at) *at = 0; if (itemCount) ++(*itemCount); if (*itemCount > 1) fprintf(mapFileJson, (char*)",\r\n"); fprintf(mapFileJson, (char*)"{\r\n"); if (!strcmp(key,"rule")) { char* rulekind = strrchr(at+1, ' '); if (rulekind) *rulekind = 0; char* tag = strchr(at+1, '.'); if (tag) *tag = 0; char* label = strrchr(tag+1, '-'); if (label && IsDigit(*(label-1))) *label = 0; fprintf(mapFileJson, (char*)"\"%s\" : \"%s\"",key,tag+1); if (rulekind) fprintf(mapFileJson, (char*)",\r\n\"kind\" : \"%s\"",rulekind+1); if (label) fprintf(mapFileJson, (char*)",\r\n\"label\" : \"%s\"",label+1); } else fprintf(mapFileJson, (char*)"\"%s\" : \"%s\"",key,msg); if (currentFileLine > 0) fprintf(mapFileJson, (char*)",\r\n\"line\" : %u",currentFileLine); if (!strcmp(key,"concept")) fprintf(mapFileJson,"\r\n}"); } } else { fprintf(mapFile,(char*)"%s\r\n",msg); // for complexity metric if (mapFileJson && strstr(msg, "omplexity")) { char* at = strrchr(msg, ':'); if (at) { unsigned int comp = atoi(at + 2); fprintf(mapFileJson, (char*)",\r\n\"complexity\" : %u", comp); fprintf(mapFileJson, "\r\n}"); } } } } static void ClearBeenHere(WORDP D, uint64 junk) { RemoveInternalFlag(D,BEEN_HERE); // clear transient ignore spell warning flag if (*D->word != '^' && *D->word != '~' && *D->word != '$' && D->internalBits & DO_NOISE && !(D->systemFlags & HAS_SUBSTITUTE)) RemoveInternalFlag(D, DO_NOISE); } bool TopLevelUnit(char* word) // major headers (not kinds of rules) { return (!stricmp(word,(char*)":quit") || !stricmp(word,(char*)"canon:") || !stricmp(word,(char*)"replace:") || !stricmp(word, (char*)"debug:") || !stricmp(word, (char*)"ignorespell:") || !stricmp(word, (char*)"prefer:") || 
!stricmp(word,(char*)"query:") || !stricmp(word, (char*)"word:") || !stricmp(word,(char*)"concept:") || !stricmp(word,(char*)"data:") || !stricmp(word,(char*)"plan:") || !stricmp(word,(char*)"outputMacro:") || !stricmp(word,(char*)"patternMacro:") || !stricmp(word,(char*)"dualMacro:") || !stricmp(word,(char*)"table:") || !stricmp(word,(char*)"tableMacro:") || !stricmp(word,(char*)"rename:") || !stricmp(word,(char*)"describe:") || !stricmp(word,(char*)"bot:") || !stricmp(word, (char*)"language:") || !stricmp(word,(char*)"topic:") || (*word == ':' && IsLowerCase( word[1]) && IsLowerCase(word[2]) && !IsEmojiShortname(word)) ); // :xxx is a debug command } static char* FlushToTopLevel(FILE* in,unsigned int depth,char* data) { globalDepth = depth; if (data) *data = 0; // remove data char word[MAX_WORD_SIZE]; int oldindex = jumpIndex; jumpIndex = -1; // prevent ReadNextSystemToken from possibly crashing. if (newScriptBuffer) *newScriptBuffer = 0; ReadNextSystemToken(NULL,NULL,word,false); // clear out anything ahead char* ptr = readBuffer + strlen(readBuffer) - 1; while (ALWAYS) { char* quote = NULL; while ((quote = strchr(ptr,'"'))) ptr = quote + 1; // flush quoted things ptr = ReadNextSystemToken(in,ptr,word,false); if (!*word) break; MakeLowerCopy(lowercaseForm,word); if (TopLevelUnit(lowercaseForm) || TopLevelRule(lowercaseForm)) { ptr -= strlen(word); // safe break; } } jumpIndex = oldindex; return ptr; } static bool IsSet(char* word) { if (*word != '~') return false; if (!word[1]) return true; // single ~ is a set, representing current topic WORDP D = FindWord(word,0,LOWERCASE_LOOKUP); return (D ) ? true : false; } static bool IsTopic(char* word) { if (!word[1]) return true; WORDP D = FindWord(word,0,LOWERCASE_LOOKUP); return (D && D->internalBits & TOPIC) ? 
true : false; } static void DoubleCheckSetOrTopic() { char file[200]; *scopeBotName = 0; // no longer compiling a specific bot sprintf(file,"%s/missingsets.txt", topicfolder); FILE* in = FopenReadWritten(file); if (!in) return; *currentFilename = 0; // dont tell the name of the file while (ReadALine(readBuffer,in) >= 0) { char* ptr = readBuffer; char word[MAX_WORD_SIZE]; char line[MAX_WORD_SIZE]; char column[MAX_WORD_SIZE]; ptr = ReadCompiledWord(readBuffer,word); ptr = ReadCompiledWord(ptr, line); currentFileLine = atoi(line); ptr = ReadCompiledWord(ptr, column); currentLineColumn = atoi(column); ReadCompiledWord(ptr, currentFilename); char* data = strchr(word, '$'); if (data) *data = 0; if (!IsSet(word) && !IsTopic(word) && !IsDigit(word[1])) // NEVER defined WARNSCRIPT((char*)"Undefined set or topic %s\r\n",word) else if (data && data[1])// check if defined for appropriate bot { if (data) *data = '$'; if (!data || !data[1]) continue; // no bot specific given WORDP D = FindWord(word); if (!D || !(D->internalBits & BEEN_HERE)) { if (data) *data = 0; WARNSCRIPT("Undefined set or topic %s for bot %s\r\n", word, data+1) } } } FClose(in); remove(file); } static void CheckSetOrTopic(char* name) // we want to prove all references to set get defined { if (livecall) return; if (*name == '~' && !name[1]) return; // simple ~ reference char word[MAX_WORD_SIZE]; MakeLowerCopy(word,name); char* label = strchr(word,'.'); // set reference might be ~set or ~set.label if (label) *label = 0; if ((IsTopic(word) || IsSet(word))) return; // we know this already char combo[MAX_WORD_SIZE]; strcpy(combo, word); strcat(combo, "$"); char bot[MAX_WORD_SIZE]; if (*scopeBotName == ' ') strcpy(bot, scopeBotName + 1); else strcpy(bot, scopeBotName); size_t len = strlen(bot); if (bot[len - 1] == ' ') bot[len - 1] = 0; strcat(combo, bot); if (FindWord(combo)) return; // we know this already WORDP D = StoreWord(combo); char file[200]; sprintf(file,"%s/missingsets.txt", topicfolder); FILE* out = 
FopenUTF8WriteAppend(file); fprintf(out,(char*)"%s %u %u %s\r\n",combo,currentFileLine,currentLineColumn, currentFilename); fclose(out); // dont use FClose } static char* AddVerify(char* kind, char* sample) { char* comment = strstr(sample,(char*)"# "); // locate any comment on the line and kill it if (comment) *comment = 0; sprintf(verifyLines[verifyIndex++],(char*)"%s %s",kind,SkipWhitespace(sample)); return 0; // kill rest of line } static void WriteVerify(char* label) { if (!verifyIndex) return; char name[100]; sprintf(name, (char*)"VERIFY/%s-b%c.txt", currentTopicName + 1, (buildID == BUILD0) ? '0' : '1'); FILE* valid = FopenUTF8WriteAppend(name); static bool init = true; if (!valid && init) { MakeDirectory((char*)"VERIFY"); init = false; valid = FopenUTF8WriteAppend(name); if (!valid) printf("Unable to create VERIFY directory.\r\n"); } if (valid) { char* space = ""; if (REJOINDERID(currentRuleID)) space = " "; for (unsigned int i = 0; i < verifyIndex; ++i) { if (*label) fprintf(valid, (char*)"%s%s.%u.%u=%s %s\r\n", space, currentTopicName, TOPLEVELID(currentRuleID), REJOINDERID(currentRuleID), label, verifyLines[i]); else fprintf(valid, (char*)"%s%s.%u.%u %s\r\n", space, currentTopicName, TOPLEVELID(currentRuleID), REJOINDERID(currentRuleID), verifyLines[i]); } fclose(valid); // dont use FClose } verifyIndex = 0; } #ifdef INFORMATION We mark words that are not normally in the dictionary as pattern words if they show up in patterns. For example, the names of synset heads are not words, but we use them in patterns. They will be marked during the scan phase of matching ONLY if some pattern "might want them". I.e., they have a pattern-word mark on them. same is true for multiword phrases we scan. Having marked words also prevents us from spell-correcting something we were expecting but which is not a legal word. 
#endif static void DownHierarchy(MEANING T, FILE* out, int depth) { if ( !T) return; WORDP D = Meaning2Word(T); if (D->internalBits & VAR_CHANGED) return; if (*D->word == '~') fprintf(out,(char*)"%s depth=%d\r\n",D->word,depth); else fprintf(out,(char*)" %s\r\n",D->word); D->internalBits |= VAR_CHANGED; unsigned int size = GetMeaningCount(D); if (!size) size = 1; if (*D->word == '~') // show set members { FACT* F = GetObjectNondeadHead(D); while (F) { if (F->verb == Mmember) DownHierarchy(F->subject,out,depth+1); F = GetObjectNondeadNext(F); } fprintf(out,(char*)". depth=%d\r\n",depth); } } static void WriteKey(char* word) { if (!compiling || spellCheck != NOTE_KEYWORDS || *word == '_' || *word == '\'' || *word == USERVAR_PREFIX || *word == SYSVAR_PREFIX || *word == '@') return; if (!stricmp(current_language, "Japanese") || !stricmp(current_language, "chinese")) return; // no spellcheck happens // cheap test when not using Language param yet unsigned char japanletter[8]; int kind; if (csapicall == COMPILE_PATTERN && IsJapanese(word, (unsigned char*)&japanletter, kind)) return; StoreWord(word); if (livecall) { return; } char file[SMALL_WORD_SIZE]; sprintf(file,(char*)"%s/keys.txt", tmpfolder); FILE* out = FopenUTF8WriteAppend(file); if (out) { DownHierarchy(MakeMeaning(StoreWord(word)),out,0); fclose(out); // dont use Fclose } } static void WritePatternWord(char* word) { if (IsUniversal(word,0)) return; // not normal words if (!stricmp(current_language, "japanese") || !stricmp(current_language, "chinese")) return; // no spell check unsigned char japanletter[8]; int kind; if (csapicall == COMPILE_PATTERN && IsJapanese(word, (unsigned char*)&japanletter, kind)) return; if (!compiling) return; // do we want to note this word WORDP D = StoreWord(word,AS_IS); // if word is not resident, maybe its properties are transient so we must save pattern marker if (!(D->properties & NORMAL_WORD) && !(D->systemFlags & PATTERN_WORD)) { AddSystemFlag(D, PATTERN_WORD); if (compiling 
== CONCEPTSTRING_COMPILE) { heapPatternThread = AllocateHeapval(HV1_WORDP,heapPatternThread, (uint64)D); return; } } // case sensitivity? char tmp[MAX_WORD_SIZE]; MakeLowerCopy(tmp,word); WORDP lower = FindWord(word,0,LOWERCASE_LOOKUP); WORDP upper = FindWord(word,0,UPPERCASE_LOOKUP); char utfcharacter[10]; char* x = IsUTF8(word, utfcharacter); if (!strcmp(tmp,word) || utfcharacter[1]) {;} // came in as lower case or as UTF8 character, including ones that don't have an uppercase version? else if (nospellcheck) {;} else if (lower && lower->internalBits & DO_NOISE && !(lower->systemFlags & HAS_SUBSTITUTE)) {} // told not to check else if (upper && (GetMeaningCount(upper) > 0 || upper->properties & NORMAL_WORD )){;} // clearly known as upper case else if ( !nomixedcase && !livecall && !(spellCheck & NO_SPELL) && lower && lower->properties & NORMAL_WORD && !(lower->properties & (DETERMINER|AUX_VERB))) WARNSCRIPT((char*)"Keyword %s should not be uppercase - did prior rule fail to close\r\n",word) else if (!nomixedcase && !livecall && !(spellCheck & NO_SPELL) && spellCheck && lower && lower->properties & VERB && !(lower->properties & NOUN)) WARNSCRIPT((char*)"Uppercase keyword %s is usually a verb. 
Did prior rule fail to close\r\n",word) char* pos = strchr(word, '\''); if ( pos && pos[1] != 's' && !pos[1] && (pos-word) > 1 && *(pos-2) != '_') // not possessive '1 or separated french or other l' { WARNSCRIPT((char*)"Contractions are always expanded - %s won't be recognized\r\n",word) } if (D->properties & NORMAL_WORD) return; // already a known word if (D->internalBits & BEEN_HERE) return; // already written to pattern file or doublecheck topic ref file if (compiling != CONCEPTSTRING_COMPILE) AddBeenHere(D); if (livecall ) { patternwordthread = AllocateHeapval(HV1_WORDP,patternwordthread, (uint64)D);// save name } } static void NoteUse(char* label,char* topicName) { char xlabel[MAX_WORD_SIZE]; char* dot = strchr(label, '.'); char* tilde = strchr(label + 1, '~'); if (tilde) { *tilde = 0; sprintf(xlabel, "%s%s", label, dot); *tilde = '~'; } else strcpy(xlabel, label); char labelx[MAX_WORD_SIZE]; char bots[MAX_WORD_SIZE]; strcpy(bots, scopeBotName); if (*bots == ' ') strcpy(bots, scopeBotName + 1); if (!*bots) strcpy(bots, "*"); int len = strlen(bots); if (bots[len - 1] == ' ') bots[len - 1] = 0; MakeUpperCase(bots); sprintf(labelx, "%s-%s", xlabel,bots); MakeUpperCase(labelx); WORDP D = FindWord(labelx); if (!D || !(D->internalBits & LABEL)) // bug doesnt look for generic if bots exists { char file[200]; sprintf(file,"%s/missingLabel.txt", topicfolder); FILE* out = FopenUTF8WriteAppend(file); if (out) { if (*scopeBotName) fprintf(out, (char*)"%s %s %s %u\r\n", xlabel, scopeBotName, currentFilename, currentFileLine); // generic else fprintf(out,(char*)"%s * %s 0 %u\r\n",xlabel,currentFilename,currentFileLine); // specific bot fclose(out); // dont use FClose } } } static void ValidateCallArgs(WORDP D,char* arg1, char* arg2,char* argset[ARGSETLIMIT+1], bool needToField) { if (needToField) // assigning query to var, must give TO field value { if (!*argset[1] || !*argset[2] || !*argset[3] || !*argset[4] || !*argset[5] || !*argset[6]) BADSCRIPT((char*)"CALL- 62 query 
assignment to variable requires TO field\r\n") char* p = argset[7]; while (IsDigit(*++p)){} // skip if (!*p) WARNSCRIPT((char*)"Query assignment requires field name in %s, I don't see one.\r\n",argset[7]) } if (!stricmp(D->word,(char*)"^next")) { if (stricmp(arg1,(char*)"RESPONDER") && stricmp(arg1,(char*)"LOOP") && stricmp(arg1, (char*)"JSONLOOP") && stricmp(arg1,(char*)"REJOINDER") && stricmp(arg1,(char*)"RULE") && stricmp(arg1,(char*)"GAMBIT") && stricmp(arg1,(char*)"INPUT") && stricmp(arg1,(char*)"FACT")) BADSCRIPT((char*)"CALL- 62 1st argument to ^next must be FACT or LOOP or JSONLOOP or INPUT or RULE or GAMBIT or RESPONDER or REJOINDER - %s\r\n",arg1) } else if(!stricmp(D->word,(char*)"^jsonarraydelete")) { MakeLowerCase(arg1); if (!strstr(arg1,(char*)"index") && !strstr(arg1,(char*)"value") ) BADSCRIPT((char*)"CALL- ? 1st argument to ^jsonarraydelete must be INDEX or VALUE - %s\r\n",arg1) } else if(!stricmp(D->word,(char*)"^keephistory")) { if (stricmp(arg1,(char*)"USER") && stricmp(arg1,(char*)"BOT") ) BADSCRIPT((char*)"CALL- ? 1st argument to ^keephistory must be BOT OR USER - %s\r\n",arg1) } else if (!stricmp(D->word,(char*)"^conceptlist")) { if (stricmp(arg1,(char*)"TOPIC") && stricmp(arg1,(char*)"CONCEPT") && stricmp(arg1,(char*)"BOTH")) BADSCRIPT((char*)"CALL- ? 1st argument to ^conceptlist must be CONCEPT or TOPIC or BOTH - %s\r\n",arg1) } else if (!stricmp(D->word,(char*)"^field") && IsAlphaUTF8(*arg2)) { if (*arg2 != '$' && *arg2 != '^' && *arg2 != 's' && *arg2 != 'S' && *arg2 != 'v' && *arg2 != 'V' && *arg2 != 'O' && *arg2 != 'o' && *arg2 != 'F' && *arg2 != 'f' && *arg2 != 'A' && *arg2 != 'a' && *arg2 != 'R' && *arg2 != 'r') BADSCRIPT((char*)"CALL- 9 2nd argument to ^field must be SUBJECT or VERB or OBJECT or ALL or RAW or FLAG- %s\r\n",arg1) } else if (!stricmp(D->word,(char*)"^decodepos") ) { if (stricmp(arg1,(char*)"POS") && stricmp(arg1,(char*)"ROLE")) BADSCRIPT((char*)"CALL- ? 
1st argument to ^decodepos must be POS or ROLE - %s\r\n",arg1) } else if (!stricmp(D->word,(char*)"^position") ) { if (stricmp(arg1,(char*)"START") && stricmp(arg1,(char*)"END") && stricmp(arg1,(char*)"BOTH")) BADSCRIPT((char*)"CALL- ? 1st argument to ^position must be START, END, BOTH, - %s\r\n",arg1) } else if (!stricmp(D->word,(char*)"^getparse") ) { if (stricmp(arg2,(char*)"PHRASE") && stricmp(arg2,(char*)"VERBAL") && stricmp(arg2,(char*)"CLAUSE")&& stricmp(arg2,(char*)"NOUNPHRASE")) BADSCRIPT((char*)"CALL- ? 2nd argument to ^getparse must be PHRASE, VERBAL, CLAUSE, NOUNPHRASE- %s\r\n",arg2) } else if (!stricmp(D->word, (char*)"^reset")) { if (stricmp(arg1, (char*)"history") && stricmp(arg1, (char*)"facts") && stricmp(arg1, (char*)"variables") && stricmp(arg1, (char*)"user") && stricmp(arg1, (char*)"topic") && stricmp(arg1, (char*)"output") && *arg1 != '@') BADSCRIPT((char*)"CALL- 10 1st argument to ^reset must be USER or TOPIC or OUTPUT or VARIABLES or FACTS or HISTORY or an @set- %s\r\n", arg1) } else if (!stricmp(D->word,(char*)"^substitute")) { if (stricmp(arg1,(char*)"word") && stricmp(arg1,(char*)"character") && stricmp(arg1,(char*)"insensitive") && *arg1 != '"' && *arg1 != '^') BADSCRIPT((char*)"CALL- 11 1st argument to ^substitute must be WORD or CHARACTER or INSENSITIVE- %s\r\n",arg1) } else if (!stricmp(D->word,(char*)"^setrejoinder")) { if (*arg2 && stricmp(arg1,(char*)"input") && stricmp(arg1,(char*)"output") && stricmp(arg1,(char*)"copy") ) BADSCRIPT((char*)"CALL- 63 call to ^setrejoinder requires INPUT or OUTPUT or COPY as the 1st arg.\r\n") if (!*arg2 && (!stricmp(arg1,(char*)"input") || !stricmp(arg1,(char*)"output") || !stricmp(arg1,(char*)"copy")) ) BADSCRIPT((char*)"CALL- 63 call to ^setrejoinder requires 2nd argument naming what rule to use as rejoinder\r\n") } else if (!stricmp(D->word, (char*)"^pos")) { if (stricmp(arg1, (char*)"conjugate") && stricmp(arg1, (char*)"preexists") && stricmp(arg1, (char*)"raw") && stricmp(arg1, 
(char*)"allupper") && stricmp(arg1, (char*)"syllable") && stricmp(arg1, (char*)"ADJECTIVE") && stricmp(arg1, (char*)"ADVERB") && stricmp(arg1, (char*)"VERB") && stricmp(arg1, (char*)"AUX") && stricmp(arg1, (char*)"PRONOUN") && stricmp(arg1, (char*)"TYPE") && stricmp(arg1, (char*)"HEX32") && stricmp(arg1, (char*)"HEX64") && stricmp(arg1, (char*)"NOUN") && stricmp(arg1, (char*)"DETERMINER") && stricmp(arg1, (char*)"PLACE") && stricmp(arg1, (char*)"common") && stricmp(arg1, (char*)"capitalize") && stricmp(arg1, (char*)"uppercase") && stricmp(arg1, (char*)"lowercase") && stricmp(arg1, (char*)"canonical") && stricmp(arg1, (char*)"grade") && stricmp(arg1, (char*)"mixcase") && stricmp(arg1, (char*)"substitute") && stricmp(arg1, (char*)"isword") && stricmp(arg1, (char*)"integer") && stricmp(arg1, (char*)"IsModelNumber") && stricmp(arg1, (char*)"IsFunction") && stricmp(arg1, (char*)"IsInteger") && stricmp(arg1, (char*)"IsUppercase") && stricmp(arg1, (char*)"IsAllUppercase") && stricmp(arg1, (char*)"IsFloat") && stricmp(arg1, (char*)"IsMixedCase") && stricmp(arg1, (char*)"Xref")) BADSCRIPT((char*)"CALL- 12 1st argument to ^pos must be ISWORD or SYLLABLE or ALLUPPER or VERB or AUX or GRADE or PRONOUN or NOUN or ADJECTIVE or ADVERB or DETERMINER or PLACE or COMMON or CAPITALIZE or UPPERCASE or LOWERCASE or CANONICAL or INTEGER or HEX32 or HEX64 or ISMODELNUMBER or ISFUNCTION or ISINTEGER or ISUPPERCASE or ISALLUPPERCASE or ISFLOAT or ISMIXEDCASE or XREF - %s\r\n", arg1) } else if (!stricmp(D->word,(char*)"^getrule")) { if (stricmp(arg1,(char*)"TOPIC") && stricmp(arg1,(char*)"OUTPUT") && stricmp(arg1,(char*)"PATTERN") && stricmp(arg1,(char*)"LABEL") && stricmp(arg1,(char*)"TYPE") && stricmp(arg1,(char*)"TAG") && stricmp(arg1,(char*)"USABLE")) BADSCRIPT((char*)"CALL- 13 1st argument to ^getrule must be TAG or TYPE or LABEL or PATTERN or OUTPUT or TOPIC or USABLE - %s\r\n",arg1) } else if (!stricmp(D->word,(char*)"^poptopic")) { if (*arg1 && *arg1 != '~' && *arg1 != 
USERVAR_PREFIX && *arg1 != '_' && *arg1 != SYSVAR_PREFIX && *arg1 != '^') BADSCRIPT((char*)"CALL- 61 1st argument to ^poptopic must be omitted or a topic name or variable which will return a topic name - %s\r\n",arg1) } else if (!stricmp(D->word,(char*)"^nextrule")) { if (stricmp(arg1,(char*)"GAMBIT") && stricmp(arg1,(char*)"RESPONDER") && stricmp(arg1,(char*)"REJOINDER") && stricmp(arg1,(char*)"RULE")) BADSCRIPT((char*)"CALL- 14 1st argument to ^getrule must be TAG or TYPE or LABEL or PATTERN or OUTPUT - %s\r\n",arg1) } else if (!stricmp(D->word,(char*)"^end")) { if (stricmp(arg1,(char*)"RULE") && stricmp(arg1,(char*)"CALL") && stricmp(arg1,(char*)"LOOP") && stricmp(arg1, (char*)"JSONLOOP") && stricmp(arg1,(char*)"TOPIC") && stricmp(arg1,(char*)"SENTENCE") && stricmp(arg1,(char*)"INPUT") && stricmp(arg1,(char*)"PLAN")) BADSCRIPT((char*)"CALL- 15 1st argument to ^end must be RULE or LOOP or JSONLOOP or TOPIC or SENTENCE or INPUT or PLAN- %s\r\n",arg1) } else if (!stricmp(D->word,(char*)"^fail")) { if (stricmp(arg1, (char*)"TOPRULE") && stricmp(arg1, (char*)"CALL") && stricmp(arg1, (char*)"RULE") && stricmp(arg1, (char*)"CALL") && stricmp(arg1, (char*)"LOOP") && stricmp(arg1, (char*)"JSONLOOP") && stricmp(arg1, (char*)"TOPIC") && stricmp(arg1, (char*)"SENTENCE") && stricmp(arg1, (char*)"INPUT")) BADSCRIPT((char*)"CALL- 16 1st argument to ^fail must be CALL or RULE or TOPRULE or LOOP or JSONLOOP or TOPIC or SENTENCE or INPUT - %s\r\n", arg1) } else if (!stricmp(D->word,(char*)"^nofail")) { if (stricmp(arg1,(char*)"RULE") && stricmp(arg1, (char*)"CALL") && stricmp(arg1,(char*)"LOOP") && stricmp(arg1, (char*)"JSONLOOP") && stricmp(arg1,(char*)"TOPIC") && stricmp(arg1,(char*)"SENTENCE") && stricmp(arg1,(char*)"INPUT")) BADSCRIPT((char*)"CALL- 16 1st argument to ^nofail must be CALL or RULE or LOOP or JSONLOOP or TOPIC or SENTENCE or INPUT - %s\r\n",arg1) } else if (!stricmp(D->word,(char*)"^compute")) { char* op = arg2; if (*op != '\'' && stricmp(op, (char*)"+") && 
stricmp(op, (char*)"plus") && stricmp(op, (char*)"add") && stricmp(op, (char*)"and") && stricmp(op,(char*)"sub") && stricmp(op,(char*)"minus") && stricmp(op,(char*)"subtract") && stricmp(op,(char*)"deduct") && stricmp(op,(char*)"-") && stricmp(op,(char*)"x") && stricmp(op,(char*)"times") && stricmp(op,(char*)"multiply") && stricmp(op,(char*)"*") && stricmp(op,(char*)"divide") && stricmp(op,(char*)"quotient") && stricmp(op,(char*)"/") && stricmp(op,(char*)"remainder") && stricmp(op,(char*)"modulo") && stricmp(op,(char*)"mod") && stricmp(op,(char*)"%") && stricmp(op,(char*)"random") && stricmp(op,(char*)"root") && stricmp(op,(char*)"square_root") && stricmp(op,(char*)"power") && stricmp(op,(char*)"exponent") && *op != '^' && *op != '_' && *op != '$') // last covers macro args and exponents BADSCRIPT((char*)"CALL- 17 2nd argument to ^compute must be numeric operation - %s\r\n",op) } else if (!stricmp(D->word,(char*)"^counttopic") && IsAlphaUTF8(*arg1)) { if (strnicmp(arg1,(char*)"gambit",6) && stricmp(arg1,(char*)"used") && strnicmp(arg1,(char*)"rule",4) && stricmp(arg1,(char*)"available")) BADSCRIPT((char*)"CALL-20 CountTopic 1st arg must be GAMBIT or RULE or AVAILABLE or USED - %s\r\n",arg1) } else if (!stricmp(D->word,(char*)"^phrase")) { if (stricmp(arg1,(char*)"adjective") && stricmp(arg1,(char*)"verbal")&& stricmp(arg1,(char*)"noun") && stricmp(arg1,(char*)"preposition")) BADSCRIPT((char*)"CALL-21 ^Phrase 1st arg must be adjective or verbal or noun or preposition - %s\r\n",arg1) } else if (!stricmp(D->word,(char*)"^hasgambit") && IsAlphaUTF8(*arg2)) { if (stricmp(arg2,(char*)"last") && stricmp(arg2,(char*)"any") ) BADSCRIPT((char*)"CALL-21 HasGambit 2nd arg must be omitted or be LAST or ANY - %s\r\n",arg1) } else if (!stricmp(D->word,(char*)"^lastused" )) { if (strnicmp(arg2,(char*)"gambit",6) && strnicmp(arg2,(char*)"rejoinder",9) && strnicmp(arg2,(char*)"responder",9) && stricmp(arg2,(char*)"any")) BADSCRIPT((char*)"CALL-22 LastUsed 2nd arg must be GAMBIT or 
REJOINDER or RESPONDER or ANY - %s\r\n",arg2) } else if ((!stricmp(nextToken,(char*)"^first") || !stricmp(nextToken,(char*)"^last") || !stricmp(nextToken,(char*)"^random")) && *arg2) BADSCRIPT((char*)"CALL-23 Too many arguments to first/last/random - %s\r\n",arg2) else if (!stricmp(D->word,(char*)"^respond") && atoi(arg1)) BADSCRIPT((char*)"CALL-? argument to ^respond should be a topic, not a number. Did you intend ^response? - %s\r\n",arg1) else if (!stricmp(D->word,(char*)"^respond") && !stricmp(arg1,currentTopicName)) WARNSCRIPT((char*)"Recursive call to topic - possible infinite recursion danger %s\r\n",arg1) else if (!stricmp(D->word, (char*)"^jsonmerge") && !stricmp(arg1, "key") && !stricmp(arg1, "key-value")) WARNSCRIPT((char*)"^jsonmerge requires key or key-value %s\r\n", arg1) else if (!stricmp(D->word,(char*)"^gambit") && !stricmp(arg1,currentTopicName)) WARNSCRIPT((char*)"Recursive call to topic - possible infinite recursion danger %s\r\n",arg1) else if (!stricmp(D->word,(char*)"^response") && *arg1 == '~') BADSCRIPT((char*)"CALL-? argument to ^response should be a number, not a topic. Did you intend ^respond? - %s\r\n",arg1) else if (!stricmp(D->word,(char*)"^burst") && !stricmp(arg1,(char*)"wordcount")) BADSCRIPT((char*)"CALL-? argument to ^burst renamed. 
Use 'count' instead of 'wordcount'\r\n") // validate inference calls if we can else if (!strcmp(D->word,(char*)"^query")) { unsigned int flags = atoi(argset[9]); if (flags & (USER_FLAG1|USER_FLAG2|USER_FLAG3) && !strstr(arg1,(char*)"flag_")) BADSCRIPT((char*)"CALL-24 ^query involving USER_FLAG1 must be named xxxflag_\r\n") if (!stricmp(arg1,(char*)"direct_s") || !stricmp(arg1,(char*)"exact_s")) { if (!*arg2 || *arg2 == '?') BADSCRIPT((char*)"CALL-24 Must name subject argument to query\r\n") if (*argset[3] && *argset[3] != '?') BADSCRIPT((char*)"CALL-25 Cannot name verb argument to query %s - %s\r\n",arg1,argset[3]) if (*argset[4] && *argset[4] != '?') BADSCRIPT((char*)"CALL-26 Cannot name object argument to query %s - %s\r\n",arg1,argset[4]) if (*argset[8] && *argset[8] != '?') BADSCRIPT((char*)"CALL-27 Cannot name propgation argument to query %s - %s\r\n",arg1,argset[8]) if (*argset[9] && *argset[9] != '?') BADSCRIPT((char*)"CALL-28 Cannot name match argument to query %s - %s\r\n",arg1,argset[9]) } flags = atoi(argset[5]); if (flags & (USER_FLAG1|USER_FLAG2|USER_FLAG3) && flags < 0x00ffffff) WARNSCRIPT((char*)"Did you want a xxxflag_ query with USER_FLAG in 9th position for %s\r\n",arg1) if (!stricmp(arg1,(char*)"direct_v") || !stricmp(arg1,(char*)"exact_v")) { if (*arg2 && *arg2 != '?') BADSCRIPT((char*)"CALL-29 Cannot name subject argument to query - %s\r\n",arg2) if (!*argset[3] || *argset[3] == '?') BADSCRIPT((char*)"CALL-30 Must name verb argument to query\r\n") if (*argset[4] && *argset[4] != '?') BADSCRIPT((char*)"CALL-31 Cannot name object argument to query %s - %s\r\n",arg1,argset[4]) if (*argset[8] && *argset[8] != '?') BADSCRIPT((char*)"CALL-32 Cannot name propgation argument to query %s - %s\r\n",arg1,argset[8]) if (*argset[9] && *argset[9] != '?') BADSCRIPT((char*)"CALL-33 Cannot name match argument to query %s - %s\r\n",arg1,argset[9]) } if (!stricmp(arg1,(char*)"direct_o") || !stricmp(arg1,(char*)"exact_o")) { if (*arg2 && *arg2 != '?') 
BADSCRIPT((char*)"CALL-34 Cannot name subject argument to query -%s\r\n",arg2) if (*argset[3] && *argset[3] != '?') BADSCRIPT((char*)"CALL-35 Cannot name verb argument to query %s - %s\r\n",arg1,argset[3]) if (!*argset[4] || *argset[4] == '?') BADSCRIPT((char*)"CALL-36 Must name object argument to query\r\n") if (*argset[8] && *argset[8] != '?') BADSCRIPT((char*)"CALL-37 Cannot name propgation argument to query %s - %s\r\n",arg1,argset[8]) if (*argset[9] && *argset[9] != '?') BADSCRIPT((char*)"CALL-38 Cannot name match argument to query %s - %s\r\n",arg1,argset[9]) } if (!stricmp(arg1,(char*)"direct_sv") || !stricmp(arg1,(char*)"exact_sv") ) { if (!*arg2 || *arg2 == '?') BADSCRIPT((char*)"CALL-39 Must name subject argument to query\r\n") if (!*argset[3] || *argset[3] == '?') BADSCRIPT((char*)"CALL-40 Must name verb argument to query\r\n") if (*argset[4] && *argset[4] != '?') BADSCRIPT((char*)"CALL-41 Cannot name object argument to query %s - %s\r\n",arg1,argset[4]) if (*argset[8] && *argset[8] != '?') BADSCRIPT((char*)"CALL-42 Cannot name propgation argument to query %s - %s\r\n",arg1,argset[8]) if (*argset[9] && *argset[9] != '?') BADSCRIPT((char*)"CALL-43 Cannot name match argument to query %s - %s\r\n",arg1,argset[9]) } if (!stricmp(arg1,(char*)"direct_sv_member")) { if (!*arg2 || *arg2 == '?') BADSCRIPT((char*)"CALL-44 Must name subject argument to query\r\n") if (!*argset[3] || *argset[3] == '?') BADSCRIPT((char*)"CALL-45 Must name verb argument to query\r\n") if (*argset[4] && *argset[4] != '?') BADSCRIPT((char*)"CALL-46 Cannot name object argument to query %s - %s\r\n",arg1,argset[4]) if (*argset[8] && *argset[8] != '?') BADSCRIPT((char*)"CALL-47 Cannot name propgation argument to query %s - %s\r\n",arg1,argset[8]) if (*argset[9] && *argset[9] == '?') BADSCRIPT((char*)"CALL-48 Must name match argument to query %s - %s\r\n",arg1,argset[9]) } if (!stricmp(arg1,(char*)"direct_vo")|| !stricmp(arg1,(char*)"exact_vo")) { if (*arg2 && *arg2 != '?') 
BADSCRIPT((char*)"CALL-49 Cannot name subject argument to query -%s\r\n",arg2) if (!*argset[3] || *argset[3] == '?') BADSCRIPT((char*)"CALL-50 Must name verb argument to query\r\n") if (!*argset[4] || *argset[4] == '?') BADSCRIPT((char*)"CALL-51 Must name object argument to query\r\n") if (*argset[8] && *argset[8] != '?') BADSCRIPT((char*)"CALL-52 Cannot name propgation argument to query %s - %s\r\n",arg1,argset[8]) if (*argset[9] && *argset[9] != '?') BADSCRIPT((char*)"CALL-53 Cannot name match argument to query %s - %s\r\n",arg1,argset[9]) } if (!stricmp(arg1,(char*)"direct_svo") || !stricmp(arg1,(char*)"exact_svo") ) { if (!*arg2 || *arg2 == '?') BADSCRIPT((char*)"CALL-54 Must name subject argument to query\r\n") if (!*argset[3] || *argset[3] == '?') BADSCRIPT((char*)"CALL-55 Must name verb argument to query\r\n") if (!*argset[4] || *argset[4] == '?') BADSCRIPT((char*)"CALL-56 Must name object argument to query\r\n") if (*argset[8] && *argset[8] != '?') BADSCRIPT((char*)"CALL-57 Cannot name propgation argument to query %s - %s\r\n",arg1,argset[8]) if (*argset[9] && *argset[9] != '?') BADSCRIPT((char*)"CALL-58 Cannot name match argument to query %s - %s\r\n",arg1,argset[9]) } } } static void CheckTopicExists(char* name) { if (*name != '~' || !name[1]) return; // not a direct topic name WORDP D = FindWord(name); if (D && D->internalBits & TOPIC) return; // generate crosscheck data int lsize = (strlen(name) + 2 + 8 + strlen(scopeBotName) + 1 + 7) / 8; char* nameData = AllocateHeap(NULL, lsize, 8); *(uint64*)nameData = myBot; nameData[8] = 0; strcpy(nameData + 9, name); char* more = nameData + 10 + strlen(nameData + 9); strcpy(more, scopeBotName); char* filename = AllocateHeap(currentFilename, 0, 1); undefinedConceptThreadList = AllocateHeapval(HV1_STRING | HV2_STRING | HV3_INT, undefinedConceptThreadList, (uint64)nameData, (uint64)filename, (uint64)currentFileLine); } static char* ReadCall(char* name, char* ptr, FILE* in, char* &data,bool call, bool needTofield) { 
// returns with no space after it // ptr is just after the ^name -- user can make a call w/o ^ in name but its been patched. Or this is function argument char reuseTarget1[SMALL_WORD_SIZE]; char reuseTarget2[SMALL_WORD_SIZE]; char* xxstartit = data; int oldcallingsystem = callingSystem; *reuseTarget2 = *reuseTarget1 = 0; // in case this turns out to be a ^reuse call, we want to test for its target char* argset[ARGSETLIMIT+1]; char word[MAX_WORD_SIZE]; char* arguments = ptr; int argumentcolumn = currentLineColumn; int argumentline = currentFileLine; int callline = currentFileLine; int callcolumn = currentLineColumn - strlen(name) ; // locate the function WORDP D = FindWord(name,0,LOWERCASE_LOOKUP); if (!call || !D || !(D->internalBits & FUNCTION_NAME)) // not a function, is it a function variable? { if (IsDigit(name[1])) // function variable { *data++ = *name++; *data++ = *name++; if (IsDigit(*name)) *data++ = *name++; *data = 0; return ptr; } if (csapicall != NO_API_CALL && call && strnicmp(name,"^dp_",4)) // compile or execute are both bad { BADSCRIPT("Undefined function: %s", name) } } SystemFunctionInfo* info = NULL; bool isStream = false; // dont check contents of stream, just format it if (D && !(D->internalBits & FUNCTION_BITS)) // system function (not pattern macro, outputmacro, dual macro, tablemacro, or plan macro) { ++callingSystem; info = &systemFunctionSet[D->x.codeIndex]; if (info->argumentCount == STREAM_ARG) isStream = true; if (!stricmp(name,"^jsonarraysize")) WARNSCRIPT((char*)"^jsonarraysize deprecated in favor of ^length\r\n") if (!stricmp(name,"^jsondelete")) WARNSCRIPT((char*)"^jsondelete deprecated in favor of ^delete\r\n") } else if (patternContext && D && (D->internalBits & IS_PLAN_MACRO) == IS_PLAN_MACRO) BADSCRIPT((char*)"CALL-2 cannot invoke plan from pattern area - %s\r\n", name) else if (patternContext && D && !(D->internalBits & (IS_PATTERN_MACRO | IS_OUTPUT_MACRO))) BADSCRIPT((char*)"CALL-2 Can only call patternmacro or dual macro 
from pattern area - %s\r\n",name) else if (!patternContext && D && !(D->internalBits & (IS_OUTPUT_MACRO | IS_TABLE_MACRO))) BADSCRIPT((char*)"CALL-3 Cannot call pattern or table macro from output area - %s\r\n",name) memset(argset,0,sizeof(argset)); // default EVERYTHING before we test it later if (D && !stricmp(D->word,(char*)"^debug")) DebugCode(NULL); // a place for a script compile breakpoint // write call header strcpy(data,name); data += strlen(name); *data++ = ' '; *data++ = '('; strcpy(data++, " "); char* strbase = AllocateStack(NULL,1); bool oldContext = patternContext; patternContext = false; priorLine = currentFileLine; // validate argument counts and stuff locally, then swallow data offically as output data int parenLevel = 1; int argumentCount = 0; ptr = ReadNextSystemToken(in,ptr,word,false); // skip ( if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } while (ALWAYS) // read as many tokens as needed to complete the call, storing them locally { ptr = ReadNextSystemToken(in,ptr,word,false); if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } if (!*word) break; if (*word == '#' && word[1] == '!') BADSCRIPT((char*)"#! sample input seen during a call to %s. Probably missing a closing )\r\n",name); // closing paren stuck onto token like _) - break it off size_t len = strlen(word); if (word[len-1] == ')' && len > 1 && (*word != '\\' || len > 2) ) { --ptr; if (*ptr != ')') ptr -= 1; word[len-1] = 0; } MakeLowerCopy(lowercaseForm,word); // note that in making calls, [] () and {} count as a single argument with whatver they have inside switch(*word) { case '(': case '[': case '{': ++parenLevel; break; case ')': case ']': case '}': --parenLevel; if (parenLevel == 1) ++argumentCount; // completed a () argument break; case '"': if (word[1] != FUNCTIONSTRING && oldContext) // simple string is in pattern context, flip to underscore form { // convert string into its pattern form. 
unsigned int n = BurstWord(word,0); if (n > 1) strcpy(word,JoinWords(n)); } // DROPPING THRU case '\'': // DROPPING THRU default: if (*word == '~') CheckSetOrTopic(word); // set or topic if (!stricmp(word,(char*)"PLAN") && !stricmp(name,(char*)"^end")) endtopicSeen = true; if (parenLevel == 1) { if (*word == FUNCTIONSTRING && word[1] == '"') { word[0] = '"'; word[1] = FUNCTIONSTRING; // show we know it if (word[2] == ':') strcpy(word+3,CompileString(word+1)+2); } ReadNextSystemToken(in,ptr,nextToken,false,true); if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } // argument is a function without its ^ ? // but be wary of doing this in createfact, which can have nested facts if (*word != '^' && *nextToken == '(' && stricmp(name,(char*)"^createfact")) // looks like a call, reformat it if it is { char fnname[SMALL_WORD_SIZE]; *fnname = '^'; MakeLowerCopy(fnname+1,word); WORDP DX = FindWord(fnname,0,PRIMARY_CASE_ALLOWED); if (DX && DX->internalBits & FUNCTION_NAME) strcpy(word,fnname); } if (*word == '^' && (*nextToken == '(' || IsDigit(word[1]))) // function call or function var ref { WORDP D = FindWord(word,0,LOWERCASE_LOOKUP); if (!IsDigit(word[1]) && word[1] != USERVAR_PREFIX && *nextToken == '(' && (!D || !(D->internalBits & FUNCTION_NAME))) BADSCRIPT((char*)"CALL-1 Default call to function not yet defined %s\r\n",word) if (*nextToken != '(' && !IsDigit(word[1])) BADSCRIPT((char*)"CALL-? 
Unknown function variable %s\r\n",word) char* arg = data; if (!stricmp(word,"^if")) { ptr = ReadIf(word,ptr,in,data,NULL); strcpy(data++, " "); } else if (!stricmp(word,"^loop")) { ptr = ReadLoop(word,ptr,in,data,NULL,false); } else if (!stricmp(word, "^jsonloop")) { ptr = ReadLoop(word, ptr, in, data, NULL,true); } else { ptr = ReadCall(word,ptr,in,data,*nextToken == '(',false); strcpy(data++, " "); } if (argumentCount < ARGSETLIMIT) { argset[++argumentCount] = AllocateStack(arg); } continue; } if (*word == '^' && word[1] == '\''){;} // join can use function names without being a call else if (*word == '^' && stricmp(name,"^join") && * nextToken && *nextToken != '(' && word[1] != '^' && word[1] != USERVAR_PREFIX && word[1] != '_' && word[1] != '"' && !IsDigit(word[1])) // ^^ indicated a deref of something BADSCRIPT((char*)"%s is either a function missing arguments or an undefined function variable.\r\n",word) // not function call or function var ref // track only initial arguments for verify. 
can have any number when its a stream if (argumentCount < ARGSETLIMIT) { argset[++argumentCount] = AllocateStack(word); } } else { ReadNextSystemToken(in,ptr,nextToken,false,true); if (*word == '^' && *nextToken != '(' && *nextToken != ')' && word[1] != '"' && !IsDigit(word[1])) WARNSCRIPT((char*)"Is %s intended as a function call?\r\n",word) // not function call or function var ref } } if (oldContext && IsAlphaUTF8(*word) && stricmp(name,(char*)"^incontext") && stricmp(name,(char*)"^reuse") ) { //WritePatternWord(word); // we dont consider function datat to be keywords // WriteKey(word); } if (D && !stricmp(D->word, (char*)"^reuse") ) { MakeUpperCase(word); // topic names are lower case & labels must be upper case char* dot = strchr(word, '.'); if (dot) { *dot = 0; MakeLowerCase(word); *dot = '.'; } argset[1] = AllocateStack(word); } // add simple item into data if (!stricmp(word, "call") && (!stricmp(D->word, (char*)"^fail") || !stricmp(D->word, (char*)"^nofail"))) strcpy(word, "RULE"); strcpy(data,word); data += strlen(data); if (parenLevel == 0) break; // we finished the call (no trailing space) strcpy(data++, " "); } *--data = 0; // remove closing paren int cntr = argumentCount; while (cntr < 15) argset[++cntr] = AllocateStack(""); char* arg1 = argset[1]; char* arg2 = argset[2]; // validate assignment calls if we can - this will be a first,last,random call if (*assignKind && (!stricmp(name,(char*)"^first") || !stricmp(name,(char*)"^last") || !stricmp(name,(char*)"^random") || !stricmp(name,(char*)"^nth") ) ) { char kind = arg1[2]; if (*arg1 == '~') {;} // get nth of a concept else if (!kind) BADSCRIPT((char*)"CALL-5 Assignment must designate how to use factset (s v or o)- %s in %s %s\r\n",assignKind,name,arguments) else if ((kind == 'a' || kind == '+' || kind == '-') && *assignKind != '_') BADSCRIPT((char*)"CALL-6 Can only spread a fact onto a match var - %s\r\n",assignKind) else if (*assignKind == SYSVAR_PREFIX && (kind == 'f' || !kind)) 
BADSCRIPT((char*)"CALL-7 cannot assign fact into system variable\r\n") // into system variable else if (*assignKind == '@' && kind != 'f') BADSCRIPT((char*)"CALL-8 Cannot assign fact field into fact set\r\n") // into set, but not a fact } // functions taking an explicit topic name in argument need to confirm it exists if (D && (!stricmp(D->word, (char*)"^gambit") || !stricmp(D->word, (char*)"^respond") ) && *arg1 == '~') { MakeLowerCase(arg1); // topic names are lower case CheckTopicExists(arg1); } else if (D && !stricmp(D->word, (char*)"^setrejoinder") ) { if (!stricmp(arg1, "input") || !stricmp(arg1, "output") || !stricmp(arg1, "copy")) { MakeUpperCase(arg2); // rulename are uppercase and topic names are lower case char* dot = strchr(arg2, '.'); if (dot) { *dot = 0; MakeLowerCase(arg2); CheckTopicExists(arg2); *dot = '.'; } } else { MakeUpperCase(arg1); // rulename are uppercase and topic names are lower case char* dot = strchr(arg1, '.'); if (dot) { *dot = 0; MakeLowerCase(arg1); CheckTopicExists(arg1); *dot = '.'; } } } else if (D && (!stricmp(D->word, (char*)"^refine") || !stricmp(D->word, (char*)"^reuse"))&& (IsAlphaUTF8(*arg1) || *arg1 == '~')) { // arg1 may be rulename or topic.rulename MakeUpperCopy(reuseTarget1, arg1); // topic names are lower case & labels must be upper case char* dot = strchr(reuseTarget1, '.'); if (dot) { *dot = 0; MakeLowerCase(reuseTarget1); CheckTopicExists(reuseTarget1); *dot = '.'; } } else if (D && !stricmp(D->word,(char*)"^enable") && IsAlphaUTF8(*arg1)) { if (stricmp(arg1,(char*)"topic") && stricmp(arg1,(char*)"write") && stricmp(arg1,(char*)"rule") && stricmp(arg1,(char*)"usedrules") ) BADSCRIPT((char*)"CALL-18 Enable 1st arg must be TOPIC or RULE or USEDRULES - %s\r\n",arg1) if (*arg2 == '@'){;} else if (*arg2 != '~' || strchr(arg2,'.')) // not a topic or uses ~topic.rulename notation { MakeUpperCopy(reuseTarget1,arg2); // topic names & labels must be upper case char* dot = strchr(reuseTarget1, '.'); if (dot) { *dot = 0; 
MakeLowerCase(reuseTarget1); CheckTopicExists(reuseTarget1); *dot = '.'; } } else CheckTopicExists(arg2); } else if (D && !stricmp(D->word,(char*)"^disable") && IsAlphaUTF8(*arg1)) { if (stricmp(arg1,(char*)"topic") && stricmp(arg1,(char*)"rule") && stricmp(arg1,(char*)"write")&& stricmp(arg1,(char*)"rejoinder") && stricmp(arg1,(char*)"inputrejoinder") && stricmp(arg1,(char*)"outputrejoinder") && stricmp(arg1,(char*)"save") ) BADSCRIPT((char*)"CALL-19 Disable 1st arg must be TOPIC or RULE or INPUTREJOINDER or OUTPUTREJOINDER or SAVE - %s\r\n",arg1) if (*arg2 == '@'){;} else if (!stricmp(arg1,(char*)"rejoinder")){;} else if (*arg2 != '~' || strchr(arg2, '.')) { MakeUpperCopy(reuseTarget1, arg2); // topic names & labels must be upper case char* dot = strchr(reuseTarget1, '.'); if (dot) { *dot = 0; MakeLowerCase(reuseTarget1); CheckTopicExists(reuseTarget1); *dot = '.'; } } else CheckTopicExists(arg2); } if (D) ValidateCallArgs(D,arg1,arg2,argset,needTofield); if (parenLevel != 0) { char* value = (D) ? 
D->word : (char*)"unknown"; BADSCRIPT((char*)"CALL-59 Failed to properly close (or [ in call to %s started at line %d col %d\r\n", value, callline, callcolumn + supplementalColumn) } FunctionResult result; if (isStream){;} // no cares else if (info) // system function { if (argumentCount != (info->argumentCount & 255) && info->argumentCount != VARIABLE_ARG_COUNT && info->argumentCount != UNEVALED && info->argumentCount != STREAM_ARG) BADSCRIPT((char*)"CALL-60 Incorrect argument count to system function %s- given %d instead of required %d starting at line %d col %d \r\n",name,argumentCount,info->argumentCount & 255, argumentline, argumentcolumn) } else if (D && (D->internalBits & FUNCTION_BITS) == IS_PLAN_MACRO) { if (argumentCount != (int)D->w.planArgCount) BADSCRIPT((char*)"CALL-60 Incorrect argument count to plan %s- given %d instead of required %d\r\n",name,argumentCount,D->w.planArgCount) } else if ((!D || !FindAppropriateDefinition(D, result, true)) && strnicmp(name,"^dp_",4)) { // generate crosscheck data int lsize = (strlen(name) + 2 + 8 + strlen(scopeBotName) + 1 + 7) / 8; char* nameData = AllocateHeap(NULL, lsize, 8); *(uint64*)nameData = myBot; nameData[8] = (char)argumentCount; strcpy(nameData + 9, name); char* more = nameData + 10 + strlen(nameData + 9); strcpy(more, scopeBotName); char* filename = AllocateHeap(currentFilename, 0, 1); undefinedCallThreadList = AllocateHeapval(HV1_STRING|HV2_STRING|HV3_INT,undefinedCallThreadList, (uint64)nameData,(uint64) filename, (uint64)currentFileLine); } else if (strnicmp(name, "^dp_", 4))// std macro (input, output table) { unsigned char* defn = GetDefinition(D); if (defn && argumentCount != (int)MACRO_ARGUMENT_COUNT(defn) && !(D->internalBits & VARIABLE_ARGS_TABLE)) BADSCRIPT((char*)"CALL-60 Incorrect argument count to macro %s- given %d instead of required %d starting line %d col %d \r\n",name,argumentCount,MACRO_ARGUMENT_COUNT(GetDefinition(D)), argumentline, argumentcolumn) } // handle crosscheck of labels 
char* dot = strchr(reuseTarget1,'.'); if (!*reuseTarget1); else if (dot) // used dotted notation, split them up { strcpy(reuseTarget2,dot+1); *dot = 0; } else if (*reuseTarget1 != '~') // only had name, not topic.name, fill in { strcpy(reuseTarget2,reuseTarget1); if (currentFunctionDefinition) strcpy(reuseTarget1,currentFunctionDefinition->word); else strcpy(reuseTarget1,currentTopicName); } if (*reuseTarget1 && (*reuseTarget1 != '$' && *reuseTarget1 != '^' && *reuseTarget1 != '_' && *reuseTarget2 != USERVAR_PREFIX && *reuseTarget2 != '_')) // we cant crosscheck variable choices { if (*reuseTarget1 != '~') { memmove(reuseTarget1+1,reuseTarget1,strlen(reuseTarget1)+1); *reuseTarget1 = '~'; } strcat(reuseTarget1,(char*)"."); strcat(reuseTarget1,reuseTarget2); // compose the name NoteUse(reuseTarget1,currentFunctionDefinition ? currentFunctionDefinition->word : currentTopicName); } // now generate stuff as an output stream with its validation patternContext = oldContext; if (D && !(D->internalBits & FUNCTION_BITS)) --callingSystem; callingSystem = oldcallingsystem; strcpy(data++, ")"); // outer layer generates trailing space *data = 0; ReleaseStack(strbase); // short term return ptr; } static void TestSubstitute(char* word,char* message) { WORDP D = FindWord(word); if (!D) return; WORDP E = GetSubstitute(D); if (E) { if (E->word[0] == '!') return; // ignore conditional char* which = "Something"; if (D->internalBits & DO_SUBSTITUTES) which = "Substitutes.txt"; if (D->internalBits & DO_CONTRACTIONS) which = "Contractions.txt"; if (D->internalBits & DO_ESSENTIALS) which = "Essentials.txt"; if (D->internalBits & DO_INTERJECTIONS) which = "Interjections.txt"; if (D->internalBits & DO_BRITISH) which = "British.txt"; if (D->internalBits & DO_SPELLING) which = "Spelling.txt"; if (D->internalBits & DO_TEXTING) which = "Texting.txt"; if (D->internalBits & DO_PRIVATE) which = "user private substitution"; size_t len = strlen(D->word); currentLineColumn -= len; if (E->word[1] && 
E->word[0] != '~' && E->word[0] != '(') // concept changes of words will be considered interjections and pattern match form is accepted { WARNSCRIPT((char*)"%s changes %s to %s %s\r\n", which, word, E->word, message) } else if (!E->word[1]) WARNSCRIPT((char*)"%s erases %s %s\r\n", which, word, message) currentLineColumn += len; } } static void SpellCheckScriptWord(char* input,int startSeen,bool checkGrade) { if (!stricmp(input,(char*)"null")) return; // assignment clears if (nospellcheck) return; if (!stricmp(current_language, "japanese") || !stricmp(current_language, "chinese")) return; int kind = 0; bool japanese = (!stricmp(current_language, "japanese") || !stricmp(current_language, "chinese")); unsigned char japanletter[8]; if (!japanese) japanese = csapicall == COMPILE_PATTERN && IsJapanese(input, (unsigned char*)&japanletter, kind); // remove any trailing punctuation char word[MAX_WORD_SIZE]; // see if supposed to ignore capitalization differences MakeLowerCopy(word, input); WORDP X = FindWord(word, 0, LOWERCASE_LOOKUP); if (X && X->internalBits & DO_NOISE && !(X->systemFlags & HAS_SUBSTITUTE)) return; strcpy(word,input); size_t len = strlen(word); while (len > 1 && !IsAlphaUTF8(word[len-1]) && word[len-1] != '.') word[--len] = 0; WORDP set[GETWORDSLIMIT]; int i = GetWords(word, set, false); // words in any case and with mixed underscore and spaces char text[MAX_WORD_SIZE]; *text = 0; int nn = 0; if (i > 1) // multiple spell? 
{ for (int x = 0; x < i; ++x) { if (GETMULTIWORDHEADER(set[x])) { if (!(set[x]->properties & TAG_TEST)) continue; // dont care } if (set[x]->properties & NOUN_FIRSTNAME) // Will, June, etc continue; strcat(text, set[x]->word); strcat(text, " "); ++nn; } } if (nn > 1 && !nomixedcase) { WARNSCRIPT((char*)"Word \"%s\" known in multiple spellings %s\r\n", word, text) return; } WORDP D = FindWord(word,0,LOWERCASE_LOOKUP); WORDP entry = D; WORDP canonical = D; if (word[1] == 0 || IsUpperCase(*input) || !IsAlphaUTF8(*word) || strchr(word,'\'') || strchr(word,'.') || strchr(word,'_') || strchr(word,'-') || strchr(word,'~')) {;} // ignore proper names, sets, numbers, composite words, wordnet references, etc else if (stricmp(current_language,"english")) {;} // dont complain on foreign else if (!D || (!(D->properties & NORMAL_WORD) && !(D->systemFlags & PATTERN_WORD))) { // we dont know this word in lower case uint64 sysflags = 0; uint64 cansysflags = 0; wordStarts[0] = wordStarts[1] = wordStarts[2] = wordStarts[3] = AllocateHeap((char*)""); wordCount = 1; WORDP revise; uint64 flags = 0; if (D) { ongoingDictChanges = NULL; ongoingUniversalDictChanges = NULL; monitorDictChanges = true; flags = GetPosData((unsigned int)-1, word, revise, entry, canonical, sysflags, cansysflags, false, true, 0); // GetPosData can add properties, revert additions monitorDictChanges = false; ReverseDictionaryChanges(ongoingDictChanges); ReverseUniversalDictionaryChanges(ongoingUniversalDictChanges); ongoingDictChanges = NULL; ongoingUniversalDictChanges = NULL; } if (!flags) // try upper case { WORDP E = FindWord(word,0,SECONDARY_CASE_ALLOWED); // uppercase if (E && E != D && E->word[2] && !nomixedcase) WARNSCRIPT((char*)"Word %s only known in upper case\r\n",word) else if (E && !(E->internalBits & UPPERCASE_HASH) && !D ) WARNSCRIPT((char*)"%s is not a known word. Is it misspelled?\r\n",word) canonical = E; // the base word } } // check vocabularly limits? 
if (grade && checkGrade && !stricmp(current_language,"English")) { if (canonical && !IsUpperCase(*input) && !(canonical->systemFlags & grade) && !strchr(word,'\'')) // all contractions are legal Log(USERLOG,"Grade Limit: %s\r\n",D->word); } // see if substitition will ruin this word if (!(spellCheck & NO_SUBSTITUTE_WARNING) ) { if (startSeen != -1) TestSubstitute(word,(char*)"anywhere in input"); char test[MAX_WORD_SIZE]; sprintf(test,(char*)"<%s",word); if (startSeen == 0) TestSubstitute(test,(char*)"at input start"); sprintf(test,(char*)"%s>",word); if (startSeen != -1) TestSubstitute(test,(char*)"at input end"); sprintf(test,(char*)"<%s>",word); if (startSeen == 0) TestSubstitute(test,(char*)"as entire input"); } } static char* GetRuleElipsis(char* rule) { static char value[50]; strncpy(value,rule,45); value[45] = 0; return value; } static bool PatternRelationToken(char* ptr) { if (*ptr == '!' && (ptr[1] == '=' || ptr[1] == '?')) return true; if (*ptr == '>' || *ptr == '<' || *ptr == '?' || *ptr == '&') return true; if (*ptr == '=') return true; return false; } static bool RelationToken(char* word) { if (*word == '=') return (word[1] == '=' || !word[1]); return (*word == '<' || *word == '>' || *word == '?' || (*word == '!' 
&& word[1] == '=') || *word == '&'); } static char* ReadDescribe(char* ptr, FILE* in,unsigned int build) { while (ALWAYS) // read as many tokens as needed to complete the definition (must be within same file) { char word[MAX_WORD_SIZE]; char description[MAX_WORD_SIZE]; ptr = ReadNextSystemToken(in,ptr,word,false); if (!stricmp(word,(char*)"describe:")) ptr = ReadNextSystemToken(in,ptr,word,false); // keep going with local loop if (!*word) break; // file ran dry size_t len = strlen(word); if (TopLevelUnit(word)) // definition ends when another major unit starts { ptr -= len; // let someone else see this starter break; } if (*word != USERVAR_PREFIX && *word != '_' && *word != '^' && *word != '~') BADSCRIPT((char*)"Described entity %s is not legal to describe- must be variable or function or concept/topic\r\n", word) isDescribe = true; ptr = ReadNextSystemToken(in,ptr,description,false); isDescribe = false; char file[SMALL_WORD_SIZE]; sprintf(file,(char*)"%s/BUILD%s/describe%s.txt", topicfolder,baseName,baseName); FILE* out = FopenUTF8WriteAppend(file); fprintf(out,(char*)" %s %s\r\n",word,description); fclose(out); // dont use Fclose } return ptr; } static void OverCover(char* laterword, STACKREF keywordList[PATTERNDEPTH], char nestKind[PATTERNDEPTH], int nestIndex) { // [ your "your own"] has your blocking detection of your_own and we want the longer match if (!stricmp(current_language, "japanese") || !stricmp(current_language, "chinese")) return; if (nestKind[nestIndex - 1] != '[' && nestKind[nestIndex - 1] != '{') return; char word1[MAX_WORD_SIZE]; strcpy(word1, laterword); char* underscore = strchr(word1, '_'); if (underscore) // see if masked by earlier word { *underscore = 0; // initial word of phrase STACKREF item = keywordList[nestIndex - 1]; while (item) { uint64 priorword; item = UnpackStackval(item, priorword); if (!strcmp((char*)priorword, word1)) BADSCRIPT((char*)"Keyword phrase %s occluded by %s. 
Switch their order.", laterword, priorword) } *underscore = '_'; } // add to list char* word = AllocateStack(laterword); keywordList[nestIndex - 1] = AllocateStackval(keywordList[nestIndex - 1], (uint64)word); } // never freed during compile char* ReadPattern(char* ptr, FILE* in, char* &data,bool macro, bool ifstatement) { // called from topic or patternmacro #ifdef INFORMATION // meaning of leading characters < > << >> sentence start & end boundaries, any ! NOT nul end of data from macro definition or argument substitution * *1 *~ *~2 *-1 gap (infinite, exactly 1 word, 0-2 words, 0-2 words, 1 word before) _ _2 memorize next match or refer to 3rd memorized match (0-based) @ factset references @5subject and _1 (positional set) $ user variable ^ ^1 function call or function argument (user) ()[]{} nesting of some kind (sequence AND, OR, OPTIONAL) dquote string token ? a question input ~dat ~ topic/set reference or current topic % system variable =xxx comparison test (= > < ) apostrophe and apostrophe! non-canonical meaning on next token or exclamation test \ escape next character as literal (\$ \= \~ \(etc) #xxx a constant number symbol, but only allowed on right side of comparison ------default values -1234567890 number token 12.14 number token 1435 number token a-z,A-Z,|,_ normal token , normal token (internal sentence punctuation) - period will never exist since we strip tail and cant be internal ----- these are things which must all be insured lower case (some can be on left or right side of comparison op) % system variable ~dat topic/set reference a: thru u: responder codes if/loop/jsonloop constructs ^call call function/macro calls with or without ^ ^fnvar function variables ^$glblvar global function variables $ user variable @ debug ahd factset references labels on responders responder types s: u: t: r: name of topic or concept $x : = y(do assignment and do not fail) $x:+=y, $x : -= y, $x : *= y, $x : /= y, $x : %= y, $x : ^= y, $^x:=.... 
define function #endif patternStarter = data; // for bug messages int oldsupplementalColumn = supplementalColumn; supplementalColumn = 0; char word[MAX_WORD_SIZE * 10]; char nestKind[PATTERNDEPTH]; char* nestData[PATTERNDEPTH]; STACKREF keywordList[PATTERNDEPTH]; int nestLine[PATTERNDEPTH]; int nestIndex = 0; patternContext = true; unsigned int conceptIndex = 0; // id of concept set char* conceptBufferLevelStart[PATTERNDEPTH]; //start of currentConceptBuffer char* currentConceptBuffer = NULL; // movable ptr static char* conceptbase = NULL; // start of xfer buffer char* currentConceptXfer = NULL; // movable ptr char conceptStarted[PATTERNDEPTH]; // have we inited this level yet? char* stackbase = AllocateStack(NULL, 4); char* start = ptr; // if macro call, there is no opening ( or closing ) // We claim an opening and termination comes from finding a toplevel token if (macro) { nestData[nestIndex] = data; nestKind[nestIndex++] = '('; } bool variableGapSeen = false; // wildcard pending // prefix characters bool memorizeSeen = false; // memorization pending bool quoteSeen = false; // saw ' bool notSeen = false; // saw ! bool bidirectionalSeen = false; // saw *~nb bool doubleNotSeen = false; // saw !! size_t len; bool startSeen = false; // starting token or not char* startPattern = data; char* startOrig = ptr; char* backup; // these buffer allocations must be last to balance Freebuffers in endScriptCompiler currentConceptBuffer = conceptBufferLevelStart[0] = AllocateBuffer(); conceptIndex = 0; conceptStarted[0] = 0; currentConceptXfer = conceptbase = AllocateBuffer(); while (ALWAYS) // read as many tokens as needed to complete the definition { backup = ptr; bool blockspace = false; unsigned int priorColumn = currentLineColumn; ptr = ReadNextSystemToken(in,ptr,word); if (!*word) break; // end of file if (!strcmp(word, "==") || !strcmp(word, "=")) WARNSCRIPT((char*)"== or = used standalone in pattern. 
Shouldn't it be attached to left and right tokens?\r\n") // we came from pattern IF and lack a ( if (ifstatement && *word != '(' && nestIndex == 0) { nestData[nestIndex] = data; nestKind[nestIndex++] = '('; } MakeLowerCopy(lowercaseForm,word); if (TopLevelUnit(lowercaseForm) || TopLevelRule(lowercaseForm)) // end of pattern { ptr -= strlen(word); // safe break; } char c = 0; char* assignment = FindAssignment(word); if (assignment) // assignment, do normal analysis on 1st argument { c = *assignment; *assignment = 0; } char* comparison = FindComparison(word); char comparisonchar = 0; if (comparison) // comparison, do normal analysis on 1st argument { comparisonchar= c = *comparison; *comparison = 0; } switch(*word) // ordinary tokens, not the composite comparison blob { // token prefixes case '!': // NOT if (quoteSeen) BADSCRIPT((char*)"PATTERN-1 Cannot have ' and ! in succession\r\n") if (memorizeSeen) BADSCRIPT((char*)"PATTERN-2 Cannot use _ before _\r\n") if (notSeen) BADSCRIPT((char*)"PATTERN-3 Cannot have two ! in succession\r\n") if (!word[1]) BADSCRIPT((char*)"PATTERN-4 Must attach ! to next token. If you mean exclamation match, use escaped ! \r\n %s\r\n",ptr) notSeen = true; if (word[1] == '!') doubleNotSeen = true; if (comparison) *comparison = c; ptr -= strlen(word); // safe if (*ptr == '!') ++ptr; if (*ptr == '!') ++ptr; // possible !! allowed continue; case '_': // memorize OR var reference if (quoteSeen && !IsDigit(word[1])) { patternEnder = data; BADSCRIPT((char*)"PATTERN-1 Cannot have ' and _ in succession except when quoting a match variable. Need to reverse them\r\n") } if (memorizeSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-6 Cannot have two _ in succession\r\n") } if (!word[1]) // allow separation which will be implied as needed { if (!ifstatement) { patternEnder = data; BADSCRIPT((char*)"PATTERN-7 Must attach _ to next token. If you mean _ match, use escaped _. 
%s\r\n", ptr) } } if (IsDigit(word[1])) // match variable { if (GetWildcardID(word) < 0) { BADSCRIPT((char*)"PATTERN-8 _%d is max match reference - %s\r\n", MAX_WILDCARDS - 1, word) patternEnder = data; } char* follow = word + 2; if (IsDigit(*follow)) ++follow; if (*follow) { if (*follow == '-' || *follow == '+') BADSCRIPT((char*)"PATTERN-8 %s has attached stuff. Did you intend @%s expression?\r\n", word,word) } if (*ptr == '[' || *ptr == '{' || *ptr == '(') blockspace = true; break; } memorizeSeen = true; quoteSeen = false; if (comparison) *comparison = c; len = strlen(word) - 1 ; ptr -= len; // the remnant strncpy(ptr,word+1, len); // this allows a function parameter (originally ^word but now ^0) to properly reset continue; case '\'': // original (non-canonical) token - possessive must be \'s or \' if (quoteSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-10 Cannot have two ' in succession\r\n") } if (!word[1]) { patternEnder = data; BADSCRIPT((char*)"PATTERN-11 Must attach ' to next token. 
If you mean ' match, use \' \r\n %s\r\n", ptr) } quoteSeen = true; variableGapSeen = false; if (comparison) *comparison = c; len = strlen(word) - 1 ; ptr -= len; // the remnant strncpy(ptr,word+1, len); // this allows a function parameter (originally ^word but now ^0) to properly reset continue; case '<': // sentence start < or unordered start << if (quoteSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-12 Cannot use ' before < or <<\r\n") } if (memorizeSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-13 Cannot use _ before < or <<\r\n") } if (word[1] == '<') // << unordered start { if (memorizeSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-14 Cannot use _ before << \r\n") } if (nestKind[nestIndex - 1] == '<') { patternEnder = data; BADSCRIPT((char*)"PATTERN-15 << already in progress\r\n") } if (variableGapSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-16 Cannot use * before <<\r\n") } // close [ or { for a moment if (nestKind[nestIndex - 1] == '[' || nestKind[nestIndex - 1] == '{') { currentConceptBuffer = conceptBufferLevelStart[conceptIndex]; // resume here strcpy(currentConceptXfer, currentConceptBuffer); currentConceptXfer += strlen(currentConceptXfer); if (!livecall && *currentConceptBuffer) // we had some member { sprintf(currentConceptXfer, "%s", ")\r\n"); currentConceptXfer += 3; } *currentConceptBuffer = 0; *currentConceptXfer = 0; conceptBufferLevelStart[conceptIndex] = currentConceptBuffer; conceptStarted[conceptIndex] = 0; } nestLine[nestIndex] = (currentFileLine << 16) | currentLineColumn; nestData[nestIndex] = data; nestKind[nestIndex++] = '<'; } else if (word[1]) { patternEnder = data; BADSCRIPT((char*)"PATTERN-17 %s cannot start with <\r\n", word) } variableGapSeen = false; break; case '@': if (quoteSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-18 Quoting @ is meaningless.\r\n"); } if (memorizeSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-19 Cannot use _ before @\r\n") } if (word[1] == '_') // set match 
position @_5 { if (GetWildcardID(word + 1) >= MAX_WILDCARDS) { patternEnder = data; BADSCRIPT((char*)"PATTERN-? %s is not a valid positional reference - must be < %d\r\n", word, MAX_WILDCARDS) } char* end = word + 3; while (IsDigit(*end)) ++end; if (*end) { if (*end == '+' && (!end[1] || end[1] == 'i')) {;} else if (*end == '-' && (!end[1] || end[1] == 'i')) {;} else { patternEnder = data; BADSCRIPT((char*)"PATTERN-? %s is not a valid positional reference - @_2+ or @_2- or @_2 would be\r\n", word) } } variableGapSeen = false; // no longer after anything. we are changing direction } else if (!stricmp(word, "@retry")) {} else if (GetSetID(word) < 0) { patternEnder = data; BADSCRIPT((char*)"PATTERN-20 %s is not a valid factset reference\r\n", word) // factset reference } else if (!GetSetMod(word)) { patternEnder = data; BADSCRIPT((char*)"PATTERN-20 %s is not a valid factset reference\r\n", word) // factset reference } break; case '>': // sentence end > or unordered end >> if (quoteSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-21 Cannot use ' before > or >>\r\n") } if (word[1] == '>') // >> { if (memorizeSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-22 Cannot use _ before >> \r\n") } if (nestKind[nestIndex - 1] != '<') { patternEnder = data; BADSCRIPT((char*)"PATTERN-23 Have no << in progress\r\n") } if (variableGapSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-24 Cannot use wildcard inside >>\r\n") } if (nestKind[--nestIndex] != '<') { patternStarter = nestData[nestIndex]; patternEnder = data; char d[100]; strncpy(d, patternStarter, 60); d[59] = 0; BADSCRIPT((char*)"PATTERN-24 >> should be closing %c started at line %d col %d\r\n", nestKind[nestIndex], nestLine[nestIndex] >> 16, nestLine[nestIndex] & 0x00ffff,d) } } variableGapSeen = false; break; // sentence end align case '(': // sequential pattern unit begin // close [ or { for a moment if (nestKind[nestIndex-1] == '[' || nestKind[nestIndex-1] == '{') { currentConceptBuffer = 
conceptBufferLevelStart[conceptIndex]; // resume here strcpy(currentConceptXfer, currentConceptBuffer); currentConceptXfer += strlen(currentConceptXfer); if (!livecall && *currentConceptBuffer) // we had some member { sprintf(currentConceptXfer, "%s", ")\r\n"); currentConceptXfer += 3; } *currentConceptBuffer = 0; *currentConceptXfer = 0; conceptBufferLevelStart[conceptIndex] = currentConceptBuffer; conceptStarted[conceptIndex] = 0; } if (bidirectionalSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-34 ] Cant use ( after bidirectional gap- scanning backwards is bad\r\n") } if (quoteSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-25 Quoting ( is meaningless.\r\n") } nestLine[nestIndex] = (currentFileLine << 16) | currentLineColumn; nestData[nestIndex] = data; nestKind[nestIndex++] = '('; break; case ')': // sequential pattern unit end if (quoteSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-26 Quoting ) is meaningless.\r\n") } if (memorizeSeen && !ifstatement) { patternEnder = data; BADSCRIPT((char*)"PATTERN-27 Cannot use _ before )\r\n") } if (variableGapSeen && nestIndex > 1) { patternEnder = data; BADSCRIPT((char*)"PATTERN-26 Cannot have wildcard followed by )\r\n") } if (nestKind[--nestIndex] != '(') { patternEnder = data; patternStarter = nestData[nestIndex]; char d[100]; strncpy(d, patternStarter, 60); d[59] = 0; BADSCRIPT((char*)"PATTERN-9 ) should be closing %c started at line %d col %d %s\r\n", nestKind[nestIndex], nestLine[nestIndex] >> 16, nestLine[nestIndex] & 0x00ffff,d) } break; case '[': // list of pattern choices begin if (quoteSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-30 Quoting [ is meaningless.\r\n"); } if (nestKind[nestIndex - 1] == '[' || nestKind[nestIndex - 1] == '{') { currentConceptBuffer = conceptBufferLevelStart[conceptIndex]; // resume here strcpy(currentConceptXfer, currentConceptBuffer); currentConceptXfer += strlen(currentConceptXfer); if (!livecall && *currentConceptBuffer) // we had some member { 
sprintf(currentConceptXfer, "%s", ")\r\n"); currentConceptXfer += 3; } *currentConceptBuffer = 0; *currentConceptXfer = 0; conceptBufferLevelStart[conceptIndex] = currentConceptBuffer; conceptStarted[conceptIndex] = 0; } nestLine[nestIndex] = (currentFileLine << 16) | currentLineColumn; keywordList[nestIndex] = 0; nestData[nestIndex] = data; nestKind[nestIndex++] = '['; conceptBufferLevelStart[++conceptIndex] = currentConceptBuffer; conceptStarted[conceptIndex] = 0; break; case ']': // list of pattern choices end if (quoteSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-31 Quoting ] is meaningless.\r\n") } if (memorizeSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-32 Cannot use _ before ]\r\n") } if (variableGapSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-33 Cannot have wildcard followed by ]\r\n") } if (nestKind[--nestIndex] != '[') { patternEnder = data; patternStarter = nestData[nestIndex]; char d[100]; strncpy(d, patternStarter, 60); d[59] = 0; BADSCRIPT((char*)"PATTERN-34 ] should be closing %c started at line %d col %d %s\r\n", nestKind[nestIndex], nestLine[nestIndex] >> 16, nestLine[nestIndex] & 0x00ffff,d) } currentConceptBuffer = conceptBufferLevelStart[conceptIndex--]; // resume here strcpy(currentConceptXfer, currentConceptBuffer); currentConceptXfer += strlen(currentConceptXfer); if (!livecall && *currentConceptBuffer) // we had some member { sprintf(currentConceptXfer,"%s",")\r\n"); currentConceptXfer += 3; } *currentConceptBuffer = 0; *currentConceptXfer = 0; break; case '{': // list of optional choices begins if (nestKind[nestIndex - 1] == '[') { patternEnder = data; BADSCRIPT((char*)"PATTERN-15 {} within [] is pointless because it always matches\r\n") } if (nestKind[nestIndex - 1] == '<' && !memorizeSeen && !strstr(start,"^matches") && compiling != PIECE_COMPILE) WARNSCRIPT((char*)"PATTERN-15 {} within << >> is pointless unless you memorize or use ^matches because it always matches\r\n") if (bidirectionalSeen) { patternEnder 
= data; BADSCRIPT((char*)"PATTERN-34 ] Cant use { after bidirectional gap - will always match scanning backwards\r\n") } // close [ or { for a moment if (nestKind[nestIndex - 1] == '[' || nestKind[nestIndex - 1] == '{') { currentConceptBuffer = conceptBufferLevelStart[conceptIndex]; // resume here strcpy(currentConceptXfer, currentConceptBuffer); currentConceptXfer += strlen(currentConceptXfer); if (!livecall && *currentConceptBuffer) // we had some member { sprintf(currentConceptXfer, "%s", ")\r\n"); currentConceptXfer += 3; } *currentConceptBuffer = 0; *currentConceptXfer = 0; conceptBufferLevelStart[conceptIndex] = currentConceptBuffer; conceptStarted[conceptIndex] = 0; } if (variableGapSeen) { // if we can see end of } and it has a gap after it... thats a problem - two gaps in succession is the equivalent char* end = strchr(ptr,'}'); if (end) { end = SkipWhitespace(end); if (*end == '*') WARNSCRIPT((char*)"Wildcard before and after optional will probably not work since wildcards wont know where to end if optional fails. 
Use some other formulation\r\n") } } if (quoteSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-35 Quoting { is meaningless.\r\n"); } if (notSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-36 !{ is pointless since { can fail or not anyway\r\n") } if (nestIndex && nestKind[nestIndex - 1] == '{') BADSCRIPT((char*)"PATTERN-37 {{ is illegal\r\n") keywordList[nestIndex] = 0; nestLine[nestIndex] = (currentFileLine << 16) | currentLineColumn; nestData[nestIndex] = data; nestKind[nestIndex++] = '{'; conceptBufferLevelStart[++conceptIndex] = currentConceptBuffer; conceptStarted[conceptIndex] = 0; break; case '}': // list of optional choices ends if (quoteSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-38 Quoting } is meaningless.\r\n"); } if (memorizeSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-39 Cannot use _ before }\r\n") } if (variableGapSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-40 Cannot have wildcard followed by }\r\n") } if (nestKind[--nestIndex] != '{') { patternEnder = data; patternStarter = nestData[nestIndex]; char d[100]; strncpy(d, patternStarter, 60); d[59] = 0; BADSCRIPT((char*)"PATTERN-41 } should be closing %c started at line %d col %d %s\r\n", nestKind[nestIndex], nestLine[nestIndex] >> 16, nestLine[nestIndex] & 0x00ffff,d) } currentConceptBuffer = conceptBufferLevelStart[conceptIndex--]; // resume here strcpy(currentConceptXfer, currentConceptBuffer); currentConceptXfer += strlen(currentConceptXfer); if (!livecall && *currentConceptBuffer) // we had some member { sprintf(currentConceptXfer, "%s", ")\r\n"); currentConceptXfer += 3; } *currentConceptBuffer = 0; *currentConceptXfer = 0; break; case '\\': // literal next character if (quoteSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-42 Quoting an escape is meaningless.\r\n"); } if (!word[1]) { patternEnder = data; BADSCRIPT((char*)"PATTERN-43 Backslash must be joined to something to escape\r\n") } variableGapSeen = false; if (word[1] && IsAlphaUTF8(word[1] 
)) memmove(word,word+1,strlen(word)); // escaping a real word, just use it break; case '*': // gap: * *1 *~2 (infinite, exactly 1 word, 0-2 words, 0-2 words, 1 word before) and *alpha*x* is form match if (quoteSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-44 Quoting a wildcard\r\n"); } if (nestKind[nestIndex - 1] == '<') { patternEnder = data; BADSCRIPT((char*)"PATTERN-45 Can not have wildcard %s inside << >>\r\n", word) } if (nestKind[nestIndex - 1] != '(' && (word[1] == '~' || !word[1])) { patternEnder = data; BADSCRIPT((char*)"PATTERN-45 Can only have variable wildcard %s inside ( )\r\n", word) } if (variableGapSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-46 Cannot have wildcard followed by %s\r\n", word) } if (IsAlphaUTF8(word[1])) break; // find this word as fragmented spelling like sch*ding* since it will have * as a prefix // gaps of various flavors if (notSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-47 cannot have ! before gap - %s\r\n", word) } if (IsDigit(word[1])) // enumerated gap size { int n = word[1] - '0'; if (n == 0) { patternEnder = data; BADSCRIPT((char*)"PATTERN-48 *0 is meaningless\r\n") } if (word[2]) { BADSCRIPT((char*)"PATTERN-49 *9 is the largest gap allowed or bad stuff is stuck to your token- %s\r\n", word) } } else if (word[1] == '-') // backwards { int n = word[2] - '0'; if (n == 0) { patternEnder = data; BADSCRIPT((char*)"PATTERN-50 *-1 is the smallest backward wildcard allowed - %s\r\n", word) } if (word[3]) { patternEnder = data; BADSCRIPT((char*)"PATTERN-51 *-9 is the largest backward wildcard or bad stuff is stuck to your token- %s\r\n", word) } } else if (word[1] == '~') // close-range gap { if (nestKind[nestIndex - 1] == '{' || nestKind[nestIndex - 1] == '[') { patternEnder = data; BADSCRIPT((char*)"PATTERN-5? 
cannot stick %s wildcard inside {} or []\r\n", word) } variableGapSeen = true; int n = word[2] - '0'; if (!word[2]) { patternEnder = data; BADSCRIPT((char*)"PATTERN-52 *~ is not legal, you need a digit after it\r\n") } else if (n == 0 && word[2] != '0') { patternEnder = data; BADSCRIPT((char*)"PATTERN-53 *~1 is the smallest close-range gap - %s\r\n", word) } else if (word[3] && word[3] != 'b') { patternEnder = data; BADSCRIPT((char*)"PATTERN-54 *~9 is the largest close-range gap or bad stuff is stuck to your token- %s\r\n", word) } } else if (word[1]) { patternEnder = data; BADSCRIPT((char*)"PATTERN-55 * jammed against some other token- %s\r\n", word) } else { if (nestKind[nestIndex - 1] == '{' || nestKind[nestIndex - 1] == '[') { patternEnder = data; BADSCRIPT((char*)"PATTERN-5? cannot stick * wildcard inside {} or []\r\n") } variableGapSeen = true; // std * unlimited wildcard } startSeen = true; break; case '?': // question input ? if (quoteSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-56 Quoting a ? is meaningless.\r\n"); } if (memorizeSeen && word[1] != '$') { patternEnder = data; BADSCRIPT((char*)"PATTERN-57 Cannot use _ before ?\r\n") } if (variableGapSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-58 Cannot have wildcards before ?\r\n") } break; case USERVAR_PREFIX: // user var if (quoteSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-59 Quoting a $ variable is meaningless - %s\r\n", word); } // $x? 
is a search to match and is legal if (variableGapSeen && memorizeSeen && comparisonchar != '?') BADSCRIPT((char*)"PATTERN-59 Variable gap in progress, cannot reference variable - %s\r\n", word); break; case '"': // string { // you can quote a string, because you are quoting its members variableGapSeen = false; strcpy(word,JoinWords(BurstWord(word,CONTRACTIONS)));// change from string to std token WritePatternWord(word); WriteKey(word); unsigned int n = 0; char* ptr = word; while ((ptr = strchr(ptr,'_'))) { ++n; ++ptr; } if (n >= SEQUENCE_LIMIT) WARNSCRIPT((char*)"PATTERN-? Too many words in string %s, may never match unless you set $cs_sequence high enough\r\n",word) } goto DEFLT; case SYSVAR_PREFIX: // system data // you can quote system variables because %topic returns a topic name which can be quoted to query if (memorizeSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-60 Cannot use _ before system variable - %s\r\n", word) } if (!word[1]); // simple % else if (!FindWord(word) && stricmp(word, "%trace_on") && stricmp(word, "%trace_off") && stricmp(word, "%testpattern-prescan") && stricmp(word, "%testpattern-nosave")) { patternEnder = data; BADSCRIPT((char*)"PATTERN-61 %s is not a system variable\r\n", word) } if (comparison) *comparison = c; variableGapSeen = false; break; case '~': variableGapSeen = false; if (quoteSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-61 cannot quote set %s because it can't be determined if set comes from original or canonical form\r\n", word) } startSeen = true; WriteKey(word); CheckSetOrTopic(word); // set or topic goto DEFLT; default: // normal token ( and anon function call) DEFLT: if (noPatternOptimization || assignment || comparison || *word == '^' || *word == '_' || *word == '@' || *word == '$' || *word == '%') {;} else if (strchr(word, '*') || strchr(word, '?')) { ; } // wildcard word patterns else if (nestKind[nestIndex - 1] == '[' || nestKind[nestIndex - 1] == '{') { // generate compile-time anonymous concept 
set for word list [] {}. // but it makes ^mark and ^unmark from outside problematic char controls[100]; *controls = 0; if (notSeen) strcat(controls, "!"); if (*controls && memorizeSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-67 Cannot have ! and _ together\r\n") } if (quoteSeen) strcat(controls, "'"); notSeen = quoteSeen = false; char name[MAX_WORD_SIZE]; if (!conceptStarted[conceptIndex ]) // not yet started { unsigned int layer = 1; if (buildId == BUILD0) layer = 0; else if (buildId == BUILD1) layer = 1; if (myBot && !livecall) { sprintf(name, (char*)"~%u%05u`%s", layer, ++conceptID, PrintU64(myBot)); } else sprintf(name, "~%u%05u", layer,++conceptID); conceptStarted[conceptIndex ] = 1; if (!livecall) sprintf(currentConceptBuffer, "%s ( ", name); else sprintf(currentConceptBuffer, "%s ", name); currentConceptBuffer += strlen(currentConceptBuffer); sprintf(currentConceptBuffer, "%s%s ", controls,word); currentConceptBuffer += strlen(currentConceptBuffer); sprintf(word, "~%u%05u", layer, conceptID); // no bot id attached } else { sprintf(currentConceptBuffer, "%s%s ", controls,word); currentConceptBuffer += strlen(currentConceptBuffer); *word = 0; // dont use it } } variableGapSeen = false; startSeen = true; break; } if (assignment) { *assignment = c; if (memorizeSeen && assignment[1]) { patternEnder = data; BADSCRIPT((char*)"PATTERN-57 Cannot use _ before an assignment\r\n") } if (variableGapSeen) BADSCRIPT((char*)"PATTERN-16 Cannot use * before assignment since memorization will be incomplete\r\n") // rebuild token char tmp[MAX_WORD_SIZE * 4]; *tmp = ':'; // assignment header len = (assignment - word) + 2; // include the : and jump code in length if (len > 70) { patternEnder = data; BADSCRIPT((char*)"PATTERN-65 Left side of assignment must not exceed 70 characters - %s\r\n", word) } char* x = tmp + 1; Encode(len, x, 1); // function definition if (*word == '$' && word[1] == '^' && *assignment == ':' && assignment[1] == '=') { char* fn = assignment + 4; 
// the function definition code len = strlen(fn); if (fn[len - 1] == '"') fn[len - 1] = 0; char* data = AllocateBuffer(); char* holdstart = (char*)linestartpoint; linestartpoint = fn; // lie about where data came from to get accurate column data supplementalColumn = priorColumn + fn - word; currentLineColumn = 0; fn = ReadMacro(fn, NULL, "apimacro:", 0,data); linestartpoint = holdstart; currentLineColumn = supplementalColumn; supplementalColumn = 0; char* end = strchr(fn, '`'); *end = 0; char* aftername = strchr(assignment + 4, '^'); char* paren = strchr(aftername, '('); char* space = strchr(aftername, ' '); if (space && space < paren) paren = space; *paren = ' '; strcpy(paren +1, fn); strcpy(tmp + 2, word); // copy left side over strcat(tmp,"\""); FreeBuffer(); patternContext = true; } else strcpy(tmp + 2, word); // copy left side over char* end = word + strlen(word); if (*(end - 1) == '=' ) BADSCRIPT("PATTERN-57x pattern assignment missing right hand side- %s\r\n",word) strcpy(word, tmp); // replace original token } else if (comparison) // is a comparison of some kind { if (memorizeSeen && comparison[1]) { patternEnder = data; BADSCRIPT((char*)"PATTERN-57 Cannot use _ before a comparison\r\n") } if (variableGapSeen && comparisonchar != '?' 
&& memorizeSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-16 Cannot use * before comparison since memorization will be incomplete\r\n") } if (*word == USERVAR_PREFIX && word[1] == LOCALVAR_PREFIX) { char* dot = strchr(word,'.'); if (dot) *dot = 0; AddDisplay(word); if (dot) *dot = '.'; } *comparison = comparisonchar; if (comparisonchar == '!') // move not operator out in front of token { *data++ = '!'; strcpy(data++, " "); // and space it, so if we see "!=shec?~hello" we wont think != is an operator, instead = is a jump infof len = strlen(comparison); memmove(comparison,comparison+1,len); } if (quoteSeen && *word == '_' && IsDigit(word[1])) // quoted match variable { quoteSeen = false; memmove(word+1,word,strlen(word)+1); *word = '\''; ++comparison; // moved over for the added ' } char* rhs = comparison+1; if (*rhs == '=' || *rhs == '?') ++rhs; if (*rhs == '^' && IsAlphaUTF8(rhs[1])) { patternEnder = data; BADSCRIPT((char*)"%s is not a current function variable\r\n", rhs) } if (!*rhs && *word == USERVAR_PREFIX) {} // allowed member in sentence else if (!*rhs && *word == '_' && IsDigit(word[1])); // allowed member in sentence else if (*rhs == '#') // names a constant #define to replace with number value { uint64 n = FindPropertyValueByName(rhs+1); if (!n) n = FindSystemValueByName(rhs+1); if (!n) n = FindParseValueByName(rhs+1); if (!n) n = FindMiscValueByName(rhs+1); if (!n) { patternEnder = data; BADSCRIPT((char*)"PATTERN-63 No #constant recognized - %s\r\n", rhs + 1) } sprintf(rhs,(char*)"%lld",(long long int) n); } else if (IsAlphaUTF8DigitNumeric(*rhs) ) // LITERAL has no constraints { // WriteKey(rhs); // WritePatternWord(rhs); // ordinary token } else if (*rhs == '~') { MakeLowerCase(rhs); CheckSetOrTopic(rhs); } else if (*rhs == '_' || *rhs == '@'); // match variable or factset variable else if (*rhs == USERVAR_PREFIX) { MakeLowerCase(rhs); // user variable if (rhs[1] == LOCALVAR_PREFIX) { char* dot = strchr(rhs,'.'); if (dot) *rhs = 0; 
AddDisplay(rhs); if (dot) *word = '.'; } } else if (*rhs == SYSVAR_PREFIX) MakeLowerCase(rhs); // system variable else if (*rhs == '^' && (rhs[1] == '_' || rhs[1] == USERVAR_PREFIX || IsDigit(rhs[1]))) MakeLowerCase(rhs); // indirect match variable or indirect user vaiable or function variable else if (!*rhs && *comparison == '?' && !comparison[1]); else if (*rhs == '\'' && (rhs[1] == USERVAR_PREFIX || rhs[1]== '_')); // unevaled user variable or raw match variable else if (!comparison[2] && *word == USERVAR_PREFIX); // find in sentence else if (*rhs == '"' && rhs[strlen(rhs)-1] == '"'){;} // quoted string else { patternEnder = data; BADSCRIPT((char*)"PATTERN-64 Illegal comparison %s or failed to close prior rule starting at %s\r\n", word, GetRuleElipsis(start)) } len = (comparison - word) + 2; // include the = and jump code in length // rebuild token char tmp[MAX_WORD_SIZE]; *tmp = '='; // comparison header if (len > 70) { patternEnder = data; BADSCRIPT((char*)"PATTERN-65 Left side of comparison must not exceed 70 characters - %s\r\n", word) } char* x = tmp + 1; Encode(len,x,1); strcpy(tmp+2,word); // copy left side over strcpy(word,tmp); // replace original token } else if (*word == '~') CheckSetOrTopic(word); ReadNextSystemToken(in,ptr,nextToken,true,true); if (*word == '^') // function call or function var ref or indirect function variable assign ref like ^$$tmp = null { if (quoteSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-? 
Cannot use quote before ^ function call or variable\r\n") } if (notSeen) { *data++ = '!'; if (doubleNotSeen) *data++ = '!'; doubleNotSeen = notSeen = false; } if (memorizeSeen) { if (!IsDigit(word[1]) && word[1] != USERVAR_PREFIX) { patternEnder = data; BADSCRIPT((char*)"PATTERN-66 Cannot use _ before ^ function call\r\n") } *data++ = '_'; memorizeSeen = false; } if (word[1] == USERVAR_PREFIX) { strcpy(data,word); data += strlen(data); } else { ptr = ReadCall(word,ptr,in,data,*nextToken == '(',false); if (PatternRelationToken(ptr)) // immediate relation bound to call? { ptr = ReadNextSystemToken(in,ptr,word); strcpy(data,word); data += strlen(data); } } strcpy(data++, " "); continue; } // put out the next token and space if (notSeen) { if (memorizeSeen) { patternEnder = data; BADSCRIPT((char*)"PATTERN-67 Cannot have ! and _ together\r\n") } *data++ = '!'; if (doubleNotSeen) *data++ = '!'; doubleNotSeen = notSeen = false; } if (quoteSeen) { *data++ = '\''; quoteSeen = false; } if (memorizeSeen) { *data++ = '_'; if (ifstatement) strcpy(data++, " "); memorizeSeen = false; } if (IsAlphaUTF8(*word) || (*word == '*' && IsAlphaUTF8(word[1])) ) { char* p; if ((p = strchr(word,'*'))) // wild word fragment? 
reformat to have leading * and lower case the test { char hold[MAX_WORD_SIZE]; MakeLowerCopy(hold,word); *word = '*'; strcpy(word+1,hold); } else if (IsPunctuation(*word) && !word[1]) // punctuation { WriteKey(word); WritePatternWord(word); // memorize it to know its important } else // ordinary word - break off possessives as needed { size_t lenx = strlen(word); unsigned int ignore = 0; if (lenx > 1 && word[lenx-1] == '\'' && word[lenx-2] != '_') // ending ' possessive plural { if (ifstatement && !strcmp(word,"PATTERN")) {;} // allow uppercase else { OverCover(word, keywordList,nestKind,nestIndex); WritePatternWord(word); WriteKey(word); } word[--lenx] = 0; ignore = 1; } else if (lenx > 2 && word[lenx-1] == 's' && word[lenx-2] == '\'' && word[lenx-3] != '_') // ending 's possessive singular { OverCover(word, keywordList, nestKind, nestIndex); WriteKey(word); WritePatternWord(word); lenx -= 2; word[lenx] = 0; ignore = 2; } if (stricmp(current_language, "japanese") || !stricmp(current_language, "chinese")) { strcpy(word, JoinWords(BurstWord(word, CONTRACTIONS))); // change to std token if (!livecall && spellCheck && !(spellCheck & NO_SPELL)) SpellCheckScriptWord(word, startSeen ? 1 : 0, false); else if (livecall && compiling == PIECE_COMPILE) SpellCheckScriptWord(word, startSeen ? 
1 : 0, false); if (strcmp(word, "PATTERN") ) { OverCover(word, keywordList, nestKind, nestIndex); WriteKey(word); WritePatternWord(word); // memorize it to know its important } } if (ignore) { strcpy(data,word); data += strlen(data); *data++ = '_'; if (ignore == 1) strcpy(word,(char*)"'"); else strcpy(word,(char*)"'s"); } } } strcpy(data,word); // default unchanged (but not updated yet to accept) len = strlen(data); data += len; if (!blockspace) strcpy(data++, " "); else blockspace = false; // _1[ for instance bidirectionalSeen = false; if (nestIndex == 0) break; // we completed this level if (*word == '*' && word[1] == '~' && word[3] && word[3] == 'b') bidirectionalSeen = true; } *data = 0; if (csapicall == COMPILE_PATTERN && ptr && *ptr) { ptr = TrimSpaces(ptr); if (*ptr) { char msg[MAX_WORD_SIZE]; strncpy(msg, ptr, 20); msg[20] = 0; BADSCRIPT((char*)"PATTERN-78 Excess data after pattern closed %s \r\n", msg) } } // leftovers? if (macro && nestIndex != 1) { patternEnder = data; BADSCRIPT((char*)"PATTERN-68 Failed to balance ( or [ or { properly in macro for %s\r\n", startPattern) } else if (!macro && nestIndex != 0) { patternEnder = data; patternStarter = nestData[nestIndex-1]; BADSCRIPT((char*)"PATTERN-69 Failed to close %c started at line %d col %d : %s\r\n", nestKind[nestIndex - 1], nestLine[nestIndex-1] >> 16, nestLine[nestIndex-1] & 0x00ffff, startPattern); } patternContext = false; ReleaseStack(stackbase); if (!*conceptbase) {;} // no optimization happened else if (!livecall) // not from compilepattern or dynamic testpattern -- empty files to start { char filename[SMALL_WORD_SIZE]; int layer = 1; if (buildId == BUILD0) layer = 0; else if (buildId == BUILD1) layer = 1; sprintf(filename, (char*)"%s/BUILD%d/keywords%d.txt", topicfolder, layer, layer); FILE* out = FopenUTF8WriteAppend(filename); fprintf(out, "%s", conceptbase); fclose(out); } else // ^compilepattern optimzation { char* revised = AllocateBuffer(); strcpy(revised, startPattern); 
strcpy(startPattern, conceptbase); strcat(startPattern, revised); FreeBuffer(); } FreeBuffer(); // conceptBufferLevelStart FreeBuffer(); // conceptbase patternStarter = NULL; patternEnder = NULL; supplementalColumn = oldsupplementalColumn; return ptr; } static char* ReadChoice(char* word, char* ptr, FILE* in, char* &data,char* rejoinders) { // returns the stored data, not the ptr, starts with the [ *data++ = '['; strcpy(data++, " "); ReadNextSystemToken(in,ptr,word,true); // get possible rejoinder label if (word[1] == ':' && !word[2]) // is rejoinder label { if (*word >= 'A' && *word < 'Q') *word = *word - 'A' + 'a'; if (*word < 'a' || *word >= 'q') BADSCRIPT((char*)"CHOICE-1 Bad level label %s in [ ]\r\n",word) if (rejoinders) rejoinders[(int)(*word - 'a' + 1)] = 2; // authorized level *data++ = *word; *data++ = word[1]; strcpy(data++, " "); ptr = ReadNextSystemToken(in,ptr,word,false); } ptr = ReadOutput(false,true,ptr,in,data,rejoinders,NULL,NULL,true); *data = 0; return ptr; } static bool ValidIfOperand(char c) { return (c != '<' && c != '+' && c != '-' && c != '*' && c != '/' && c != '&' && c != '|' && c != '%' && c != '=' && c != '>' && c != '^' && c != '!' && c != '?'); } char* ReadIfTest(char* ptr, FILE* in, char* &data) { priorLine = currentFileLine; char word[MAX_WORD_SIZE]; int paren = 1; size_t len; // test is either a function call OR an equality comparison OR an IN relation OR an existence test // the followup will be either (or < > == or IN or ) // The function call returns a status code, you cant do comparison on it // but both function and existence can be notted- IF (!$var) or IF (!read(xx)) // You can have multiple tests, separated by AND and OR. PATTERN: ptr = ReadNextSystemToken(in,ptr,word,false,false); if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } if (*word == '~' ) CheckSetOrTopic(word); // separate ! from things if not != and !? if (*word == '!' 
&& word[1] && word[1] != '=' && word[1] != '?') { while (*--ptr != '!' && *ptr); ++ptr; word[1] = 0; } // actually a test joined on? char* at = word; while (*++at && ValidIfOperand(*at)) {;} // never look at first character if (*at) { len = strlen(at); if (*at == '-' && *word == '$') {;} // $atat-ata could be subtract or name, but cannot be subtract in if test else if (*at == '^' && *word == '\'') {;} // '^arg is not an operator else { *at = 0; ptr -= len; // back up to rescan at = ptr; while (!ValidIfOperand(*at)) {++at;} // where does operand end? memmove(at + 1, at, strlen(at)+1); *at = ' '; // separate operand } } bool notted = false; if (*word == '!' && !word[1]) { notted = true; *data++ = '!'; strcpy(data++, " "); ptr = ReadNextSystemToken(in,ptr,word,false,false); if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } } if (*word == '\'' && !word[1]) { *data++ = '\''; ptr = ReadNextSystemToken(in,ptr,word,false,false); if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } if (*word != '_' && *word != '^') BADSCRIPT((char*)"IF-3 Can only quote _matchvar (or functionvar of one) in IF test\r\n") } if (*word == '!') BADSCRIPT((char*)"IF-4 Cannot do two ! in a row\r\n") ReadNextSystemToken(in,ptr,nextToken,false,true); if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } MakeLowerCase(nextToken); if (*nextToken != '(' && *word == '^' && word[1] != '"' && IsAlphaUTF8(word[1])) BADSCRIPT((char*)"%s is not the name of a local function argument\r\n",word) if (*nextToken == '(') // function call? 
{ if (*word != '^') // a call w/o its ^ { char rename[MAX_WORD_SIZE]; *rename = '^'; strcpy(rename+1,word); // in case user omitted the ^ strcpy(word,rename); } ptr = ReadCall(word,ptr,in,data,true,false); // read call ReadNextSystemToken(in,ptr,nextToken,false,true); if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } if (RelationToken(nextToken)) { if (notted) BADSCRIPT((char*)"IF-5 cannot do ! in front of comparison %s\r\n",nextToken) strcpy(data++, " "); ptr = ReadNextSystemToken(in,ptr,word,false,false); // swallow operator if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } strcpy(data,word); data += strlen(word); strcpy(data++, " "); ptr = ReadNextSystemToken(in,ptr,word,false,false); // swallow value if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } strcpy(data,word); data += strlen(word); } } else if (*nextToken == '!' && nextToken[1] == '?') { if (notted) BADSCRIPT((char*)"IF-6 cannot do ! in front of query %s\r\n",nextToken) if (*word == '\'' && word[1] == '_') {;} else if (*word != '@' &&*word != USERVAR_PREFIX && *word != '_' && *word != '^' && *word != SYSVAR_PREFIX) BADSCRIPT((char*)"IF test query must be with $var, _# or '_#, %sysvar, @1subject or ^fnarg -%s\r\n",word) strcpy(data,word); data += strlen(word); strcpy(data++, " "); ptr = ReadNextSystemToken(in,ptr,word,false,false); // swallow operator strcpy(data,word); data += strlen(word); strcpy(data++, " "); ptr = ReadNextSystemToken(in,ptr,word,false,false); // swallow value if (*word == '^' && !IsDigit(word[1])) BADSCRIPT((char*)"IF-7 not allowed 2nd function call in relation - %s\r\n",word) if (*word == '~') CheckSetOrTopic(word); strcpy(data,word); data += strlen(word); } else if (RelationToken(nextToken)) { if (notted && *nextToken != '?') BADSCRIPT((char*)"IF-8 cannot do ! 
in front of comparison %s\r\n",nextToken) if (*word == '\'' && ((word[1] == '^' && IsDigit(word[2])) || word[1] == USERVAR_PREFIX || word[1] == '_')) {;} // quoted variable else if (*word != '@' && *word != USERVAR_PREFIX && *word != '_' && *word != '^' && *word != SYSVAR_PREFIX && !IsAlphaUTF8(*word) && !IsDigit(*word) && *word != '+' && *word != '-') BADSCRIPT((char*)"IF test comparison 1st value must be number, word, $var, _#, sysvar, @1subject or ^fnarg -%s\r\n",word) strcpy(data,word); data += strlen(word); strcpy(data++, " "); ptr = ReadNextSystemToken(in,ptr,word,false,false); // swallow operator strcpy(data,word); data += strlen(word); strcpy(data++, " "); ptr = ReadNextSystemToken(in,ptr,word,false,false); // swallow value if (*word == '~') CheckSetOrTopic(word); if (*word == '^' && !IsDigit(word[1])) BADSCRIPT((char*)"IF-9 not allowed function call or active string in relation as 2nd arg - %s\r\n",word) strcpy(data,word); data += strlen(word); } else if (*nextToken == ')' || !stricmp(nextToken,(char*)"and") || !stricmp(nextToken, (char*)"&") || !stricmp(nextToken,(char*)"or")) // existence test { if (*word != USERVAR_PREFIX && *word != '_' && *word != '@' && *word != '^' && *word != SYSVAR_PREFIX && *word != '?' ) BADSCRIPT((char*)"IF-10 existence test - %s. Must be uservar or systemvar or _# or ? or @# or ~concept or ^^var \r\n",word) strcpy(data,word); data += strlen(word); } else BADSCRIPT((char*)"IF-11 illegal test %s %s . 
Use (X > Y) or (Foo()) or (X IN Y) or ($var) or (_3)\r\n",word,nextToken) strcpy(data++, " "); // check for close or more conditions ptr = ReadNextSystemToken(in,ptr,word,false,false); // ) if (*word == '~') CheckSetOrTopic(word); if (*word == ')') { *data++ = ')'; strcpy(data++, " "); } else if (!stricmp(word,(char*)"or") || !stricmp(word,(char*)"and") || !stricmp(word, (char*)"&")) { MakeLowerCopy(data,word); data += strlen(word); strcpy(data++, " "); goto PATTERN; // handle next element } else BADSCRIPT((char*)"IF-12 comparison must close with ) -%s .. Did you make a function call as 1st argument? that's illegal\r\n",word) *data = 0; return ptr; } static char* ReadBody(char* word, char* ptr, FILE* in, char* &data,char* rejoinders) { // stored data starts with the { char* start = data; ptr = ReadOutput(false,true,ptr,in,data,rejoinders,NULL,NULL); size_t len = strlen(start); if ((len + 3) >= maxBufferSize) BADSCRIPT((char*)"BODY-4 Body exceeding limit of %d bytes\r\n",maxBufferSize) return ptr; } #ifdef INFORMATION An IF consists of: if (test-condition code) xx {body code} yy else (test-condition code) xx {body code} yy else (1) xx {body code} yy spot yy is offset to end of entire if and xx if offset to next branch of if before "else". 
#endif char* ReadIf(char* word, char* ptr, FILE* in, char* &data,char* rejoinders) { char* bodyends[PATTERNDEPTH]; // places to patch for jumps unsigned int bodyendIndex = 0; char* original = data; strcpy(data,(char*)"^if "); data += 4; if (mapFile && dataBase && !livecall) { fprintf(mapFile, (char*)" if %u %u \r\n", currentFileLine, (unsigned int)(data - dataBase)); // readBuffer } patternContext = false; ++complexity; priorLine = currentFileLine; while (ALWAYS) { char* testbase = data; *data++ = 'a'; // reserve space for offset past pattern *data++ = 'a'; // next will be ( *data++ = 'a'; ptr = ReadNextSystemToken(in,ptr,word,false); // the '(' if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } MakeLowerCopy(lowercaseForm,word); if (!*word || TopLevelUnit(word) || TopLevelRule(lowercaseForm) || Rejoinder(lowercaseForm)) BADSCRIPT((char*)"IF-1 Incomplete IF statement - %s\r\n",word) if (*word != '(') BADSCRIPT((char*)"IF-2 Missing (for IF test - %s\r\n",word) *data++ = '('; strcpy(data++, " "); ptr = SkipWhitespace(ptr); if (!strnicmp(ptr,(char*)"pattern ",7)) { if (livecall) BADSCRIPT((char*)"Cannot use Pattern If during live compilation\r\n") ptr = ReadNextSystemToken(in,ptr,word,false); // pattern if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } strcpy(data,word); data += strlen(data); strcpy(data++, " "); char* original = data; patternContext = true; // swallow pattern ptr = ReadPattern(ptr,in,data,false,true); // read ( for real in the paren for pattern if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } patternContext = false; } else { ptr = ReadIfTest(ptr, in, data); // starts by reading the ( and ends having read ) } Encode((unsigned int)(data-testbase),testbase); // offset to after pattern // Encode(xcounter,data,2); //--- format: branch to after pattern, pattern, branch around next pattern, pattern, branch around next pattern or to 
end of if code char* ifbase = data; *data++ = 'a'; // reserve space for offset after the closing ), which is how far to go past body *data++ = 'a'; *data++ = 'a'; // swallow body of IF after test -- must have { surrounding now ReadNextSystemToken(in,ptr,word,false,true); // { if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } if (*word != '{') { char hold[MAX_WORD_SIZE]; strcpy(hold,word); ptr = ReadNextSystemToken(in,ptr,word,false,false); ReadNextSystemToken(in,ptr,word,false,true); if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } if (*word != '{') BADSCRIPT((char*)"IF-13 body must start with { instead of %s -- saw pattern %s\r\n",word,readBuffer,original) } ptr = ReadBody(word,ptr,in,data,rejoinders); // comes with space after it bodyends[bodyendIndex++] = data; // jump offset to end of if (backpatched) DummyEncode(data); // reserve space for offset after the closing ), which is how far to go past body strcpy(data++, " "); Encode((unsigned int)(data-ifbase),ifbase); // offset to ELSE or ELSE IF from body start // now see if ELSE branch exists ReadNextSystemToken(in,ptr,word,false,true); // else? 
if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } if (stricmp(word, (char*)"else")) { if (csapicall == COMPILE_OUTPUT && errorOnMissingElse) BADSCRIPT((char*)"Missing ELSE clause.\r\n"); break; // caller will add space after our jump index } // there is either else if or else ptr = ReadNextSystemToken(in,ptr,word,false,false); // swallow the else if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } strcpy(data,(char*)"else "); data += 5; ReadNextSystemToken(in,ptr,word,false,true); // see if or { if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } if (mapFile && dataBase && !livecall) { if (!stricmp(word,"if")) fprintf(mapFile, (char*)" elseif %u %u \r\n", currentFileLine, (unsigned int)(data - dataBase)); // readBuffer else fprintf(mapFile, (char*)" else %u %u \r\n", currentFileLine, (unsigned int)(data - dataBase)); // readBuffer } if (*word == '{') // swallow the ELSE body now since no IF - add fake successful test { // successful test condition for else *data++ = '('; *data++ = ' '; *data++ = '1'; *data++ = ' '; *data++ = ')'; strcpy(data++, " "); ifbase = data; DummyEncode(data);// reserve skip data strcpy(data++, " "); ptr = ReadBody(word,ptr,in,data,rejoinders); bodyends[bodyendIndex++] = data; // jump offset to end of if (backpatched) DummyEncode(data);// reserve space for offset after the closing ), which is how far to go past body Encode((unsigned int)(data-ifbase),ifbase); // offset to ELSE or ELSE IF from body start (accelerator) break; } else ++complexity; ptr = ReadNextSystemToken(in,ptr,word,false,false); // eat the IF if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } } if (*(data-1) == ' ') --data; // remove excess blank patternContext = false; if (mapFile && !livecall) { fprintf(mapFile, (char*)" ifend %u %u \r\n", currentFileLine, (unsigned int)(data - dataBase)); // readBuffer } // 
store offsets from successful bodies to the end while (bodyendIndex != 0) { char* at = bodyends[--bodyendIndex]; Encode((unsigned int)(data-at+1),at); // accerators on completion of if to end of whole if } *data = 0; return ptr; // we return with no extra space after us, caller adds it } static char* ReadLoop(char* word, char* ptr, FILE* in, char* &data,char* rejoinders,bool json) { priorLine = currentFileLine; char* original = data; if (json) { strcpy(data, (char*)"^jsonloop "); data += 10; } else { strcpy(data, (char*)"^loop "); data += 6; } if (mapFile && dataBase) { if (json) fprintf(mapFile, (char*)" jsonloop %u %u \r\n", currentFileLine, (unsigned int)(data - dataBase)); // readBuffer else fprintf(mapFile, (char*)" loop %u %u \r\n", currentFileLine, (unsigned int)(data - dataBase)); // readBuffer } ptr = ReadNextSystemToken(in,ptr,word,false,false); // ( if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } *data++ = '('; strcpy(data++, " "); if (*word != '(') BADSCRIPT((char*)"LOOP-1 count must be () or (count) -%s\r\n",word) ptr = ReadNextSystemToken(in,ptr,word,false,false); // counter - if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } if (*word == '^' && IsAlphaUTF8(word[1])) { WORDP D = FindWord(word, 0, LOWERCASE_LOOKUP); if (!D || !(D->internalBits & FUNCTION_NAME)) BADSCRIPT((char*)"%s is not the name of a local function argument\r\n", word) ReadNextSystemToken(in, ptr, nextToken, false, true); ptr = ReadCall(word, ptr, in, data, *nextToken == '(', false); // add function call ptr = ReadNextSystemToken(in, ptr, word, false, false); if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } } else if (*word == ')') { if (!json) strcpy(data, (char*)"-1"); // omitted, use -1 else BADSCRIPT("^JSONLOOP missing arguments") } else if (!stricmp(word,(char*)"-1") && !json) // precompiled previously -1 { strcpy(data,word); ptr = 
ReadNextSystemToken(in,ptr,word,false,false); // read closing paren if (*word != ')') BADSCRIPT((char*)"Loop counter %s was not closed by )\r\n",word); } else if (!json && !IsDigit(*word) && *word != USERVAR_PREFIX && *word != '_' && *word != SYSVAR_PREFIX && *word != '^' && *word != '@') BADSCRIPT((char*)"LOOP-2 counter must be $var, _#, %var, @factset or ^fnarg or function call -%s",word) else { strcpy(data,word); ptr = ReadNextSystemToken(in,ptr,word,false, false); if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } if (json) // 2 more args { data += strlen(data); strcpy(data++, " "); if (*word != '$' && *word != '_') BADSCRIPT((char*)"LOOP-2 control must be $var or matchvar", word) strcpy(data, word); if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } ptr = ReadNextSystemToken(in, ptr, word, false, false); // if (*word != '$' && *word != '_') BADSCRIPT((char*)"LOOP-2 control must be $var or matchvar", word) data += strlen(data); strcpy(data++, " "); strcpy(data, word); if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } ptr = ReadNextSystemToken(in, ptr, word, false, false); } } data += strlen(data); strcpy(data++, " "); if (*word != ')' && stricmp(word,"new") && stricmp(word,"old")) BADSCRIPT((char*)"LOOP-3 control must end with ) or NEW or OLD -%s\r\n", word) if (!stricmp(word, "new") || !stricmp(word, "old")) { strcpy(data, word); data += 3; strcpy(data++, " "); } *data++ = ')'; strcpy(data++, " "); char* loopstart = data; DummyEncode(data); // reserve loop jump to end accelerator strcpy(data++, " "); // Encode(loopCounter,data,2); // now do body ReadNextSystemToken(in,ptr,word,false,true); if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } if (*word != '{') // does it have precompiled accelerator { char hold[MAX_WORD_SIZE]; strcpy(hold,word); ptr = ReadNextSystemToken(in,ptr,word,false,false); 
ReadNextSystemToken(in,ptr,word,false,true); if (priorLine != currentFileLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } if (*word != '{') BADSCRIPT((char*)"LOOP-4 body must start with { -%s\r\n",hold) } char* bodystart = data; ptr = ReadBody(word,ptr,in,data,rejoinders); if (bodystart[0] == '{' && bodystart[1] == ' ' && bodystart[2] == '}') BADSCRIPT((char*)"LOOP-4 body makes no sense being empty\r\n") Encode((unsigned int)(data - loopstart),loopstart); // offset to body end from body start (accelerator) *data = 0; if (mapFile ) { fprintf(mapFile, (char*)" loopend %u %u \r\n", currentFileLine, (unsigned int)(data - dataBase)); // readBuffer } return ptr; // caller adds extra space after } static char* ReadJavaScript(FILE* in, char* &data,char* ptr) { strcpy(data,"*JavaScript"); data += strlen(data); strcpy(data++, " "); strcpy(data,ptr); data += strlen(data); char word[MAX_WORD_SIZE]; while (ReadALine(readBuffer,in) >= 0) { char* comment = strstr(readBuffer,"//"); if (comment) *comment = 0; // erase comments to end of line if (strstr(readBuffer,"/*")) BADSCRIPT("Cannot use /* ... 
*/ comments in CS JavaScript: %s\r\n", readBuffer); char* ptr = SkipWhitespace(readBuffer); if (!*ptr) continue; ReadCompiledWord(ptr,word); if (TopLevelUnit(word) || !stricmp(word,(char*)"datum:")) break; strcpy(data++, " "); strcpy(data,ptr); data += strlen(data); } return readBuffer; } char* ReadOutput(bool optionalBrace,bool nested,char* ptr, FILE* in,char* &mydata,char* rejoinders,char* supplement,WORDP call, bool choice) { priorLine = currentFileLine; char* originalptr = ptr; char* oldOutputStart = outputStart; // does not matter if script error grabs control dataChunk = mydata; // global visible for use when changing lines for mapping if (!nested) { lineStart = dataChunk; // where line begins outputStart = dataChunk; } char* original = dataChunk; *dataChunk = 0; int bracket = 0; int paren = 0; int squiggle = 0; char* startparen = 0; char* startsquiggle = 0; char* startbracket = 0; int startpline = 0; int startbline = 0; int startsline = 0; char word[MAX_WORD_SIZE * 4]; char assignlhs[MAX_WORD_SIZE]; *assignlhs = 0; *assignKind = 0; int level = 0; int insert = 0; bool oldContext = patternContext; patternContext = false; char hold[MAX_WORD_SIZE]; *hold = 0; char startkind = 0; if (choice) { startkind = '['; ++level; startbracket = ptr; startbline = currentFileLine; } int pendingmathassign = 0; bool start = true; bool needtofield = false; bool javascript = false; while (ALWAYS) // read as many tokens as needed to complete the responder definition { if ((dataChunk-original) >= MAX_JUMP_OFFSET) BADSCRIPT((char*)"OUTPUT-1 code exceeds size limit of %d bytes\r\n",MAX_JUMP_OFFSET) if (*hold) // pending assignment code { if (*hold == '=') { pendingmathassign = 1; strcpy(word,(char*)"="); memmove(hold,hold+1,strlen(hold)); } else { strcpy(word,hold); *hold = 0; } } else if (supplement && *supplement) { strcpy(word,supplement); supplement = NULL; } else ptr = ReadNextSystemToken(in,ptr,word,false); if (!*word) break; // end of file if (!strcmp(word,"==")) 
WARNSCRIPT((char*)"== used in output. Did you want assignment = ?\r\n") if (currentFileLine != priorLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } if (start && !stricmp(word,"javascript")) { ptr = ReadJavaScript(in,dataChunk,ptr); javascript = true; break; } if (*word == USERVAR_PREFIX) // jammed together asignment? { char* assign = strchr(word,'='); if (assign) { strcpy(hold,assign); *assign = 0; } } if (insert) --insert; MakeLowerCopy(lowercaseForm,word); if (*word == '#' && word[1] == '!') // special comment { ptr -= strlen(word); // let someone else see this also // safe break; } if (*word == 'a' && word[2] == 0 && (word[1] == ';' || word[1] == '"' || word[1] == '\'' ) ) WARNSCRIPT((char*)"Is %s supposed to be a rejoinder marker?\r\n",word,currentFilename); if (*word == '}' && level == 0 && !optionalBrace) BADSCRIPT("extra } closing nothing") if ((*word == '}' && level == 0 && optionalBrace) || TopLevelUnit(word) || TopLevelRule(lowercaseForm) || Rejoinder(lowercaseForm) || !stricmp(word,(char*)"datum:")) // responder definition ends when another major unit or top level responder starts { if (*word != ':') // allow commands here { ptr -= strlen(word); // let someone else see this starter also // safe break; } else if (level >= 1) { if (startkind == '[') BADSCRIPT((char*)"CHOICE-2 Fail to close code started with %s upon seeing %s\r\n", originalptr,word) else BADSCRIPT((char*)"BODY-1 Fail to close code started with %s upon seeing %s\r\n", originalptr,word) } } ReadNextSystemToken(in,ptr,nextToken,false,true); // caching request if (pendingmathassign && *word != '(') pendingmathassign = 0; // turn off math assign if ((*word == '$' || *word == '_') && IsAssignOp(nextToken)) pendingmathassign = 1; if (pendingmathassign && (!stricmp(word, "and") || !stricmp(word, "or"))) { BADSCRIPT("%s is not a legal math operator. 
CS has no boolean type.",word) } if (!startkind) startkind = *word; // may be ( or { or [ or other switch(*word) { case '{': ++level; if (!squiggle++) { startsquiggle = ptr; startsline = currentFileLine; } break; case '(': ++level; if (!paren++) { startparen = ptr; startpline = currentFileLine; } break; case '[': startbracket = ptr; startbline = currentFileLine; ptr = ReadChoice(word,ptr,in,dataChunk,rejoinders); // but might have been json array ref continue; case ')': case ']': case '}': if (*word == '}') { --squiggle; if (!squiggle && startkind == '{') // closing level { if (paren) BADSCRIPT((char*)"BODY-3 Fail to close ( on line %d - (%s \r\n",startpline,startparen) if (bracket) BADSCRIPT((char*)"BODY-2 Fail to close [ on line %d - [%s \r\n",startbline,startbracket) } } else if (*word == ')') { --paren; if (!paren && startkind == '(') // closing level { if (squiggle) BADSCRIPT((char*)"BODY-3 Fail to close { on line %d - (%s \r\n",startsline,startsquiggle) if (bracket) BADSCRIPT((char*)"BODY-2 Fail to close [ on line %d - [%s \r\n",startbline,startbracket) } } else if (*word == ']') { --bracket; if (!bracket && startkind == '[') // closing level { if (squiggle) BADSCRIPT((char*)"BODY-3 Fail to close { on line %d - (%s \r\n",startsline,startsquiggle) if (paren) BADSCRIPT((char*)"BODY-2 Fail to close ( on line %d - [%s \r\n",startpline,startparen) } } --level; *dataChunk = 0; if (level < 0) BADSCRIPT((char*)"OUTPUT-3 Unbalanced %s in %s\r\n", word, outputStart) else if (!level && (startkind == '{' || startkind == '[' || startkind == '(')) { strcpy(dataChunk,word); dataChunk += strlen(dataChunk); if (startkind != '(' ) *word = 0; // end loop for bodies and choices } break; case '\'': strcpy(dataChunk,word); dataChunk += strlen(dataChunk); if (*word == '\'' && word[1] == 's' && !word[2] && IsAlphaUTF8OrDigit(*nextToken) ) *dataChunk++ = ' '; else if (word[1] == 0 && (*ptr == '_' || IsAlphaUTF8(*ptr) )) {;} // if isolated like join(' _1) then add space else 
*dataChunk++ = ' '; continue; case '@': if (!IsDigit(word[1]) && word[1] != '_') // not factset nor place locator BADSCRIPT((char*)"OUTPUT-4 bad factset reference - %s\r\n",word) if (!stricmp(nextToken,(char*)"+=") || !stricmp(nextToken,(char*)"-=") ) insert = 2; break; } if (!*word) break; // end of body if (!stricmp("$cs_botid", assignlhs) && IsDigit(word[0])) { // bot id declaration macroid = atoi64(word); } if (*assignlhs) // during continued assignment? { if (!stricmp(word,(char*)"^") || !stricmp(word,(char*)"|") || !stricmp(word,(char*)"&") || (!stricmp(word,(char*)"+") || !stricmp(word,(char*)"-") || !stricmp(word,(char*)"*") || !stricmp(word,(char*)"/"))) { if (!stricmp(nextToken,assignlhs)) { WARNSCRIPT((char*)"Possibly faulty assignment. %s has changed value during prior assignment.\r\n",assignlhs) *assignlhs = 0; } } else if (!stricmp(nextToken,(char*)"^") || !stricmp(nextToken,(char*)"|") || !stricmp(nextToken,(char*)"&") || !stricmp(nextToken,(char*)"+") || !stricmp(nextToken,(char*)"-") || !stricmp(nextToken,(char*)"*") || !stricmp(nextToken,(char*)"/")) {} else *assignlhs = 0; } char* nakedNext = nextToken; if (*nakedNext == '^') ++nakedNext; // word w/o ^ char* nakedWord = word; if (*nakedWord == '^') ++nakedWord; // word w/o ^ if (*word == '^') { if (!stricmp(word,"^if") || !stricmp(word,"^loop") || !stricmp(word, "^jsonloop")) {;} else if (*nextToken != '(' && word[1] != '^' && word[1] != '=' && word[1] != USERVAR_PREFIX && word[1] != '_' && word[1] != '"' && word[1] != '\'' && !IsDigit(word[1])) BADSCRIPT((char*)"%s either references a function w/o arguments or names a function variable that doesn't exist started at line %d col %d\r\n",word,currentFileLine,currentLineColumn - strlen(word)) } // note left hand of assignment if (IsComparator(nextToken)) strcpy(assignlhs,word); if (*nextToken == '=' && !nextToken[1]) // simple assignment { *assignKind = 0; strcpy(dataChunk,word); // add simple item into data dataChunk += strlen(dataChunk); 
*dataChunk++ = ' '; ptr = ReadNextSystemToken(in,ptr,nextToken,false,false); // use up lookahead of = strcpy(dataChunk,(char*)"="); ++dataChunk; *dataChunk++ = ' '; ReadNextSystemToken(in,ptr,nextToken,false,true); // aim lookahead at followup if (!stricmp(nakedNext,(char*)"first") || !stricmp(nakedNext,(char*)"last") || !stricmp(nakedNext,(char*)"random") || !stricmp(nakedNext,(char*)"nth") ) strcpy(assignKind,word); // verify usage fact retrieved from set if (*nextToken == '=' || *nextToken == '<' || *nextToken == '>') { if (!IsAlphaUTF8(nextToken[1])) WARNSCRIPT((char*)"Possibly assignment followed by another binary operator") } // assigning to variable only works if tofield value is given if (*word == USERVAR_PREFIX && (!stricmp(nextToken,"^query") || !stricmp(nextToken,"query"))) needtofield = true; continue; } else if (*nextToken == '{' && !stricmp(nakedWord,(char*)"loop")) // loop missing () { ptr = ReadLoop(word,ptr,in,dataChunk,rejoinders,false); continue; } else if (*nextToken == '{' && !stricmp(nakedWord, (char*)"jsonloop")) // loop missing () { ptr = ReadLoop(word, ptr, in, dataChunk, rejoinders,true); continue; } else if (*nextToken != '(') // doesnt look like a function { } else if (!stricmp(nakedWord,(char*)"if")) // strip IF of ^ { ptr = ReadIf(word,ptr,in,dataChunk,rejoinders); *dataChunk++ = ' '; continue; } else if (!stricmp(nakedWord,(char*)"loop")) // strip LOOP of ^ { ptr = ReadLoop(word,ptr,in,dataChunk,rejoinders,false); continue; } else if (!stricmp(nakedWord, (char*)"jsonloop")) // strip LOOP of ^ { ptr = ReadLoop(word, ptr, in, dataChunk, rejoinders,true); continue; } else if (*word != '^' && (!call || stricmp(call->word,(char*)"^createfact"))) // looks like a call ... if its ALSO a normal word, presume it is not a call, like: I like (American) football { // be wary.. respond(foo) might have been text... // How does he TELL us its text? interpret normal word SPACE ( as not a function call? 
char rename[MAX_WORD_SIZE]; *rename = '^'; strcpy(rename+1,word); // in case user omitted the ^ MakeLowerCase(rename); WORDP D = FindWord(rename,0,PRIMARY_CASE_ALLOWED); if (D && D->internalBits & FUNCTION_NAME) // it is a function { // is it also english. If builtin function, do that for sure // if user function AND english, more problematic. maybe he forgot WORDP E = FindWord(word); if (!E || !(E->properties & PART_OF_SPEECH) || D->x.codeIndex) strcpy(word,rename); // a recognized call else if (*ptr == '(') strcpy(word,rename); // use his spacing to decide } } if (currentFileLine != priorLine) { AddMapOutput(priorLine); priorLine = currentFileLine; } // a function call, if (*word == '^' && !IsDigit(word[1]) && word[1] != '^'&& word[1] != '=' && word[1] != '"' && word[1] != '\'' && word[1] != USERVAR_PREFIX && word[1] != '_' && word[1] && *nextToken == '(' ) { ptr = ReadCall(word,ptr,in,dataChunk,*nextToken == '(',needtofield); // add function call needtofield = false; *assignKind = 0; } else if (*word == '^' && IsDigit(word[1]) ) // fn var { strcpy(dataChunk,word); // add simple item into data dataChunk += strlen(dataChunk); } else { if (*word == '~' ) CheckSetOrTopic(word); if (IsAlphaUTF8(*word) && spellCheck == OUTPUT_SPELL) SpellCheckScriptWord(word,-1,true); strcpy(dataChunk,word); // add simple item into data dataChunk += strlen(dataChunk); } *dataChunk++ = ' '; } while (*(dataChunk-1) == ' ') *--dataChunk = 0; *dataChunk++ = ' '; *dataChunk = 0; // now verify no choice block exceeds CHOICE_LIMIT and that each [ is closed with ] if (!javascript) while (*original) { original = ReadCompiledWord(original,word); if (*original != '[') continue; unsigned int count = 0; char* at = original; while (*at == '[') { // find the closing ] while (ALWAYS) { at = strchr(at+1,']'); // find closing ] - we MUST find it (check in initsql) if (!at) BADSCRIPT((char*)"OUTPUT-5 Failure to close [ choice\r\n") if (*(at-2) != '\\') break; // found if not a literal \[ } ++count; at 
+= 2; // at next token } if (count >= (CHOICE_LIMIT - 1)) BADSCRIPT((char*)"OUTPUT-6 Max %d choices in a row\r\n",CHOICE_LIMIT) original = at; } patternContext = oldContext; outputStart = oldOutputStart; mydata = dataChunk; return ptr; } static void ReadTopLevelRule(WORDP topicName, char* typeval,char* &ptr, FILE* in,char* data,char* basedata) {// handles 1 responder/gambit + all rejoinders attached to it char type[100]; complexity = 1; strcpy(type,typeval); char info[400]; char kind[MAX_WORD_SIZE]; char tname[MAX_WORD_SIZE]; strcpy(tname, topicName->word); char* tilde = strchr(tname + 1, '~'); if (tilde) *tilde = 0; // remove dup index strcpy(kind,type); char word[MAX_WORD_SIZE]; char rejoinders[256]; // legal levels a: thru q: memset(rejoinders,0,sizeof(rejoinders)); // rejoinders == 1 is normal, 2 means authorized in [] 3 means authorized and used *rejoinders = 1; // we know we have a responder. we will see about rejoinders later while (ALWAYS) // read responser + all rejoinders { MakeLowerCase(kind); char* original = data; int level = 0; // validate rejoinder is acceptable if (Rejoinder(kind)) { complexity = 1; int count = level = *kind - 'a' + 1; // 1 ... rejoinders[level] = 1; // we are now at this level, enables next level // levels not authorized by [b:][g:] etc are disabled while (++count < 20) { if (rejoinders[count] == 1) rejoinders[count] = 0; } currentRuleID += ONE_REJOINDER; WriteVerify(""); } strcpy(data,kind); data += 2; strcpy(data++, " "); bool patternDone = false; #ifdef INFORMATION A responder of any kind consists of a prefix of `xx spot xx is an encoded jump offset to go the the end of the responder. Then it has the kind item (t: s: etc). Then a space. Then one of 3 kinds of character: a. a (- indicates start of a pattern b. a space - indicates no pattern exists c. 
a 1-byte letter jump code - indicates immediately followed by a label and the jump code takes you to the ( #endif char label[MAX_WORD_SIZE]; char labelName[MAX_WORD_SIZE]; *label = 0; *labelName = 0; bool verified = false; while (ALWAYS) // read as many tokens as needed to complete the responder definition { ptr = ReadNextSystemToken(in,ptr,word,false); if (!*word) break; MakeLowerCopy(lowercaseForm,word); size_t len = strlen(word); if (TopLevelUnit(word) || TopLevelRule(lowercaseForm) || !stricmp(word,(char*)"datum:")) { *word = 0; break;// responder definition ends when another major unit or top level responder starts } if (*word == '(') // found pattern, no label { sprintf(info," rule: %s.%u.%u %s",currentTopicName,TOPLEVELID(currentRuleID),REJOINDERID(currentRuleID),kind); AddMap(info,NULL,&mapRuleCount); // rule char* pstart = data; ptr = ReadPattern(ptr-1,in,data,false,false); // back up and pass in the paren for pattern patternDone = true; *word = 0; if (echorulepattern) Log(ECHOUSERLOG, "rule %s\r\n", pstart); break; } else // label or start of output { ReadNextSystemToken(in,ptr,nextToken,false,true); // peek what comes after if (*nextToken == '(' && (IsAlphaUTF8(*word) ||IsDigit(*word))) // label exists { if (!IsLegalName(word,true)) BADSCRIPT((char*)"? Illegal characters in rule label %s\r\n", word) char name[MAX_WORD_SIZE]; *name = '^'; strcpy(name+1,word); WORDP D = FindWord(name,0,LOWERCASE_LOOKUP); // all functions are lower case - all labels are uppercase) if (D && D->internalBits & FUNCTION_NAME && (*kind == GAMBIT || *kind == RANDOM_GAMBIT)) WARNSCRIPT((char*)"label: %s is a potential macro in %s. Add ^ if you want it treated as such.\r\n",word,currentFilename) else if (!stricmp(word,(char*)"if") || !stricmp(word,(char*)"loop") || !stricmp(word, (char*)"jsonloop")) WARNSCRIPT((char*)"label: %s is a potential flow control (if/loop/jsonloop) in %s. 
Add ^ if you want it treated as a control word.\r\n",word,currentFilename) sprintf(info," rule: %s.%u.%u-%s %s",currentTopicName,TOPLEVELID(currentRuleID),REJOINDERID(currentRuleID),name+1, kind); AddMap(info,NULL,&mapRuleCount); // rule MakeUpperCase(word); // labels are uppercase (topics and functions are lower case) char* bots = topicName->w.topicBots; if (!bots || !*bots) { bots = "*"; // general access } while (*bots) { char bot[MAX_WORD_SIZE]; bots = ReadCompiledWord(bots, bot); if (!*bot) break; sprintf(label, "%s.%s-%s", tname, word, bot ); MakeUpperCase(label); WORDP E = StoreWord(label, AS_IS); AddInternalFlag(E, LABEL); } strcpy(labelName,word); if (strchr(word,'.')) BADSCRIPT((char*)"RULE-2 Label %s must not contain a period\r\n",word) if (len > 160) BADSCRIPT((char*)"RULE-2 Label %s must be less than 160 characters\r\n",word) if (!verified) { WriteVerify(word); // dump any accumulated verification data before the rule verified = true; } int fulllen = len; if (len > 40) { int tens = len / 40; // how many 40s does it hold len -= (tens * 40); *data++ = (char) (tens + '*'); // detectable as a 2char label *data++ = (char)('0' + len + 2); // prefix attached to label } else *data++ = (char)('0' + len + 2); // prefix attached to label strcpy(data,word); data += fulllen; strcpy(data++, " "); ReadNextSystemToken(NULL,NULL,NULL); // drop lookahead token char* pstart = data; ptr = ReadPattern(ptr,in,data,false,false); // read ( for real in the paren for pattern patternDone = true; *word = 0; if (echorulepattern) Log(ECHOUSERLOG, "rule %s\r\n", pstart); } else // we were seeing start of output (no label and no pattern) for gambit, proceed to output { sprintf(info," rule: %s.%u.%u-%s %s",currentTopicName,TOPLEVELID(currentRuleID),REJOINDERID(currentRuleID),labelName,kind); AddMap(info,NULL,&mapRuleCount); // rule if (*type != GAMBIT && *type != RANDOM_GAMBIT) BADSCRIPT((char*)"RULE-3 Missing pattern for responder\r\n") strcpy(data++, " "); patternDone = true; // 
leave word intact to pass to readoutput } break; } } // END OF WHILE if (patternDone) { if (!verified) { WriteVerify(""); // dump any accumulated verification data before the rule verified = true; } dataBase = data; ptr = ReadOutput(false,false,ptr,in,data,rejoinders,word,NULL); dataBase = NULL; char complex[MAX_WORD_SIZE]; sprintf(complex," Complexity of rule %s.%u.%u-%s %s %u", currentTopicName,TOPLEVELID(currentRuleID),REJOINDERID(currentRuleID),labelName,kind,complexity); AddMap(NULL, complex, NULL); // complexity // data points AFTER last char added. Back up to last char, if blank, leave it to be removed. else restore it. while (*--data == ' '); *++data = ' '; strcpy(data+1,ENDUNITTEXT); // close out last topic item+ data += strlen(data); while (ALWAYS) // read all verification comments for next rule if any, getting the next real word token { ptr = ReadNextSystemToken(in,ptr,word,false); if (*word != '#' || word[1] != '!') break; ptr = AddVerify(word,ptr); } MakeLowerCopy(lowercaseForm,word); if (!*word || TopLevelUnit(word) || TopLevelRule(lowercaseForm) || !stricmp(word,(char*)"datum:")) { ptr -= strlen(word); // safe break;// responder definition ends when another major unit or top level responder starts } // word is a rejoinder type strcpy(kind,lowercaseForm); } else ReportBug((char*)"Prior script not complete- unexpected top level word %s after seeing %s", lowercaseForm, data - 20); } // did he forget to fill in any [] jumps for (unsigned int i = ('a'-'a'); i <= ('q'-'a'); ++i) { if (rejoinders[i] == 2) BADSCRIPT((char*)"RULE-4 Failed to define rejoinder %c: for responder just ended\r\n", i + 'a' - 1) } *data = 0; dataBase = NULL; } static void ErasePendingFunction(WORDP D,int functionArgumentCount) { HEAPREF list = undefinedCallThreadList; while (list) { uint64 functionNamex; uint64 filenamex; uint64 linex; HEAPREF oldlist = list; list = UnpackHeapval(list, functionNamex, filenamex, linex); if (!functionNamex) continue; // reference deleted previously 
char* functionData = (char*)functionNamex; uint64 bot = *(uint64*)functionData; if (bot != myBot || strcmp(D->word, functionData + 9)) continue; if (functionData[8] != functionArgumentCount && !(D->internalBits & VARIABLE_ARGS_TABLE)) BADSCRIPT( (char*)"Function %s wrong argument count %d expected %d given for bot %s \r\n", D->word, functionData[8], functionArgumentCount, scopeBotName); uint64* data = (uint64*)oldlist; data[1] = 0; // kill reference } } static char* ReadMacro(char* ptr,FILE* in,char* kind,unsigned int build,char* data) { bool table = !stricmp(kind,(char*)"table:"); // create as a transient notwrittentofile bool apimacro = !stricmp(kind, (char*)"apimacro:"); // create as a transient notwrittentofile if (!apimacro) *currentTopicName = 0; displayIndex = 0; complexity = 1; uint64 typeFlags = 0; if (!stricmp(kind,(char*)"tableMacro:") || table) typeFlags = IS_TABLE_MACRO; else if (!stricmp(kind,(char*)"outputMacro:") || apimacro) typeFlags = IS_OUTPUT_MACRO; else if (!stricmp(kind,(char*)"patternMacro:")) typeFlags = IS_PATTERN_MACRO; else if (!stricmp(kind,(char*)"dualMacro:")) typeFlags = IS_PATTERN_MACRO | IS_OUTPUT_MACRO; *macroName = 0; macroid = 0; functionArgumentCount = 0; char* d = AllocateBuffer(); char* revised = AllocateBuffer(); char* pack = data; int64 macroFlags = 0; int parenLevel = 0; WORDP D = NULL; bool gettingArguments = true; patternContext = false; char word[MAX_WORD_SIZE]; while (gettingArguments) // read as many tokens as needed to get the name and argumentList { ptr = ReadNextSystemToken(in,ptr,word,false); if (!*word) break; // end of file if (!*macroName) // get the macro name { if (*word == '^' || *word == '~') memmove(word,word+1,strlen(word)); // remove his ^ MakeLowerCase(word); if (!table && !IsAlphaUTF8(*word) ) BADSCRIPT((char*)"MACRO-1 Macro name must start alpha ^%s\r\n",word) if (apimacro && strncmp(word, "dp_", 3)) BADSCRIPT((char*)"MACRO-1 Macro name must start with ^dp_ - ^%s\r\n", word) if (table) { 
strcpy(macroName,(char*)"^tbl:"); strcat(macroName,word); Log(USERLOG,"Reading table %s\r\n",macroName); } else { if (!IsLegalName(word)) BADSCRIPT((char*)"MACRO-2 Illegal characters in function name %s\r\n",word) *macroName = '^'; strcpy(macroName+1,word); if (!apimacro) { Log(USERLOG, "Reading %s %s\r\n", kind, macroName); AddMap((char*)" macro:", macroName, &mapItemCount); } } D = StoreWord(macroName,AS_IS); if (D->x.codeIndex) BADSCRIPT((char*)"MACRO-3 may not redefine system function %s\r\n", word) if (D->w.fndefinition && D->internalBits & FUNCTION_NAME && !table) // must be different BOT ID { int64 bid; ReadInt64((char*)GetDefinition(D),bid); // have to allow multiple instances of boot bot if (bid == (int64)myBot && stricmp(D->word,"^csboot")) BADSCRIPT((char*)"MACRO-3 macro %s already defined\r\n",macroName) } continue; } if (parenLevel == 0 && !stricmp(word,(char*)"variable")) // putting "variable" before the args list paren allows you to NAME all args but get ones not supplied filled in with * (tables) or null (macros) { D->internalBits |= VARIABLE_ARGS_TABLE; continue; } if (parenLevel == 0 && !stricmp(word, (char*)"tab")) { D->internalBits |= TABBED; continue; } size_t len = strlen(word); if (TopLevelUnit(word)) // definition ends when another major unit starts { ptr -= len; // let someone else see this starter also break; } char* restrict = NULL; switch(*word) { case '(': if (parenLevel++ != 0) BADSCRIPT((char*)"MACRO-4 bad paren level in macro definition %s\r\n",macroName) continue; // callArgumentList open case ')': if (--parenLevel != 0) BADSCRIPT((char*)"MACRO-5 bad closing paren in macro definition %s\r\n",macroName) gettingArguments = false; break; case '$': // declaring local restrict = strchr(word, '.'); if (restrict) { if (!stricmp(restrict + 1, (char*)"KEEP_QUOTES") && (typeFlags == IS_TABLE_MACRO || typeFlags == IS_OUTPUT_MACRO)) macroFlags |= 1ull << functionArgumentCount; // a normal string where spaces are kept instead of _ (format 
string) else if (!stricmp(restrict + 1, (char*)"HANDLE_QUOTES")) { if (typeFlags != IS_OUTPUT_MACRO) BADSCRIPT((char*)"MACRO-? HANDLE_QUOTES only valid with OUTPUTMACRO or DUALMACRO - %s \r\n", word) if (functionArgumentCount > 15) { int64 flag = 1ull << (functionArgumentCount - 16); // outputmacros flag <<= 32; macroFlags |= flag; } else macroFlags |= 1ull << functionArgumentCount; // outputmacros } else if (!stricmp(restrict + 1, (char*)"COMPILE") && typeFlags == IS_TABLE_MACRO) { if (functionArgumentCount > 15) { int64 flag = (1ull << 16) << (functionArgumentCount - 16); // outputmacros flag <<= 32; macroFlags |= flag; } else macroFlags |= (1ull << 16) << functionArgumentCount; // a compile string " " becomes "^:" } else if (!stricmp(restrict + 1, (char*)"UNDERSCORE") && typeFlags == IS_TABLE_MACRO) { ; } // default for quoted strings is _ else if (typeFlags != IS_TABLE_MACRO && typeFlags != IS_OUTPUT_MACRO) BADSCRIPT((char*)"Argument restrictions only available on Table Macros or OutputMacros - %s \r\n", word) else BADSCRIPT((char*)"MACRO-? Table/Tablemacro argument restriction must be KEEP_QUOTES OR COMPILE or UNDERSCORE - %s \r\n", word) *restrict = 0; } if (typeFlags & IS_PATTERN_MACRO) BADSCRIPT((char*)"MACRO-? May not use locals in a pattern/dual macro - %s\r\n",word) if (word[1] != '_') BADSCRIPT((char*)"MACRO-? Variable name as argument must be local %s\r\n",word) if (strchr(word, '.') || strchr(word, '[')) { BADSCRIPT((char*)"MACRO-? 
Variable name as argument must be simple, not json reference %s\r\n", word) } AddDisplay(word); strcpy(functionArguments[++functionArgumentCount],word); if (functionArgumentCount > MAX_ARG_LIMIT) BADSCRIPT((char*)"MACRO-7 Too many callArgumentList to %s - max is %d\r\n",macroName,MAX_ARG_LIMIT) continue; case '^': // declaring a new argument if (IsDigit(word[1])) BADSCRIPT((char*)"MACRO-6 Function arguments must be alpha names, not digits like %s\r\n",word) restrict = strchr(word,'.'); if (restrict) { if (!stricmp(restrict+1,(char*)"KEEP_QUOTES") && (typeFlags == IS_TABLE_MACRO || typeFlags == IS_OUTPUT_MACRO)) macroFlags |= 1ull << functionArgumentCount; // a normal string where spaces are kept instead of _ (format string) else if (!stricmp(restrict+1,(char*)"HANDLE_QUOTES")) { if (typeFlags != IS_OUTPUT_MACRO) BADSCRIPT((char*)"MACRO-? HANDLE_QUOTES only valid with OUTPUTMACRO or DUALMACRO - %s \r\n",word) if (functionArgumentCount > 15) { int64 flag = 1ull << (functionArgumentCount - 16); // outputmacros flag <<= 32; macroFlags |= flag; } else macroFlags |= 1ull << functionArgumentCount; // outputmacros } else if (!stricmp(restrict+1,(char*)"COMPILE") && typeFlags == IS_TABLE_MACRO) { if (functionArgumentCount > 15) { int64 flag = (1ull << 16) << (functionArgumentCount - 16); // outputmacros flag <<= 32; macroFlags |= flag; } else macroFlags |= (11ull << 16) << functionArgumentCount; // a compile string " " becomes "^:" } else if (!stricmp(restrict+1,(char*)"UNDERSCORE") && typeFlags == IS_TABLE_MACRO) {;} // default for quoted strings is _ else if (typeFlags != IS_TABLE_MACRO && typeFlags != IS_OUTPUT_MACRO) BADSCRIPT((char*)"Argument restrictions only available on Table Macros or OutputMacros - %s \r\n",word) else BADSCRIPT((char*)"MACRO-? 
Table/Tablemacro argument restriction must be KEEP_QUOTES OR COMPILE or UNDERSCORE - %s \r\n",word) *restrict = 0; } else {}// default for quoted strings on argumet is UNDERSCORE { WORDP X = FindWord(word); if (X && X->internalBits & FUNCTION_NAME) BADSCRIPT((char*)"MACRO-8 Function argument %s is also name of a function\r\n",word); } AddDisplay(word); strcpy(functionArguments[++functionArgumentCount],word); if (functionArgumentCount > MAX_ARG_LIMIT) BADSCRIPT((char*)"MACRO-7 Too many callArgumentList to %s - max is %d\r\n",macroName,MAX_ARG_LIMIT) continue; default: BADSCRIPT((char*)"MACRO-7 Bad argument %s to macro definition %s\r\n",word,macroName) } } if (!D) { dataBase = NULL; return ptr; // nothing defined } AddInternalFlag(D,(unsigned int)(FUNCTION_NAME|build|typeFlags)); if (functionArgumentCount > 15) *pack++ = (unsigned char)(functionArgumentCount - 15 + 'a'); else *pack++ = (unsigned char)(functionArgumentCount + 'A'); // some 10 can be had ^0..^9 bool optionalBrace = false; currentFunctionDefinition = D; dataBase = NULL; if ( (typeFlags & FUNCTION_BITS) == IS_PATTERN_MACRO) { char* at = d; ptr = ReadPattern(ptr,in,at,true,false); *at = 0; // insert display and add body back pack = WriteDisplay(pack); strcpy(pack,d); pack += at - d; } else { ReadNextSystemToken(in,ptr,word,false,true); // check for optional display variables if (*word == '(') { ptr = ReadDisplay(in,ptr); ReadNextSystemToken(in,ptr,word,false,true); } if (*word == '{') // see if he used optional { syntax { ReadNextSystemToken(in,ptr,word,false); optionalBrace = true; } dataBase = d; if ((typeFlags & FUNCTION_BITS) == IS_PATTERN_MACRO) dataBase = NULL; // now read body of macro char* at = d; // if on same line we have issue? 
ptr = ReadOutput(optionalBrace,false,ptr,in,at,NULL,NULL,NULL); ReadNextSystemToken(in,ptr,word,true); if (optionalBrace && *word == '}') ptr = ReadNextSystemToken(in,ptr,word,false); else if (optionalBrace) BADSCRIPT("Missing closing optional brace in reading macro %s\r\n", macroName) *at = 0; // insert display and add body back pack = WriteDisplay(pack); strcpy(pack,d); pack += at - d; } *pack++ = '`'; // add closing marker to script *pack = 0; // record that it is a macro, with appropriate validation information char botid[MAX_WORD_SIZE]; strcpy(botid, "0"); // default universal for api compilations if (!apimacro) sprintf(botid, (char*)"%s", PrintU64(myBot)); revised[0] = revised[1] = revised[2] = revised[3] = 0; revised += 4; // empty link sprintf(revised, (char*)"%s %lld ", botid, macroFlags); strcat(revised, data); size_t len = strlen(revised) + 4; if (table && D->w.fndefinition) // need to link old definition to new one { unsigned int heapIndex = Heap2Index((char*)D->w.fndefinition); revised[0] = (heapIndex >> 24) & 0xff; revised[1] = (heapIndex >> 16) & 0xff; revised[2] = (heapIndex >> 8) & 0xff; revised[3] = heapIndex & 0xff; } if (!apimacro) D->w.fndefinition = (unsigned char*)AllocateHeap(revised - 4, strlen(revised) + 4); ErasePendingFunction(D,functionArgumentCount); if (apimacro) { revised -= 2; *revised = (D->internalBits & VARIABLE_ARGS_TABLE) ? 
'O' : 'o'; revised[1] = ' '; *macroName = 0; dataBase = NULL; FreeBuffer(); FreeBuffer(); return revised; } if (!table) // tables are not real macros, they are temporary { char filename[SMALL_WORD_SIZE]; sprintf(filename,(char*)"%s/BUILD%s/macros%s.txt", topicfolder,baseName,baseName); // write out definition -- this is the real save of the data FILE* out = FopenUTF8WriteAppend(filename); if ((D->internalBits & FUNCTION_BITS) == IS_TABLE_MACRO) fprintf(out,(char*)"%s t %s\r\n",macroName,GetDefinition(D)); else if ((D->internalBits & FUNCTION_BITS) == (IS_OUTPUT_MACRO|IS_PATTERN_MACRO)) fprintf(out,(char*)"%s d %s\r\n",macroName,GetDefinition(D)); else { if (D->internalBits & VARIABLE_ARGS_TABLE) fprintf(out, (char*)"%s %c %s\r\n", macroName, ((D->internalBits & FUNCTION_BITS) == IS_OUTPUT_MACRO) ? 'O' : 'P', GetDefinition(D)); else fprintf(out, (char*)"%s %c %s\r\n", macroName, ((D->internalBits & FUNCTION_BITS) == IS_OUTPUT_MACRO) ? 'o' : 'p', GetDefinition(D)); } fclose(out); // dont use Fclose char complex[MAX_WORD_SIZE]; sprintf(complex, " Complexity of %s: %u", macroName, complexity); AddMap(NULL, complex, NULL); // complexity if (macroid != 0) { char name[MAX_WORD_SIZE]; sprintf(name, (char*)" bot: 0 %s %s ", macroName,PrintU64(macroid)); AddMap(NULL, name, NULL); // bot macro } } *macroName = 0; dataBase = NULL; FreeBuffer(); FreeBuffer(); return ptr; } static char* ReadTable(char* ptr, FILE* in,unsigned int build,bool fromtopic) { bool oldecho = echo; int oldtrace = trace; char name[MAX_WORD_SIZE]; char word[MAX_WORD_SIZE]; char post[MAX_WORD_SIZE]; char args[MAX_TABLE_ARGS+1][MAX_WORD_SIZE]; unsigned short quoteProcessing = 0; unsigned int indexArg = 0; char* pre = NULL; ptr = SkipWhitespace(ptr); ReadNextSystemToken(in,ptr,name,false,true); if (*name == '~') // table: ~x => table: ^x { *name = '^'; char* at = strchr(ptr, '~'); *at = '^'; } else if (*name != '^') // add function marker if it lacks one { memmove(name+1,name,strlen(name)+1); *name = '^'; } 
currentFunctionDefinition = FindWord(name); unsigned int sharedArgs; bool tableMacro = false; if (!currentFunctionDefinition) // go define a temporary tablemacro function since this is a spontaneous table Table: { if (fromtopic) BADSCRIPT((char*)"datum: from topic must use predefined table %s", name) char* data = AllocateBuffer(); ptr = ReadMacro(ptr,in,(char*)"table:",build,data); // defines the name,argumentList, and script FreeBuffer(); ptr = ReadNextSystemToken(in, ptr, word, false, false); // the DATA: separator if (stricmp(word,(char*)"DATA:")) BADSCRIPT((char*)"TABLE-1 missing DATA separator for table or corresponding tablemacro not yet defined- %s\r\n",word) sharedArgs = 0; } else // this is an existing table macro being executed { tableMacro = true; ptr = ReadNextSystemToken(in,ptr,word,false,false); // swallow function name ptr = ReadNextSystemToken(in,ptr,word,false,false); // swallow ( if (*word != '(') BADSCRIPT((char*)"TABLE-2 Must have ( before arguments") while (ALWAYS) // read argument values we supply to the existing tablemacro { ptr = ReadNextSystemToken(in,ptr,args[indexArg],false,false); if (*args[indexArg] == ')') break; if (*args[indexArg] == '^' && args[indexArg][1] != '"') BADSCRIPT((char*)"TABLE-3 TableMacro %s requires real args, not redefinition args",currentFunctionDefinition->word) if (++indexArg >= MAX_TABLE_ARGS) BADSCRIPT((char*)"TABLE-4 too many table args\r\n") } sharedArgs = indexArg; } unsigned char* defn = GetDefinition(currentFunctionDefinition); unsigned int wantedArgs = MACRO_ARGUMENT_COUNT(defn); char junk[MAX_WORD_SIZE]; defn = (unsigned char*) ReadCompiledWord((char*)defn, junk); // read bot id int flags; ReadInt((char*)defn, flags); quoteProcessing = (short int) flags; // values of KEEP_QUOTES for each argument // now we have the function definition and any shared arguments. We need to read the real arguments per table line now and execute. convertTabs = (currentFunctionDefinition->internalBits & TABBED) ? 
false : true; char* argumentList = AllocateBuffer(); tableinput = NULL; ++jumpIndex; int holdDepth = globalDepth; char* xxbase = ptr; // debug hook tableinput = AllocateBuffer(); while (ALWAYS) { if (setjmp(scriptJump[jumpIndex])) // flush on error { ptr = FlushToTopLevel(in,holdDepth,0); break; } ptr = ReadNextSystemToken(in,ptr,word,false,false); // real token read char* original = ptr - strlen(word); if (*word == '\\' && word[1] == 'n') continue; // newline means pretend new table entry if (!stricmp(word, (char*)":debug")) { DebugCode(word); continue; } if (!stricmp(word, (char*)":quit")) { myexit("table has :quit"); } if (*word == ':' && word[1]) // debug command { ptr = original; // safe char output[MAX_WORD_SIZE]; DoCommand(ptr,output); *ptr = 0; continue; } if (!*word || TopLevelUnit(word) || TopLevelRule(word)) // end { ptr = original; // safe break; } // process a data set from the line char* systemArgumentList = argumentList; *systemArgumentList++ = '('; *systemArgumentList++ = ' '; unsigned int argCount = 0; // common arguments processing for (unsigned int i = 0; i < sharedArgs; ++i) { if (*args[i] == '^' && args[i][1] == '"') { FunctionResult result; char* oldoutputbase = currentOutputBase; currentOutputBase = systemArgumentList; ReformatString(args[i][1],args[i]+2,systemArgumentList,result); currentOutputBase = oldoutputbase; } else strcpy(systemArgumentList,args[i]); systemArgumentList += strlen(systemArgumentList); *systemArgumentList++ = ' '; ++argCount; } // now fill in args of table data from a single line char* choiceArg = NULL; // the multiple interior bool startup = true; int holdtrace = trace; trace = 0; strcpy(tableinput, readBuffer); while (ALWAYS) { if (!startup) ptr = ReadSystemToken(ptr,word); // next item to associate if (!stricmp(word,(char*)":debug")) { DebugCode(word); continue; } if (!stricmp(word, (char*)":trace")) { trace = (unsigned int) -1; echo = true; continue; } startup = false; if (!*word) break; // end of LINE of items stuff 
if (*word == '\t') *word = '*'; // tab forces fill with * if (!stricmp(word,(char*)"...")) break; // pad to end of arg count if (!stricmp(word,(char*)"\\n")) // fake end of line { memmove(readBuffer,ptr,strlen(ptr)+1); // erase earlier stuff we've read ptr = readBuffer; break; } if (*word == '[' ) // choice set (one per line allowed) { if (choiceArg) BADSCRIPT((char*)"TABLE-5 Only allowed 1 multiple choice [] arg\r\n") pre = systemArgumentList; // these are the fixed arguments before the multiple choice one choiceArg = ptr; // multiple choices arg char* at = strchr(ptr,']'); // find end of multiple choice if (!at) BADSCRIPT((char*)"TABLE-6 bad [ ] ending %s in table %s\r\n",readBuffer,currentFunctionDefinition->word) ptr = at + 1; // continue fixed argumentList AFTER the multiple choices set (but leave blank if there) ++argCount; continue; // skipping over this arg, move on to next arg now. } uint64 flag = 0; // how do we store string arguments - with underscores, as is, compiled, bool keepQuotes = (quoteProcessing & ( 1 << argCount)) ? 1 : 0; // want to use quotes and spaces, instead of none and convert to _ which is the default // a normal string where spaces are kept instead of _ (format string) bool xxotherNotation = (quoteProcessing & ( (1 << 16) << argCount)) ? 1 : 0; // unused at present if (*word == FUNCTIONSTRING && (word[1] == '"' || word[1] == '\'')) { strcpy(word,CompileString(word)); // no underscores in string, compiled as executable // a compile string " " becomes "^:" flag = AS_IS; } else if (*word == '"' && keepQuotes) // no underscores in string, preserve string. Quotes needed to detect as single argument for fact creation { flag = AS_IS; } else { unsigned int n = BurstWord(word,(*word == '"') ? POSSESSIVES : 0); strcpy(word,JoinWords(n)); // by default strings are stored with _, pretending they are composite words. 
if (n > 1) flag = AS_IS; } if ( *word == '\\') memmove(word,word+1,strlen(word)); // remove escape if (*word == '"' && !word[1]) BADSCRIPT((char*)"TABLE-? isolated doublequote argument- start of string not recognized?\r\n"); if (flag != AS_IS && *word != '"' && strstr(word,(char*)" ")) BADSCRIPT((char*)"TABLE-7 unexpected space in string %s - need to use doublequotes around string\r\n",word); WORDP baseWord = StoreWord(word,flag); strcpy(word,baseWord->word); strcpy(systemArgumentList,word); systemArgumentList += strlen(systemArgumentList); *systemArgumentList++ = ' '; ++argCount; // handle synonyms as needed MEANING base = MakeMeaning(baseWord); if (convertTabs) ptr = SkipWhitespace(ptr); if (*ptr == '(' && ++ptr) while (ALWAYS) // synonym listed, create a fact for it { ptr = ReadSystemToken(ptr,word); if (!*word || *word == '[' || *word == ']') BADSCRIPT((char*)"TABLE-8 Synomym in table %s lacks token\r\n",currentFunctionDefinition->word) if (*word == ')') break; // end of synonms strcpy(word,JoinWords(BurstWord(word,CONTRACTIONS))); if (IsUpperCase(*word)) CreateFact(MakeMeaning(StoreWord(word,NOUN|NOUN_PROPER_SINGULAR)),Mmember,base); else CreateFact(MakeMeaning(StoreWord(word,NOUN|NOUN_SINGULAR)),Mmember,base); } if ((wantedArgs - sharedArgs) == 1) { memmove(readBuffer,ptr,strlen(ptr)+1); ptr = readBuffer; break; } } while ( argCount < wantedArgs && (!stricmp(word,(char*)"...") || currentFunctionDefinition->internalBits & VARIABLE_ARGS_TABLE)) { strcpy(systemArgumentList,(char*)"*"); systemArgumentList += strlen(systemArgumentList); *systemArgumentList++ = ' '; ++argCount; } *systemArgumentList = 0; *post = 0; if (choiceArg) strcpy(post,pre); // save argumentList after the multiple choices // now we have one map of the argumentList row if (argCount && argCount != wantedArgs) BADSCRIPT((char*)"TABLE-9 Bad table %s in table %s, want %d arguments and have %d\r\n",original,currentFunctionDefinition->word, wantedArgs,argCount) // table line is read, now execute 
rules on it, perhaps multiple times, after stuffing in the choice if one if (argCount) // we swallowed a dataset. Process it { while (ALWAYS) { // prepare variable argumentList if (choiceArg) // do it with next multi { choiceArg = ReadSystemToken(choiceArg,word); // get choice if (!*word || *word == ']') break; // end of multiple choice unsigned int control = 0; if (*word == FUNCTIONSTRING && word[1] == '"') strcpy(word,CompileString(word)); // readtable else strcpy(word,JoinWords(BurstWord(word,CONTRACTIONS|control))); strcpy(word,StoreWord(word,(control) ? AS_IS : 0)->word); if (*word == '\'') // quoted value { choiceArg = ReadSystemToken(choiceArg,word); // get 1st of choice if (!*word || *word == ']') break; // end of LINE of items stuff ForceUnderscores(word); strcpy(pre,StoreWord(word)->word); // record the local w/o any set expansion } else { WORDP D = StoreWord(word); strcpy(pre,D->word); // record the multiple choice choiceArg = SkipWhitespace(choiceArg); if (*choiceArg == '(' && ++choiceArg) while(choiceArg) // synonym { choiceArg = ReadSystemToken(choiceArg,word); if (!*word) BADSCRIPT((char*)"TABLE-10 Failure to close synonym list in table %s\r\n",currentFunctionDefinition->word) if (*word == ')') break; // end of synonms ForceUnderscores(word); CreateFact(MakeMeaning(StoreWord(word)),Mmember,MakeMeaning(D)); } } char* at = pre + strlen(pre); *at++ = ' '; strcpy(at,post); // add rest of argumentList systemArgumentList = at + strlen(at); } *systemArgumentList++ = ')'; // end of call setup *systemArgumentList = 0; currentRule = NULL; FunctionResult result; AllocateOutputBuffer(); trace = holdtrace; if (trace) Log(USERLOG, "%s%s\r\n", currentFunctionDefinition->word, argumentList); DoFunction(currentFunctionDefinition->word, argumentList, currentOutputBase, result); trace = 0; FreeOutputBuffer(); if (!choiceArg) break; } } if (fromtopic) break; // one entry only } convertTabs = true; FreeBuffer(); tableinput = NULL; FreeBuffer(); // not required to happen 
if error happens if (!tableMacro) // delete dynamic function { currentFunctionDefinition->internalBits &= -1LL ^ FUNCTION_NAME; currentFunctionDefinition->w.fndefinition = NULL; AddInternalFlag(currentFunctionDefinition,DELETED_MARK); } currentFunctionDefinition = NULL; --jumpIndex; echo = oldecho; trace = oldtrace; return ptr; } static void SetJumpOffsets(char* data) // store jump offset for each rule { char* at = data; char* end = data; while (*at && *++at) // find each responder end { if (*at == ENDUNIT) { int diff = (int)(at - end + 1); if (diff > MAX_JUMP_OFFSET) BADSCRIPT((char*)"TOPIC-9 Jump offset too far - %d but limit %d near %s\r\n",diff,MAX_JUMP_OFFSET,readBuffer) // limit 2 char (12 bit) Encode(diff,end); end = at + 1; } } } static char* ReadKeyword(char* word,char* ptr,bool& notted, int& quoted,MEANING concept,uint64 type,bool ignoreSpell,unsigned int build,bool duplicate,bool startOnly,bool endOnly,bool emoticon) { if (emoticon) type |= EMOJI; // read the keywords zone of the concept char* at; MEANING M; WORDP D; size_t len = strlen(word); switch(*word) { case '!': // excuded keyword if (len == 1) BADSCRIPT((char*)"CONCEPT-5 Must attach ! to keyword in %s\r\n",Meaning2Word(concept)->word); if (word[1] == '!') BADSCRIPT((char*)"CONCEPT-5 Cannot use ! after !! in %s\r\n", Meaning2Word(concept)->word); notted = true; ptr -= len; if (*ptr == '!') ++ptr; break; case '\'': if (len == 1) BADSCRIPT((char*)"CONCEPT-6 Must attach ' to keyword in %s\r\n",Meaning2Word(concept)->word); if (word[1] == '\'') { if (word[2] == '\'') BADSCRIPT((char*)"CONCEPT-5 Cannot use ' after ' in %s\r\n", Meaning2Word(concept)->word); quoted = 2; } else quoted = 1; // since we emitted the ', we MUST emit the next token ptr -= len; if (*ptr == '\'') ++ptr; if (*ptr == '\'') ++ptr; break; default: if (*word == USERVAR_PREFIX || (*word == '_' && IsDigit(word[1])) || (*word == SYSVAR_PREFIX && IsLowerCase(word[1]) && IsLowerCase(word[2]))) BADSCRIPT((char*)"CONCEPT-? 
Cannot use $var or _var or %var as a keyword in %s\r\n",Meaning2Word(concept)->word); if (*word == '~') MakeLowerCase(word); // sets are always lower case at = strchr(word + 1, '~'); // wordnet meaning request, confirm definition exists if ((*word != '"' && *word != '\'') && at && stricmp(current_language, "english") && at[-1] != '*' && IsDigit(at[1])) *at = 0; if (*word == '"' && word[1] == '(')// pattern word { unsigned int flags = 0; if (build & BUILD1) flags |= FACTBUILD1; // concept facts from build 1 MEANING conceptPattern = MakeMeaning(StoreWord("conceptPattern", AS_IS)); size_t len = strlen(word); if (word[len-1] != '"') BADSCRIPT("ConceptPattern not closing with quote") if (word[len - 2] != ')') BADSCRIPT("ConceptPattern not closing with )") word[len - 1] = 0; char* data = AllocateBuffer() ; char* startData = data++; *startData = '^'; // compiled pattern marker ReadPattern(word+1, NULL, data, false, false); // back up and pass in the paren for pattern FreeBuffer(); M = MakeMeaning(StoreWord(startData, AS_IS)); CreateFact(M, conceptPattern, concept, flags); return ptr; } else if (at) // wordnet meaning request, confirm definition exists { char level[MAX_WORD_SIZE]; strcpy(level,at); M = ReadMeaning(word); if (!M) BADSCRIPT((char*)"CONCEPT-7 WordNet word doesn't exist %s\r\n",word) WORDP D = Meaning2Word(M); int index = Meaning2Index(M); if ((GetMeaningCount(D) == 0 && !(GETTYPERESTRICTION(M) & BASIC_POS)) || (index && !strcmp(word,D->word) && index > GetMeaningCount(D))) { if (index && !stricmp(current_language, "english")) WARNSCRIPT((char*)"WordNet word does not have such meaning %s\r\n", word) M &= -1 ^ INDEX_BITS; } } else // ordinary word or concept-- see if it makes sense { char end = word[strlen(word)-1]; if (nospellcheck) {} else if (!IsAlphaUTF8OrDigit(end) && end != '"' && strlen(word) != 1 && !nomixedcase) { if (IsPunctuation(end)); // no warnings for emoji else if (!IsAlphaUTF8DigitNumeric(end)); else if (end != '.' 
|| strlen(word) > 6) WARNSCRIPT((char*)"last character of keyword %s is punctuation. Is this intended?\r\n", word) } else if (end == '"' && word[(strlen(word) - 2)] == ' ') BADSCRIPT((char*) "CONCEPT-? Keyword %s ends in illegal space\r\n", word) if (*word == '\\') memcpy(word,word+1,strlen(word)); // how to allow $e as a keyword M = ReadMeaning(word); D = Meaning2Word(M); uint64 type1 = type; if (type & NOUN_SINGULAR && D->internalBits & UPPERCASE_HASH) { type1 ^= NOUN_SINGULAR; type1 |= NOUN_PROPER_SINGULAR; } if (type) AddProperty(D,type1); // augment its type if (*D->word == '~') // concept { if (M == concept) BADSCRIPT((char*)"CONCEPT-8 Cannot include topic into self - %s\r\n",D->word); CheckSetOrTopic(D->word); } else if ( ignoreSpell || !spellCheck || strchr(D->word,'_') || !D->word[1] || D->internalBits & UPPERCASE_HASH) {;} // ignore spelling issues, phrases, short words && proper names else if (!(D->properties & PART_OF_SPEECH) && !(D->systemFlags & PATTERN_WORD)) { if (!(spellCheck & NO_SPELL)) SpellCheckScriptWord(D->word,-1,false); WriteKey(D->word); WritePatternWord(D->word); } } // end ordinary word unsigned int flags = 0; if (quoted == 1) flags |= ORIGINAL_ONLY; else if (quoted == 2) flags |= RAWCASE_ONLY; if (duplicate) flags |= FACTDUPLICATE; if (startOnly) flags |= START_ONLY; if (endOnly) flags |= END_ONLY; if (build & BUILD1) flags |= FACTBUILD1; // concept facts from build 1 FACT* F = CreateFact(M,(notted) ? 
Mexclude : Mmember,concept, flags); quoted = 0; notted = false; } return ptr; } bool HasBotMember(WORDP concept, uint64 id) { FACT* F = GetObjectHead(concept); while (F) { if (F->verb == Mmember) // manual ValidMemberFact(F) because bot id is passed in { // if language flags exist, confirm those unsigned int factlanguage = (F->flags & FACTLANGUAGEBITS) << LANGUAGE_SHIFT; // if we are in universal language, we can see all facts // if a fact is in universal language, it can be seen by any language - limit 7 languages if (factlanguage && language_bits && factlanguage != language_bits) return false; // limited to a specific bot // We ARE allowed to add to general in existing layer if (id && F->botBits & id) return true; // not allow generic fact? // we are general and it has general already if (!id) return true; } F = GetObjectNext(F); } return false; } static char* ReadBot(char* ptr) { *scopeBotName = ' '; ptr = SkipWhitespace(ptr); char* original = ptr; if (IsDigit(*ptr)) { int64 n; ptr = ReadInt64(ptr,n); // change bot id myBot = n; } MakeLowerCopy(scopeBotName+1,ptr); // presumes til end of line size_t len = strlen(scopeBotName); while (scopeBotName[len-1] == ' ') scopeBotName[--len] = 0; bool oldecho = echo; echo = true; if (len != 0) { strcat(scopeBotName, " "); // single trailing space char* x; while ((x = strchr(scopeBotName, ','))) *x = ' '; // change comma to space. 
all bot names have spaces on both sides } Log(USERLOG,"Reading bot restriction: %s\r\n", original); echo = oldecho; return ""; } static char* ReadTopic(char* ptr, FILE* in,unsigned int build) { patternContext = false; displayIndex = 0; char* data = (char*) mymalloc(MAX_TOPIC_SIZE); // use a big chunk of memory for the data if (!data) ReportBug("FATAL: ReadTopic malloc failed"); *data = 0; char* pack = data; ++topicCount; *currentTopicName = 0; unsigned int flags = 0; bool topicFlagsDone = false; bool keywordsDone = false; int parenLevel = 0; MEANING topicValue = 0; WORDP topicName = NULL; unsigned int gambits = 0; unsigned int toplevelrules = 0; // does not include rejoinders currentRuleID = 0; // reset rule notation verifyIndex = 0; bool notted = false; int quoted = 0; bool stayRequested = false; int buffercount = bufferIndex; int frameindex = globalDepth; if (setjmp(scriptJump[++jumpIndex])) { bufferIndex = buffercount; ptr = FlushToTopLevel(in, frameindex,data); // if error occurs lower down, flush to here } while (ALWAYS) // read as many tokens as needed to complete the definition { char word[MAX_WORD_SIZE]; ptr = ReadNextSystemToken(in,ptr,word,false); if (!*word) break; if (!*currentTopicName) // get the topic name { if (*word != '~') BADSCRIPT((char*)"Topic name - %s must start with ~\r\n",word) strcpy(currentTopicName,word); Log(USERLOG,"Reading topic %s\r\n",currentTopicName); topicName = FindWord(currentTopicName); if (topicName && (topicName->internalBits & CONCEPT)) WARNSCRIPT((char*)"TOPIC-1 Concept already defined with this topic name %s\r\n", currentTopicName) if (!IsLegalName(currentTopicName)) BADSCRIPT((char*)"TOPIC-2 Illegal characters in topic name %s\r\n", currentTopicName) topicName = StoreWord(currentTopicName); // note we have seen definition char cumulate[MAX_WORD_SIZE]; strcpy(cumulate, topicName->word); strcat(cumulate, "$"); char* bot = (*scopeBotName == ' ') ? 
(scopeBotName + 1) : scopeBotName; strcat(cumulate, bot); size_t l = strlen(cumulate); if (cumulate[l - 1] == ' ') cumulate[l - 1] = 0; // remove trailing blank WORDP alt = StoreWord(cumulate, AS_IS); alt->internalBits |= BEEN_HERE; // has been defined topicValue = MakeMeaning(topicName); // handle potential multiple topics of same name duplicateCount = 0; while (topicName->internalBits & TOPIC || topicName->objectHead)// allow concepts and topics to be referenced { ++duplicateCount; char name[MAX_WORD_SIZE]; sprintf(name,(char*)"%s%c%u",currentTopicName,DUPLICATETOPICSEPARATOR,duplicateCount); topicName = StoreWord(name); if (!*duplicateTopicName) strcpy(duplicateTopicName,currentTopicName); } strcpy(currentTopicName,topicName->word); AddMap((char*)" topic:", topicName->word, &mapItemCount); if (mapFileJson) fprintf(mapFileJson, (char*)",\r\n\"rules\" : [\r\n"); mapRuleCount = 0; AddInternalFlag(topicName,(unsigned int)(build|TOPIC)); topicName->w.topicBots = NULL; currentTopicBots = NULL; continue; } if (TopLevelUnit(word)) // definition ends when another major unit starts { ptr -= strlen(word); // let someone else see this starter also // safe break; } switch(*word) { case '(': case '[': if (!keywordsDone && topicFlagsDone) BADSCRIPT((char*)"TOPIC-3 Illegal bracking in topic keywords %s\r\n",word) if (flags & TOPIC_SHARE && flags & TOPIC_SYSTEM) BADSCRIPT((char*)"TOPIC-? 
Don't need SHARE on SYSTEM topic %s, it is already shared via system\r\n",currentTopicName) topicFlagsDone = true; // topic flags must occur before list of keywords ++parenLevel; if (!topicName->w.topicBots && *scopeBotName) topicName->w.topicBots = AllocateHeap(scopeBotName); break; case ')': case ']': --parenLevel; if (parenLevel == 0) { keywordsDone = true; ReadNextSystemToken(in,ptr,word,false,true); // check for optional display variables if (*word == '(') ptr = ReadDisplay(in,ptr); } break; case '#': if (*word == '#' && word[1] == '!') ptr = AddVerify(word,ptr); continue; default: MakeLowerCopy(lowercaseForm,word); if (!topicFlagsDone) // do topic flags { if (!strnicmp(word,(char*)"bot=",4)) // bot restriction on the topic { char botlist[MAX_WORD_SIZE]; MakeLowerCopy(botlist,word+4); char* x; while ((x = strchr(botlist,','))) *x = ' '; // change comma to space. all bot names have spaces on both sides topicName->w.topicBots = AllocateHeap(botlist,strlen(botlist)); // bot=harry,georgia,roger currentTopicBots = topicName->w.topicBots; } else if (!stricmp(word,(char*)"deprioritize")) flags |= TOPIC_LOWPRIORITY; else if (!stricmp(word,(char*)"noblocking")) flags |= TOPIC_NOBLOCKING; else if (!stricmp(word,(char*)"nopatterns") || !stricmp(word,(char*)"nopattern")) flags |= TOPIC_NOPATTERNS; else if (!stricmp(word,(char*)"nogambits") || !stricmp(word,(char*)"nogambit")) flags |= TOPIC_NOGAMBITS; else if (!stricmp(word,(char*)"nosamples") || !stricmp(word,(char*)"nosample")) flags |= TOPIC_NOSAMPLES; else if (!stricmp(word,(char*)"nokeys") || !stricmp(word,(char*)"nokeywords") ) flags |= TOPIC_NOKEYS; else if (!stricmp(word,(char*)"keep")) flags |= TOPIC_KEEP; else if (!stricmp(word,(char*)"norandom")) flags &= -1 ^TOPIC_RANDOM; else if (!stricmp(word,(char*)"normal")) flags &= -1 ^TOPIC_PRIORITY; else if (!stricmp(word,(char*)"norepeat")) flags &= -1 ^TOPIC_REPEAT; else if (!stricmp(word,(char*)"nostay")) flags |= TOPIC_NOSTAY; else if 
(!stricmp(word,(char*)"priority")) flags |= TOPIC_PRIORITY; else if (!stricmp(word,(char*)"random")) flags |= TOPIC_RANDOM; else if (!stricmp(word,(char*)"repeat")) flags |= TOPIC_REPEAT; else if (!stricmp(word,(char*)"safe")) flags |= TOPIC_SAFE; else if (!stricmp(word,(char*)"share")) flags |= TOPIC_SHARE; else if (!stricmp(word,(char*)"stay")) { flags &= -1 ^TOPIC_NOSTAY; stayRequested = true; } else if (!stricmp(word,(char*)"erase")) flags &= -1 ^TOPIC_KEEP; else if (!stricmp(word,(char*)"system")) { flags |= TOPIC_SYSTEM | TOPIC_KEEP | TOPIC_NOSTAY; if (stayRequested) BADSCRIPT((char*)"TOPIC-4 Topic %s cannot be both STAY and SYSTEM\r\n",currentTopicName) } else if (!stricmp(word,(char*)"user")); else BADSCRIPT((char*)"Bad topic flag %s for topic %s\r\n",word,currentTopicName) } else if (!keywordsDone) ptr = ReadKeyword(word,ptr,notted,quoted,topicValue,0,false,build,false,false,false,false);// absorb keyword list else if (!stricmp(word,(char*)"datum:")) // absorb a top-level data table line { ptr = ReadTable(ptr,in,build,true); } else if (TopLevelRule(lowercaseForm))// absorb a responder/gambit and its rejoinders { if (IsUpperCase(*word)) BADSCRIPT((char*)"Rule ID must be lower case: %s\r\n",word); ++toplevelrules; if (TopLevelGambit(word)) ++gambits; if (pack == data) { strcpy(pack,&ENDUNITTEXT[1]); // init 1st rule pack += strlen(pack); } ReadTopLevelRule(topicName,lowercaseForm,ptr,in,pack,data); currentRuleID = TOPLEVELID(currentRuleID) + 1; pack += strlen(pack); if ((pack - data) > (MAX_TOPIC_SIZE - 2000)) BADSCRIPT((char*)"TOPIC-4 Topic %s data too big. 
Split it by calling another topic using u: () respond(~subtopic) and putting the rest of the rules in that subtopic\r\n",currentTopicName) } else BADSCRIPT((char*)"Expecting responder for topic %s, got %s",currentTopicName,word) } } --jumpIndex; if (parenLevel) BADSCRIPT((char*)"TOPIC-5 Failure to balance ( in %s\r\n",currentTopicName) if (!topicName) BADSCRIPT((char*)"TOPIC-6 No topic name?\r\n") if (toplevelrules > MAX_TOPIC_RULES) BADSCRIPT((char*)"TOPIC-8 %s has too many rules- %d must be limited to %d. Call a subtopic.\r\n",currentTopicName,toplevelrules,MAX_TOPIC_RULES) size_t len = pack-data; SetJumpOffsets(data); if (displayIndex) { char display[MAX_WORD_SIZE * 10]; char* at = WriteDisplay(display); size_t displayLen = at - display; memmove(data+displayLen,data,len+1); // shift it all down + 1 for space separator replaceing string end len += displayLen; memmove(data,display,displayLen); } unsigned int checksum = ((unsigned int) Hashit((unsigned char*) data, len,hasUpperCharacters,hasUTF8Characters, hasSeparatorCharacters)) & 0x0fffffff; // trailing blank after jump code if (len >= (MAX_TOPIC_SIZE-100)) BADSCRIPT((char*)"TOPIC-7 Too much data in one topic\r\n") char filename[SMALL_WORD_SIZE]; sprintf(filename,(char*)"%s/BUILD%s/script%s.txt", topicfolder,baseName,baseName); FILE* out = FopenUTF8WriteAppend(filename); if (out) { // write out topic data char* restriction = (topicName->w.topicBots) ? 
topicName->w.topicBots : (char*)"all"; unsigned int len1 = (unsigned int)strlen(restriction); char qualname[100]; if (language_bits) { sprintf(qualname, "%s`-%d", currentTopicName,language_bits >> LANGUAGE_SHIFT); } else strcpy(qualname, currentTopicName); fprintf(out, (char*)"TOPIC: %s 0x%x %u %u %u %u %s\r\n", qualname, (unsigned int)flags, (unsigned int)checksum, (unsigned int)toplevelrules, (unsigned int)gambits, (unsigned int)(len + len1 + 7), currentFilename); fprintf(out, (char*)"\" %s \" ", restriction); fprintf(out, (char*)"%s\r\n", data); fclose(out); // dont use FClose } myfree(data); if (mapFileJson) fprintf(mapFileJson, (char*)"]\r\n}\r\n"); return ptr; } static char* ReadRename(char* ptr, FILE* in,unsigned int build) { renameInProgress = true; while (ALWAYS) // read as many tokens as needed to complete the definition { char word[MAX_WORD_SIZE]; char basic[MAX_WORD_SIZE]; ptr = ReadNextSystemToken(in,ptr,word,false); if (!*word) break; if (*word == '#' && (word[1] != '#' || !IsAlphaUTF8(word[2]))) { *ptr = 0; *readBuffer = 0; break; // comment ends it also } if (TopLevelUnit(word)) // definition ends when another major unit starts { ptr -= strlen(word); // let someone else see this starter also // safe break; } if (*word != '_' && *word != '@' && (*word != '#' || word[1] != '#')) BADSCRIPT((char*)"Rename %s must start with _ or @ or ##\r\n",word) ptr = ReadNextSystemToken(in,ptr,basic,false); if (*word != '#' && (*basic != *word || !IsDigit(basic[1]) )) BADSCRIPT((char*)"Rename %s must start same as %s and have a number after it\r\n",basic,word) if (*word == '#' && !IsDigit(*basic) && *basic != '-' && *basic != '+') BADSCRIPT((char*)"Rename %s followed by number or sign as %s\r\n",word,basic) MakeLowerCase(word); int64 n; if (*word == '#') { ReadInt64(basic,n); if (*basic == '-') n = -n; // force positive } else ReadInt64(basic+1,n); WORDP D = FindWord(word); if (D && !myBot) WARNSCRIPT((char*)"Already have a rename for %s\r\n", word) D = 
StoreWord(word,n); AddInternalFlag(D,(unsigned int)(RENAMED|build)); if (*word == '#' && *basic == '-') AddInternalFlag(D,CONSTANT_IS_NEGATIVE); Log(USERLOG,"Rename %s as %s\r\n",basic,word); } renameInProgress = false; return ptr; } static char* ReadPlan(char* ptr, FILE* in,unsigned int build) { char planName[MAX_WORD_SIZE]; char baseName[MAX_WORD_SIZE]; displayIndex = 0; *planName = 0; functionArgumentCount = 0; int parenLevel = 0; bool gettingArguments = true; endtopicSeen = false; patternContext = false; int baseArgumentCount = 0; unsigned int duplicateCount = 0; WORDP plan = NULL; while (gettingArguments) // read as many tokens as needed to get the name and argumentList { char word[MAX_WORD_SIZE]; ptr = ReadNextSystemToken(in,ptr,word,false); if (!*word) break; // end of file if (!*planName) // get the plan name { if (*word == '^') memmove(word,word+1,strlen(word)); // remove his ^ MakeLowerCase(word); if (!IsAlphaUTF8(*word) ) BADSCRIPT((char*)"PLAN-1 Plan name must start alpha %s\r\n",word) if (!IsLegalName(word)) BADSCRIPT((char*)"PLAN-2 Illegal characters in plan name %s\r\n",word) *planName = '^'; strcpy(planName+1,word); strcpy(baseName,planName); Log(USERLOG,"Reading plan %s\r\n",planName); // handle potential multiple plans of same name plan = FindWord(planName); char name[MAX_WORD_SIZE]; strcpy(name,planName); if (plan) baseArgumentCount = plan->w.planArgCount; while (plan && plan->internalBits & FUNCTION_NAME) { ++duplicateCount; sprintf(name,(char*)"%s%c%u",baseName,DUPLICATETOPICSEPARATOR,duplicateCount); plan = FindWord(name); strcpy(planName,name); } plan = StoreWord(planName); continue; } size_t len = strlen(word); if (TopLevelUnit(word)) // definition ends when another major unit starts { ptr -= len; // let someone else see this starter also break; } switch(*word) { case '(': if (parenLevel++ != 0) BADSCRIPT((char*)"PLAN-4 bad paren level in plan definition %s\r\n",planName) continue; // callArgumentList open case ')': if (--parenLevel != 0) 
BADSCRIPT((char*)"PLAN-5 bad closing paren in plan definition %s\r\n",planName) gettingArguments = false; break; case '^': // declaring a new argument if (IsDigit(word[1])) BADSCRIPT((char*)"PLAN-6 Plan arguments must be alpha names, not digits like %s\r\n",word) strcpy(functionArguments[++functionArgumentCount],word); if (functionArgumentCount > MAX_ARG_LIMIT) BADSCRIPT((char*)"PLAN-7 Too many callArgumentList to %s - max is %d\r\n",planName,MAX_ARG_LIMIT) continue; default: BADSCRIPT((char*)"PLAN-7 Bad argument to plan definition %s",planName) } } if (!plan) return ptr; // nothing defined if (parenLevel) BADSCRIPT((char*)"PLAN-5 Failure to balance ( in %s\r\n",planName) if (duplicateCount && functionArgumentCount != baseArgumentCount) BADSCRIPT((char*)"PLAN->? Additional copies of %s must have %d arguments\r\n",planName,baseArgumentCount) AddInternalFlag(plan,(unsigned int)(FUNCTION_NAME|build|IS_PLAN_MACRO)); plan->w.planArgCount = functionArgumentCount; currentFunctionDefinition = plan; char* data = (char*) mymalloc(MAX_TOPIC_SIZE); // use a big chunk of memory for the data if (!data) ReportBug("Malloc failed for ReadPlan"); *data = 0; char* pack = data; unsigned int toplevelrules = 0; // does not include rejoinders int buffercount = bufferIndex; int frameindex = globalDepth; if (setjmp(scriptJump[++jumpIndex])) { globalDepth = frameindex; bufferIndex = buffercount; ptr = FlushToTopLevel(in, frameindex,data); // if error occurs lower down, flush to here } while (ALWAYS) // read as many tokens as needed to complete the definition { char word[MAX_WORD_SIZE]; ptr = ReadNextSystemToken(in,ptr,word,false); if (!*word) break; if (TopLevelUnit(word)) // definition ends when another major unit starts { ptr -= strlen(word); // let someone else see this starter also // safe break; } switch(*word) { case '#': if (*word == '#' && word[1] == '!') BADSCRIPT((char*)"PLAN-? 
Verification not meaningful in a plan\r\n") continue; default: MakeLowerCopy(lowercaseForm,word); if (TopLevelRule(lowercaseForm))// absorb a responder/gambit and its rejoinders { ++toplevelrules; if (pack == data) { strcpy(pack,&ENDUNITTEXT[1]); // init 1st rule pack += strlen(pack); } ReadTopLevelRule(plan,lowercaseForm,ptr,in,pack,data); pack += strlen(pack); if ((pack - data) > (MAX_TOPIC_SIZE - 2000)) BADSCRIPT((char*)"PLAN-4 Plan %s data too big. Split it by calling another topic using u: () respond(~subtopic) and putting the rest of the rules in that subtopic\r\n",planName) } else BADSCRIPT((char*)"Expecting responder for plan %s, got %s\r\n",planName,word) } } --jumpIndex; if (toplevelrules > MAX_TOPIC_RULES) BADSCRIPT((char*)"PLAN-8 %s has too many rules- %d must be limited to %d. Call a plantopic.\r\n",planName,toplevelrules,MAX_TOPIC_RULES) size_t len = pack-data; if (!len) WARNSCRIPT((char*)"No data in plan %s\r\n",currentTopicName) if (!endtopicSeen) BADSCRIPT((char*)"PLAN-8 Plan %s cannot succeed since no ^end(plan) exists\r\n",planName) // trailing blank after jump code SetJumpOffsets(data); if (len >= (MAX_TOPIC_SIZE-100)) BADSCRIPT((char*)"PLAN-7 Too much data in one plan\r\n") *pack = 0; char file[200]; if (build == BUILD0) sprintf(file,"%s/BUILD0/plans0.txt", topicfolder); else sprintf(file,"%s/BUILD0/plans1.txt", topicfolder); // write how many plans were found (for when we preload during normal startups) if (hasPlans == 0) { FILE* out = FopenUTF8Write(file); fprintf(out,(char*)"%s",(char*)"0 \r\n"); // reserve 5-digit count for number of plans fclose(out); // dont use Fclose } ++hasPlans; // write out plan data FILE* out = FopenUTF8WriteAppend(file); char* restriction = (char*)"all"; unsigned int len1 = (unsigned int)strlen(restriction); fprintf(out,(char*)"PLAN: %s %u %u %u %s\r\n",planName,(unsigned int) functionArgumentCount,(unsigned int) toplevelrules,(unsigned int)(len + len1 + 7),currentFilename); fprintf(out,(char*)"\" %s \" 
%s\r\n",restriction,data); fclose(out); // dont use FClose myfree(data); return ptr; } static char* ReadWord(char* ptr, FILE* in, unsigned int build) // readquery: name "xxxxx" text { char word[MAX_WORD_SIZE]; char word1[MAX_WORD_SIZE]; char word2[MAX_WORD_SIZE]; char tmp[MAX_WORD_SIZE]; //word: canonical form / pluralorpast / pastparticile FLAG... Flagn "xxxxxxx" ptr = ReadNextSystemToken(in, ptr, word, false); // name of word if (!*word) return ptr; if (!IsAlphaUTF8(*word)) BADSCRIPT((char*)"word defined %s must be alpha\r\n", word); if (TopLevelUnit(word)) // definition ends when another major unit starts { ptr -= strlen(word); // let someone else see this starter return ptr; } ptr = ReadNextSystemToken(in, ptr, word1, false); bool form1 = false; bool form2 = false; if (*word1 == '/') { ptr = ReadNextSystemToken(in, ptr, word1, false); if (!IsAlphaUTF8(*word1)) BADSCRIPT((char*)"word form1 defined %s must be alpha\r\n", word1); form1 = true; ptr = ReadNextSystemToken(in, ptr, word2, false); if (*word2 == '/') { ptr = ReadNextSystemToken(in, ptr, word2, false); if (!IsAlphaUTF8(*word2)) BADSCRIPT((char*)"word form2 defined %s must be alpha\r\n", word2); form2 = true; ptr = ReadNextSystemToken(in, ptr, tmp, false); } else strcpy(tmp, word2); } else strcpy(tmp, word1); WORDP D = StoreWord(word, AS_IS); WORDP E = NULL; WORDP F = NULL; if (form1) E = StoreWord(word1, AS_IS); if (form2) F = StoreWord(word2, AS_IS); while (*tmp && !TopLevelUnit(tmp)) { if (*tmp != '"') // not a gloss (currently gloss not supported) { uint64 properties = FindPropertyValueByName(tmp); uint64 sysflags = FindSystemValueByName(word); unsigned int bits2 = (unsigned int)FindParseValueByName(word); if (properties & NOUN) { if (D->internalBits & UPPERCASE_HASH) properties |= NOUN_PROPER_SINGULAR; else properties |= NOUN_SINGULAR; } else if (properties & VERB) properties |= VERB_INFINITIVE; if (properties) AddProperty(D, properties); if (sysflags) AddSystemFlag(D, sysflags); if (bits2) 
AddParseBits(D, bits2); if (form1) { if (properties & NOUN) { if (D->internalBits & UPPERCASE_HASH) AddProperty(E, NOUN_PROPER_PLURAL|NOUN); else AddProperty(E, NOUN_PLURAL|NOUN); } else if (properties & VERB) { AddProperty(E, VERB_PAST|VERB); if (form2) AddProperty(F, VERB_PAST_PARTICIPLE|VERB); } } } ptr = ReadNextSystemToken(in, ptr, tmp, false); } if (TopLevelUnit(tmp)) ptr -= strlen(tmp); // let someone else see this starter return ptr; } static char* ReadQuery(char* ptr, FILE* in, unsigned int build) // readquery: name "xxxxx" text { while (ALWAYS) // read as many tokens as needed to complete the definition (must be within same file) { char word[MAX_WORD_SIZE]; char query[MAX_WORD_SIZE]; ptr = ReadNextSystemToken(in,ptr,word,false); // name of query if (!*word) break; size_t len = strlen(word); if (!IsAlphaUTF8(*word)) BADSCRIPT((char*)"query label %s must be alpha\r\n",word); if (TopLevelUnit(word)) // definition ends when another major unit starts { ptr -= len; // let someone else see this starter break; } ptr = ReadNextSystemToken(in,ptr,query,false); if (*query != '"') BADSCRIPT((char*)"query body %s must be in quotes\r\n",query); WORDP D = StoreWord(word); AddInternalFlag(D, (unsigned int)(QUERY_KIND|build)); char* at = strchr(query+1,'"'); if (!at) { BADSCRIPT((char*)"query body %s must end in quotes\r\n", query); } else *at = 0; D->w.userValue = AllocateHeap(query+1); } return ptr; } static char* ReadReplace(char* ptr, FILE* in, unsigned int build) { while (ALWAYS) // read as many tokens as needed to complete the definition (must be within same file) { char word[MAX_WORD_SIZE]; char* original = word; char replace[MAX_WORD_SIZE]; ptr = ReadNextSystemToken(in,ptr,original,false); if (!stricmp(original,(char*)"replace:")) ptr = ReadNextSystemToken(in,ptr,original,false); // keep going with local replace loop if (!*original) break; // file ran dry if (TopLevelUnit(original)) // definition ends when another major unit starts { ptr -= strlen(original); // 
let someone else see this starter break; } ptr = ReadNextSystemToken(in,ptr,replace,false); if (*replace == '(') { char data[MAX_WORD_SIZE]; char* pack = data; while (*--ptr != '('); // back up to the ( ptr = ReadPattern(ptr, in, pack, false, false); char* pat = data-1; while ((pat = strchr(++pat, '~')) ) { if (*(pat - 1) == ' ') { BADSCRIPT((char*)"CONCEPT-1 Concept name not allowed in replace: pattern %s\r\n", pat - 1) } } strcpy(replace, data); } else if (!stricmp(replace, "![")) // reacquire ![xxx xxx]value { // replace: jack_rusell ![terrier]Jack + Russell + terrier // This says to replace jack_russell with Jack + Russel + terrior ONLY if the word immediately following the match char* end = strchr(ptr, ']'); *end = 0; strcat(replace, ptr); strcat(replace, "]"); *end = ']'; char extra[MAX_WORD_SIZE]; ptr = ReadNextSystemToken(in, end+1, extra, false); strcat(replace, extra); } if (TopLevelUnit(replace)) // definition ends when another major unit starts { ptr -= strlen(replace); // let someone else see this starter break; } if (*original == '\'') // original only - single quoted { memmove(original + 1, original, strlen(original)+1); *original = '*'; } SetSubstitute( original, replace, build, DO_PRIVATE, 0); } return ptr; } static char* ReadIgnoreSpell(char* ptr, FILE* in, unsigned int build) { while (ALWAYS) // read as many tokens as needed to complete the definition (must be within same file) { char ignore[MAX_WORD_SIZE]; ptr = ReadNextSystemToken(in, ptr, ignore, false); if (!stricmp(ignore, (char*)"ignorespell:")) ptr = ReadNextSystemToken(in, ptr, ignore, false); // keep going with local ignore loop if (!*ignore) break; // file ran dry if (TopLevelUnit(ignore)) // definition ends when another major unit starts { ptr -= strlen(ignore); // let someone else see this starter break; } if (*ignore == '*' && !ignore[1]) { nospellcheck = true; continue; } if (*ignore == '!' 
&& ignore[1] == '*' && !ignore[2]) { nospellcheck = false; continue; } WORDP D = StoreWord(ignore, 0); if (!(D->systemFlags & HAS_SUBSTITUTE) && *D->word != '^' && *D->word != '$' && *D->word != '%' && *D->word != '~') { D->internalBits |= DO_NOISE; } } return ptr; } static char* ReadPrefer(char* ptr, FILE* in, unsigned int build) { while (ALWAYS) // read as many tokens as needed to complete the definition (must be within same file) { char word[MAX_WORD_SIZE]; ptr = ReadNextSystemToken(in, ptr, word, false); if (!*word) break; // file ran dry if (TopLevelUnit(word)) // definition ends when another major unit starts { ptr -= strlen(word); // let someone else see this starter break; } WORDP D = StoreWord(word); D->internalBits |= PREFER_THIS_UPPERCASE; } return ptr; } void WriteCanon(char* word, char* canon, char* form) { WritePatternWord(canon); // must recognize this word for spell check WritePatternWord(word); // must recognize this word for spell check // handle multiple language restrictions char restrict[MAX_WORD_SIZE]; *restrict = 0; if (multidict ) { sprintf(restrict, "~l%d", languageIndex); strcat(canon, restrict); strcat(word, restrict); } char filename[SMALL_WORD_SIZE]; sprintf(filename,(char*)"%s/BUILD%s/canon%s.txt", topicfolder,baseName,baseName); FILE* out = FopenUTF8WriteAppend(filename); if (!form) form = ""; fprintf(out,(char*)" %s %s %s\r\n", canon,word,form); fclose(out); // dont use FClose } static char* ReadCanon(char* ptr, FILE* in, unsigned int build) { while (ALWAYS) // read as many tokens as needed to complete the definition (must be within same file) { char word[MAX_WORD_SIZE]; char canon[MAX_WORD_SIZE]; ptr = ReadNextSystemToken(in,ptr,word,false); if (!stricmp(word,(char*)"canonical:")) ptr = ReadNextSystemToken(in,ptr,word,false); // keep going with local loop if (!*word) break; // file ran dry size_t len = strlen(word); if (TopLevelUnit(word)) // definition ends when another major unit starts { ptr -= len; // let someone else see this 
starter break; }
		// (The statements down to the next function header finish a definition that began
		//  before this chunk; they are reproduced without change.)
		ptr = ReadNextSystemToken(in,ptr,canon,false);
		char form[MAX_WORD_SIZE];
		*form = 0;
		if (*ptr) ReadNextSystemToken(in, ptr, form, false,true);
		if (!stricmp(form, "MORE_FORM") || !stricmp(form, "MOST_FORM"))
		{
			ptr = ReadNextSystemToken(in, ptr, form, false);
		}
		else *form = 0;
		WriteCanon(word,canon,form);
	}
	return ptr;
}

// ReadConcept - compile one concept definition (dispatched from ReadTopicFile on "concept:").
//   ptr   - current read position in the input buffer
//   in    - script file being compiled; handed to ReadNextSystemToken to fetch more tokens
//   build - build-layer flag; added to the concept word's internal bits and passed to ReadKeyword
// Returns the updated read position. When the definition is ended by the start of another
// top-level unit, ptr is backed up by the length of that token so the caller sees it again.
// Script problems are reported through BADSCRIPT / WARNSCRIPT.
static char* ReadConcept(char* ptr, FILE* in,unsigned int build)
{
	char conceptName[MAX_WORD_SIZE];
	*conceptName = 0;
	MEANING concept = 0;
	WORDP D = NULL;
	bool ignoreSpell = false;
	patternContext = false;
	bool more = false;
	bool undeclared = true;		// true until the duplicate-definition check has run once
	bool startOnly = false;
	bool endOnly = false;
	int parenLevel = 0;
	uint64 type = 0;
	uint64 sys;					// assigned (with type) once the concept name has been read
	bool notted = false;
	bool emoticon = false;
	int quoted = 0;
	bool duplicate = false;
	while (ALWAYS) // read as many tokens as needed to complete the definition (must be within same file)
	{
		char word[MAX_WORD_SIZE];
		char* z = ReadNextSystemToken(in,ptr,word,false);
		ptr = z;
		if (!*word) break; // file ran dry
		size_t len = strlen(word);
		if (TopLevelUnit(word)) // definition ends when another major unit starts
		{
			// NOTE(review): the inner test repeats the outer one, so it is always true here
			if (TopLevelUnit(word)) ptr -= len; // let someone else see this starter
			break;
		}

		// establish name and characteristics of the concept
		if (!*conceptName) // get the concept name, will be ~xxx or :xxx
		{
			// NOTE(review): only '~' is accepted here although the message also mentions ':'
			if (*word != '~' ) BADSCRIPT((char*)"CONCEPT-1 Concept name must begin with ~ or : - %s\r\n",word)
			// Users may not create repeated user topic names. Ones already saved in dictionary are fine to overwrite
			MakeLowerCopy(conceptName,word);
			if (!IsLegalName(conceptName)) BADSCRIPT((char*)"CONCEPT-2 Illegal characters in concept name %s\r\n",conceptName)
			D = StoreWord(conceptName,AS_IS);
			D->internalBits |= CONCEPT; // note we have seen definition

			// also record "<concept>$<botname>" (leading/trailing blank of the bot name removed) as defined
			char cumulate[MAX_WORD_SIZE];
			strcpy(cumulate, conceptName);
			strcat(cumulate, "$");
			char* bot = (*scopeBotName == ' ') ? (scopeBotName + 1) : scopeBotName;
			strcat(cumulate, bot);
			size_t l = strlen(cumulate);
			if (cumulate[l - 1] == ' ') cumulate[l - 1] = 0; // remove trailing blank
			WORDP alt = StoreWord(cumulate, AS_IS);
			alt->internalBits |= BEEN_HERE; // has been defined

			concept = MakeMeaning(D);
			sys = type = 0;
			parenLevel = 0;
			Log(USERLOG,"Reading concept %s\r\n",conceptName);
			AddMap((char*)" concept:", conceptName, &mapItemCount);

			// read the control flags of the concept
			ptr = SkipWhitespace(ptr);
			while (*ptr && *ptr != '(' && *ptr != '[' && *ptr != '"') // not started and no concept comment given (concept comments come after all control flags)
			{
				ptr = ReadCompiledWord(ptr,word);
				len = strlen(word);
				// a flag glued to the opening paren: strip the paren and back ptr up onto it
				// NOTE(review): word[len-1] assumes ReadCompiledWord never returns an empty word here - confirm
				if (word[len-1] == '(')
				{
					word[len-1] = 0;
					--ptr;
					if (*ptr != '(') --ptr;
				}
				if (!stricmp(word,(char*)"more"))
				{
					more = true;
					continue;
				}
				if (!stricmp(word,(char*)"duplicate")) // allow duplicate keywords
				{
					duplicate = true;
					continue;
				}
				if (!stricmp(word, (char*)"INTERJECTION")) // match member as interjection
				{
					endOnly = startOnly = true;
					continue;
				}
				if (!stricmp(word, (char*)"EMOTICON"))
				{
					emoticon = true;
					continue;
				}
				if (!stricmp(word, (char*)"START_ONLY")) // match member only at start of sentence
				{
					startOnly = true;
					continue;
				}
				if (!stricmp(word, (char*)"END_ONLY")) // match member only at end of sentence
				{
					endOnly = true;
					continue;
				}
				char* paren = strchr(word,'(');
				if (paren) // handle attachment of paren + stuff
				{
					while (*--ptr != '('); // walk back to the paren so the keyword list is re-read from it
					*paren = 0;
				}
				ptr = SkipWhitespace(ptr);

				// a flag name may be a property, a system flag or a parse flag
				uint64 bits = FindPropertyValueByName(word);
				type |= bits;
				uint64 bits1 = FindSystemValueByName(word);
				sys |= bits1;
				unsigned int bits2 = (unsigned int)FindParseValueByName(word);
				if (sys & NOCONCEPTLIST) // remembered as an internal flag instead of a system flag
				{
					AddInternalFlag(D,FAKE_NOCONCEPTLIST);
					sys ^= NOCONCEPTLIST;
				}
				if (bits) AddProperty(D, bits);
				else if (bits1) AddSystemFlag(D, bits1);
				else if (bits2) AddParseBits(D,bits2);
				else if (!stricmp(word,(char*)"IGNORESPELLING")) ignoreSpell = true;
				else if (!stricmp(word,(char*)"UPPERCASE_MATCH")) AddInternalFlag(D,UPPERCASE_MATCH);
				else if (!stricmp(word,(char*)"ONLY_NOUNS")) AddSystemFlag(D,NOUN);
				else if (!stricmp(word,(char*)"ONLY_VERBS")) AddSystemFlag(D,VERB);
				else if (!stricmp(word,(char*)"ONLY_ADJECTIVES")) AddSystemFlag(D,ADJECTIVE);
				else if (!stricmp(word,(char*)"ONLY_ADVERBS")) AddSystemFlag(D,ADVERB);
				else if (!stricmp(word,(char*)"ONLY_NONE")) AddSystemFlag(D,ONLY_NONE); // disable ONLY here and below
				else BADSCRIPT((char*)"CONCEPT-4 Unknown concept property %s\r\n",word)
			}
			continue; // read more tokens now that concept has been established
		}

		// first token after the header: warn about redefinition, then stamp the build layer
		if (undeclared)
		{
			undeclared = false; // dont test this again
			if (!more && !noconcept)
			{
				int buildbits = D->internalBits & (BUILD0 | BUILD1 );
				if (!myBot && GetObjectHead(D) && buildbits && !nowarndupconcept)
				{
					if (language_bits)
					{
						FACT* F = GetVerbHead(D);
						if (FactLanguage(F) == language_bits) WARNSCRIPT((char*)"CONCEPT-3 Concept/topic already defined %s\r\n", conceptName)
					}
					else WARNSCRIPT((char*)"CONCEPT-3 Concept/topic already defined %s\r\n",conceptName)
				}
				if (HasBotMember(D, myBot) && GetObjectHead(D) && !nowarndupconcept)
				{
					WARNSCRIPT((char*)"CONCEPT-3 Concept/topic already defined %s\r\n", conceptName)
				}
			}
			AddInternalFlag(D,(unsigned int)build);
			if (duplicate) AddInternalFlag(D, CONCEPT_DUPLICATES);
			else AddInternalFlag(D, NO_CONCEPT_DUPLICATES);
		}

		// read the keywords zone of the concept
		switch(*word) // THE MEAT OF CONCEPT DEFINITIONS
		{
		case '(': case '[': // start keyword list
			if (parenLevel) BADSCRIPT((char*)"CONCEPT-5 Cannot use [ or ( within a keyword list for %s\r\n",conceptName);
			parenLevel++;
			isConcept = emoticon;
			break;
		case ')': case ']': // end keyword list
			--parenLevel;
			if (parenLevel < 0) BADSCRIPT((char*)"CONCEPT-6 Missing ( for concept definition %s\r\n",conceptName)
			break;
		default:
			ptr = ReadKeyword(word,ptr,notted,quoted,concept,type,ignoreSpell,build,duplicate,startOnly,endOnly,emoticon);
		}
		if (parenLevel == 0) break; // keyword list closed (or never opened): definition complete
	}
	if (parenLevel) BADSCRIPT((char*)"CONCEPT-7 Failure to give closing ( in concept %s\r\n",conceptName)

	isConcept = false;
	return ptr;
}

// ReadTopicFile - compile one script file, dispatching each top-level declaration to its reader.
//   name    - path of the file; anything not ending in .top or .tbl is ignored
//   buildid - build-layer flags, possibly combined with FROM_FILE (stripped before use)
// A setjmp point is pushed on scriptJump; when control returns to it the buffer index is
// restored and reading resumes from wherever FlushToTopLevel positions the input.
static void ReadTopicFile(char* name,uint64 buildid) // read contents of a topic file (.top or .tbl)
{
	convertTabs = true;
	tableinput = NULL;
	callingSystem = 0;
	chunking = false;
	unsigned int build = (unsigned int) buildid;
	size_t len = strlen(name);
	if (len <= 4) return;

	// Check the filename is at least four characters (the ext plus one letter)
	// and matches either .top or .tbl
	char* suffix = name + len - 4;
	if (stricmp(suffix, (char*) ".top") && stricmp(suffix, (char*) ".tbl")) return;

	FILE* in = FopenReadNormal(name);
	if (!in)
	{
		if (strchr(name,'.') || build & FROM_FILE) // names a file, not a directory
		{
			WARNSCRIPT((char*)"Missing topic file %s\r\n",name)
			++missingFiles;
		}
		return;
	}

	char word[MAX_WORD_SIZE];
	*readBuffer = 0; // insure no carryover from elsewhere
	ReadNextSystemToken(NULL, NULL, word, false, false); // flush cache
	build &= -1 ^ FROM_FILE; // remove any flag indicating it came as a direct file, not from a directory listing
	Log(USERLOG,"\r\n----Reading file %s %s\r\n",currentFilename, scopeBotName);

	// record "<current dir>/<name>" (with forward slashes) in the map
	char map[MAX_WORD_SIZE];
	char file[MAX_WORD_SIZE];
	GetCurrentDir(file, MAX_WORD_SIZE);
	sprintf(map,"%s/%s",file,name);
	char* find = map;
	while ((find = strchr(find,'\\'))) *find = '/';
	AddMap((char*)"file:", map, &mapTopicFileCount);
	if (mapFileJson) fprintf(mapFileJson, (char*)",\r\n\"items\" : [\r\n");
	mapItemCount = 0;
	mapRuleCount = 0;

	// if error occurs lower down, flush to here
	patternContext = false;
	char* ptr = "";
	int buffercount = bufferIndex;
	int frameindex = globalDepth;
	if (setjmp(scriptJump[++jumpIndex]))
	{
		bufferIndex = buffercount;
		ptr = FlushToTopLevel(in, frameindex,0);
	}
	while (ALWAYS)
	{
		isConcept = false;
		ptr = ReadNextSystemToken(in,ptr,word,false); // eat tokens (should all be top level)
		if (!*word) break; // no more tokens found
		currentFunctionDefinition = NULL; // can be set by ReadTable or ReadMacro
		if (!stricmp(word,(char*)":quit")) break;

		if (*word == ':' && word[1] && !strstr(readBuffer, "^eval")) // testing command not near an eval
		{
			char output[MAX_WORD_SIZE];
			DoCommand(readBuffer, output);
			*readBuffer = 0; // the whole line was consumed by the command
			*ptr = 0;
		}
		else if (!stricmp(word, (char*)"concept:")) ptr = ReadConcept(ptr, in, build);
		else if (!stricmp(word, (char*)"query:")) ptr = ReadQuery(ptr, in, build);
		else if (!stricmp(word, (char*)"word:")) ptr = ReadWord(ptr, in, build);
		else if (!stricmp(word, (char*)"replace:")) ptr = ReadReplace(ptr, in, build);
		else if (!stricmp(word, (char*)"ignorespell:")) ptr = ReadIgnoreSpell(ptr, in, build);
		else if (!stricmp(word, (char*)"prefer:")) ptr = ReadPrefer(ptr, in, build);
		else if (!stricmp(word, (char*)"canon:")) ptr = ReadCanon(ptr, in, build);
		else if (!stricmp(word, (char*)"topic:")) ptr = ReadTopic(ptr, in, build);
		else if (!stricmp(word, (char*)"plan:")) ptr = ReadPlan(ptr, in, build);
		else if (!stricmp(word, (char*)"language:"))
		{
			ptr = ReadNextSystemToken(in, ptr, word, false); // the language name
			if (multidict)
			{
				SetLanguage(word);
				Log(USERLOG, "\r\n>>Setting language %s\r\n", word);
			}
		}
		else if (!stricmp(word, (char*)"bot:"))
		{
			globalBotScope = false; // lasts for this file
			ptr = ReadBot(ptr);
		}
		else if (!strnicmp(word, (char*)"debug:", 6))
		{
			DebugCode("");
		}
		else if (!stricmp(word,(char*)"table:")) ptr = ReadTable(ptr,in,build,false);
		else if (!stricmp(word,(char*)"rename:")) ptr = ReadRename(ptr,in,build);
		else if (!stricmp(word,(char*)"describe:")) ptr = ReadDescribe(ptr,in,build);
		else if (!stricmp(word, (char*)"patternMacro:") || !stricmp(word, (char*)"outputMacro:") || !stricmp(word, (char*)"dualMacro:") || !stricmp(word, (char*)"tableMacro:"))
		{
			char* data = AllocateBuffer();
			ptr = ReadMacro(ptr, in, word, build,data);
			FreeBuffer();
		}
		else BADSCRIPT((char*)"FILE-1 Unknown top-level declaration %s in %s\r\n",word,name)
	}
	FClose(in); // this should be the only such, not fclose.
if (mapFileJson) fprintf(mapFileJson, (char*)"\r\n]\r\n}"); --jumpIndex; if (hasHighChar) { WARNSCRIPT((char*)"File %s has no utf8 BOM but has character>127 - extended Ansi changed to normal Ascii line '%c' %x %s\r\n", name, hasHighChar,hasHighChar,readBuffer) // should have been utf 8 or have no high data. } if (!globalBotScope) // restore any local change from this file { myBot = 0; *scopeBotName = 0; } } static void DoubleCheckDefinition(unsigned int build,char* topicfolder,char* baseName) { uint64 oldbot = myBot; HEAPREF list = undefinedCallThreadList; while (list) { uint64 functionNamex; uint64 filenamex; uint64 linex; list = UnpackHeapval(list, functionNamex, filenamex,linex); if (!functionNamex) continue; // reference check canceled char* functionData = (char*)functionNamex; myBot = *(uint64*)functionData; char* filename = (char*)filenamex; char* line = (char*) linex; int args = functionData[8] ; strcpy(currentFilename, filename); currentFileLine = (int)(uint64)line; char* fn = functionData + 9; char* botname = functionData + 10 + strlen(fn); WORDP D = FindWord(fn); WORDP fnword = NULL; // allow function calls indirect off variables if (!D && fn[1] != USERVAR_PREFIX) { WARNSCRIPT((char*)"Undefined function %s in bot %s\r\n", fn, botname) char filename[SMALL_WORD_SIZE]; sprintf(filename, (char*)"%s/BUILD%s/macros%s.txt", topicfolder, baseName, baseName); FILE* out = FopenUTF8WriteAppend(filename); fprintf(out, (char*)"%s %d %s\r\n", fn, args,botname); fclose(out); // dont use Fclose } } list = undefinedConceptThreadList; while (list) { uint64 conceptNamex; uint64 filenamex; uint64 linex; list = UnpackHeapval(list, conceptNamex, filenamex, linex); if (!conceptNamex) continue; // reference check canceled char* conceptData = (char*)conceptNamex; myBot = *(uint64*)conceptData; char* filename = (char*)filenamex; char* line = (char*)linex; strcpy(currentFilename, filename); currentFileLine = (int)(uint64)line; char* concept = conceptData + 9; char* botname = 
conceptData + 10 + strlen(concept); WORDP D = FindWord((char*)concept); if (D && D->internalBits & TOPIC) continue; WARNSCRIPT((char*)"Undefined concept %s in bot %s\r\n", concept, botname) } myBot = oldbot; } static void DoubleCheckReuse() { char file[200]; sprintf(file,"%s/missingLabel.txt", topicfolder); FILE* in = FopenReadWritten(file); if (!in) return; char label[MAX_WORD_SIZE]; char bothead[MAX_WORD_SIZE]; while (ReadALine(readBuffer,in) >= 0) { char *ptr = ReadCompiledWord(readBuffer, label); // topic + label if (*label == '~') { MakeLowerCase(label); char* dot = strchr(label, '.'); if (dot) MakeUpperCase(dot + 1); } ptr = ReadCompiledWord(ptr, bothead); // from file MakeUpperCase(bothead); ptr = ReadCompiledWord(ptr,tmpWord); // filename // from file while (!strstr(tmpWord,".top")) ptr = ReadCompiledWord(ptr, tmpWord); // extra bot names int line; ptr = ReadInt(ptr,line); // line number // from line char labelx[MAX_WORD_SIZE]; sprintf(labelx, "%s-%s", label, bothead); WORDP D = FindWord(labelx); if (!D) // cant find as bot specific, check for general { sprintf(labelx, "%s-*", label); D = FindWord(labelx); } if (!D) WARNSCRIPT((char*)"Missing label %s for reuse in bot %s in File: %s Line: %d \r\n",label, bothead, tmpWord,line) } fclose(in); // dont use Fclose remove(file); } static void InsureSafeSpellcheck(char* word, bool dictionaryBuild) { // all pieces of a multiword keyword need to avoid spell changes // there may be a problem if the master key becomes universal and its pieces do not if (!word || !*word) return; // Spellcheck should not harm keywords or components of keywords. Insure some mark exists. // Spellcheck can adjust case without causing recognition damage. 
WORDP X = FindWord(word, 0, LOWERCASE_LOOKUP); if (X && (X->properties & TAG_TEST || X->systemFlags & PATTERN_WORD)) return; WORDP Y = FindWord(word, 0, UPPERCASE_LOOKUP); if (Y && (Y->properties & TAG_TEST || Y->systemFlags & PATTERN_WORD)) return; // protect lower case form ONLY if is multiword header if (strchr(word,'_')) { char data[MAX_WORD_SIZE]; MakeLowerCopy(data, word); WORDP Z; size_t len = strlen(data); if (data[len - 1] == 's') Z = StoreWord(data, AS_IS); // dont force uppercase on plurals like Cousins else Z = StoreWord(word, AS_IS); if (Z) { AddSystemFlag(Z, PATTERN_WORD); AddWordItem(Z, dictionaryBuild); } } else // protect in whatever case it is { WORDP Z = StoreWord(word, AS_IS); AddSystemFlag(Z, PATTERN_WORD); AddWordItem(Z, dictionaryBuild); } } static size_t WriteCMore(FACT* F, char*&word,FILE* out,size_t lineSize,uint64 build) { char wordcopy[MAX_WORD_SIZE]; strcpy(wordcopy, word); char* tilde = strchr(wordcopy+1, '~'); char* tick = strchr(wordcopy, '`'); // bot id marker if (tick) *tick = 0; //back to original word before bot added unsigned int langbits = language_bits; if (tilde) // use given language { language_bits = atoi(tilde + 2) << LANGUAGE_SHIFT; *tilde = 0; //back to original word before language added } // insure whole word safe from spell check InsureSafeSpellcheck(wordcopy, build); // protect whole word char* sep = strchr(word, '_'); size_t wlen = strlen(word) + 1; strcat(word, " "); // space separator fwrite(word, 1, wlen, out); lineSize += wlen; if (lineSize > 500) // avoid long lines { fprintf(out, (char*)" %s", (char*)"\r\n "); lineSize = 0; } *word = 0; language_bits = langbits; return lineSize; } static char* MakeToken(char* input, char* word) { strcpy(word, input + 1); size_t len = strlen(word); word[len - 1] = ' '; // remove trailing quote ForceUnderscores(word); return word; } static size_t WriteExclude(FILE* out, FACT* F, char* word, size_t lineSize, uint64 build) { WORDP E = Meaning2Word(F->subject); AddBeenHere(E); 
char* dict = strchr(word + 1, '~'); // has a wordnet attribute on it if (dict) // full wordnet word reference { if (E->inferMark != inferMark) SetTriedMeaning(E, 0); E->inferMark = inferMark; if (dict) { unsigned int which = atoi(dict + 1); if (which) // given a meaning index, mark it { uint64 offset = 1ull << which; SetTriedMeaning(E, GetTriedMeaning(E) | offset); } } } char* key = WriteMeaning(F->subject, true); char* u = key; if (*key != '`' && *key != '"') while ((u = strchr(u, ' '))) *u = '_';// keys must be composites, not spaced words if (*E->word == '"') word = MakeToken(E->word, word);// change string to std token else if (F->flags & ORIGINAL_ONLY) sprintf(word, (char*)"!'%s ", key); else if (F->flags & RAWCASE_ONLY) sprintf(word, (char*)"!!'%s ", key); else sprintf(word, (char*)"!%s ", key); lineSize = WriteCMore(F, word, out, lineSize, build); KillFact(F); return lineSize; } static size_t WriteMember(FILE* out,FACT* F,char* word,size_t lineSize, uint64 build) { WORDP D = Meaning2Word(F->subject); AddBeenHere(D); char* member = WriteMeaning(F->subject, true); char* u = member; while ((u = strchr(u, ' '))) *u = '_'; // cannot be freestanding, must be _ or quoted or backquoted if (*D->word == '"') word = MakeToken(D->word, word); // change string to std token else if (F->flags & ORIGINAL_ONLY) sprintf(word, (char*)"'%s", member); else if (F->flags & RAWCASE_ONLY) sprintf(word, (char*)"''%s", member); else sprintf(word, (char*)"%s", member); // generate header words in correct language as well (universal as needed) //GetHeaderWord(word); // trigger any header word in correct language char* dict = strchr(word + 1, '~'); // has a wordnet attribute on it if (*word == '~' || dict) // concept or full wordnet word reference { if (D->inferMark != inferMark) SetTriedMeaning(D, 0); D->inferMark = inferMark; if (dict) { unsigned int which = atoi(dict + 1); if (which) // given a meaning indD, mark it { uint64 offset = 1ull << which; SetTriedMeaning(D, 
GetTriedMeaning(D) | offset); } } } uint64 botid = F->botBits; char* lang = strstr(word+1, "~l"); // will be after any botid if (botid)// put bot id before any language id { char number[100]; char language[100]; *language = 0; if (lang) { strcpy(language, lang); *lang = 0; } sprintf(number, "`%s", PrintU64(botid)); strcat(word, number); strcat(word, language); } // write it out- this INVERTS the order now and when read back in, will be reestablished correctly // but dictionary storage locations will be inverted return WriteCMore(F, word, out, lineSize, build); } static void WriteConcepts(WORDP D, uint64 build) // do last, so dictionary words max already named for *.bin form { // this will leave dict words dirty with BEEN_HERE bits char* name = D->word; if (*name != '~' || !(D->internalBits & build)) return; // not a topic or concept or not defined this build RemoveInternalFlag(D,(BUILD0|BUILD1)); // write out keywords FILE* out = NULL; char filename[SMALL_WORD_SIZE]; sprintf(filename,(char*)"%s/BUILD%s/keywords%s.txt", topicfolder,baseName,baseName); out = FopenUTF8WriteAppend(filename); fprintf(out,(D->internalBits & TOPIC) ? 
(char*)"T%s " : (char*)" %s ", D->word); uint64 properties = D->properties; uint64 bit = START_BIT; while (properties && bit) { if (properties & bit && !(bit & AS_IS)) { properties ^= bit; fprintf(out,(char*)"%s ",FindNameByValue(bit)); } bit >>= 1; } properties = D->systemFlags; bit = START_BIT; while (properties && bit) { // dont write this out in keywords see FAKE_NOCONCEPTLIST - some of these go in DICTn file if (properties & bit && !(bit & (PATTERN_WORD|NOCONCEPTLIST|MARKED_WORD))) { char* name = FindSystemNameByValue(bit); properties ^= bit; fprintf(out,(char*)"%s ",name); } bit >>= 1; } if (D->internalBits & FAKE_NOCONCEPTLIST) fprintf(out,(char*)"%s",(char*)"NOCONCEPTLIST "); if (D->internalBits & UPPERCASE_MATCH) fprintf(out,(char*)"%s",(char*)"UPPERCASE_MATCH "); if (D->internalBits & CONCEPT_DUPLICATES) fprintf(out, (char*)"%s", (char*)"DUPLICATE "); else fprintf(out, (char*)"%s", (char*)"NODUPLICATE "); int n = 10; seeAllFacts = true; // do for all bots at once in all languages FACT* E = GetObjectNondeadHead(D); while (E) { if (E->verb == Mmember && E->flags & START_ONLY) { fprintf(out, (char*)"%s", (char*)"START_ONLY "); } if (E->verb == Mmember && E->flags & END_ONLY) { fprintf(out, (char*)"%s", (char*)"END_ONLY "); } if (E->verb == Mmember && E->flags & (END_ONLY | START_ONLY)) break; if (--n == 0) break; // should have found by now E = GetObjectNondeadNext(E); } fprintf(out, (char*)"%s", (char*)"( "); size_t lineSize = 0; NextInferMark(); char* buffer = AllocateStack(NULL, maxBufferSize); unsigned int oldlanguage = language_bits; // write out simple excludes only //We want them as most recent facts to find, so we stop checking when we run out MEANING MconceptPattern = MakeMeaning(FindWord("conceptpattern")); // does not have to be found FACT* F = GetObjectNondeadHead(D); if (F) { while (F) { WORDP E = Meaning2Word(F->subject); if (build == BUILD1 && !(F->flags & FACTBUILD1)) { ; } // defined by earlier level else if (F->verb == MconceptPattern) { ; 
} else if (F->verb == Mexclude && *E->word != '~') // only relevant simple word facts { lineSize = WriteExclude(out, F, buffer, lineSize, build); } F = GetObjectNondeadNext(F); } } // now do set excludes F = GetObjectNondeadHead(D); if (F) { while (F) { WORDP D = Meaning2Word(F->subject); if (build == BUILD1 && !(F->flags & FACTBUILD1)) { ; } // defined by earlier level else if (F->verb == Mexclude && *D->word == '~') // the only relevant concept exclude facts { lineSize = WriteExclude(out, F, buffer, lineSize, build); } F = GetObjectNondeadNext(F); } } // write out set members here, dont write out excludes here F = GetObjectNondeadHead(D); if (F) { while (F) { if (build == BUILD1 && !(F->flags & FACTBUILD1)) { ; } // defined by earlier level else if (F->verb == Mmember) { lineSize = WriteMember(out, F, buffer, lineSize, build); KillFact(F); } F = GetObjectNondeadNext(F); } } ReleaseStack(buffer); fprintf(out, (char*)"%s", ")\r\n"); fclose(out); // dont use Fclose seeAllFacts = false; } static void WriteDictionaryChange(FILE* dictout, unsigned int build) { // Note that topic labels (topic.name) and pattern words will not get written // All words in the dictionary were written out with language from the dictionaries before. // Loading this level may introduce new words with language. Or it can generate // UNIVERSAL words, which may make all other same words per language become SUPERCEDED. // We write out newly superceded flagged words but not prior ones. // Superceding means copying all facts of the old language oriented word to use the // new word (killing the old facts). We will not write out the old facts. 
// (Body of WriteDictionaryChange, whose header lies on the preceding line.)
// Walks every dictionary entry. Entries below dictionaryPreBuild[layer] are compared with
// the matching record of the prebuild snapshot; entries at or above it are new in this layer.
// Unchanged old entries are skipped, queries are written as "+query", everything else is
// written as one "^" (changed/new) or "-" (superceded) line.
	FILE* in = NULL;
	char file[SMALL_WORD_SIZE];
	int layer = 0;
	if (build == BUILD0) layer = 0;
	else if (build == BUILD1) layer = 1;
	sprintf(file, (char*)"%s/prebuild%c.bin", tmpfolder,layer + '0');
	in = FopenReadWritten(file);
	if (!in)
	{
		ReportBug((char*)"prebuild bin not found");
		return;
	}
	unsigned int check = Read32(in); // version stamp
	if (check != CHECKSTAMP) ReportBug("Fatal: checkstamp writedictchanged");

	seeAllFacts = true;
	for (WORDP D = dictionaryBase + 1; D < dictionaryFree; ++D)
	{
		if (!D->word) continue; // not a real entry
		D->internalBits &= -1 ^ BEEN_HERE;
		D->systemFlags &= -1 ^ MARKED_WORD;

		// "new" values are the live entry; "old" values start equal and are replaced from the snapshot
		uint64 oldproperties,oldflags;
		unsigned int oldintbits,oldlength,newlength, oldheader;
		uint64 newproperties = oldproperties = D->properties;
		uint64 newflags = oldflags = D->systemFlags;
		unsigned int newheader = oldheader = GETMULTIWORDHEADER(D);
		unsigned int newintbits = oldintbits = D->internalBits;
		oldlength = newlength = D->length;
		bool deleted = SUPERCEDED(D);
		unsigned int newsubstituteindex = 0;
		if (D->systemFlags & HAS_SUBSTITUTE && D->w.substitutes)
		{
			newsubstituteindex = Word2Index(D->w.substitutes); // if zero means substitute is a full delete
		}
		else if (D->internalBits & CONDITIONAL_IDIOM && D->w.conditionalIdiom)
		{
			// NOTE(review): tests w.conditionalIdiom but converts w.substitutes - presumably members of one union; confirm
			newsubstituteindex = Word2Index(D->w.substitutes);
		}
		unsigned int oldsubstituteindex = 0;
		bool changed = true; // default is new words (never read again in this function)
		bool changedForeign = false;
		bool newlyAdded = false;

		if (D < dictionaryPreBuild[layer]) // word preexisted this level, so see if it changed
		{
			// snapshot record: index(4) properties(8) flags(8) internal(4) header(1) length(4)
			// substitute(4) querylen(4) [query text] marker(1) [foreign flags, marker(1)]
			unsigned int offset = (unsigned int)(D - dictionaryBase);
			int result = fread(&oldintbits,1,4,in);
			if (result != 4) // ran out of old words
			{
				break;
			}
			if (oldintbits != offset) // snapshot and dictionary are out of step
			{
				(*printer)((char*)"%s", (char*)"Bad dictionary change test\r\n");
				myexit("bad dict", 4);
			}
			fread(&oldproperties,1,8,in);
			fread(&oldflags,1,8,in);
			fread(&oldintbits,1,4,in); // old internal
			// NOTE(review): only 1 byte is read into the 4-byte oldheader, whose other bytes
			// still hold the current header value assigned above
			fread(&oldheader, 1, 1, in); // multiword substitute header
			fread(&oldlength,1,4,in);
			fread(&oldsubstituteindex, 1, 4, in); // index of word we substitute to
			unsigned int index = 0;
			fread(&index, 1, 4, in);
			if (IsQuery(D) && index)
			{
				// NOTE(review): index bytes go into a 1000-byte buffer; no bound check is visible here
				char query[1000];
				fread(&query, 1, index, in); // we dont check for change - not allowed
			}
			unsigned char junk;
			fread(&junk,1,1,in); // 0 marker
			WORDP foreignFlags[50]; // filled only when the marker is 76
			if (junk == 76)
			{
				fread(&foreignFlags, languageCount, sizeof(WORDP), in);
				fread(&junk, 1, 1, in); // 0 marker
			}
			if (junk != 77)
			{
				(*printer)((char*)"%s", (char*)"out of dictionary change data2?\r\n"); // multiword header
				myexit("build fail old data");
			}

			// old word, no changes
			// NOTE(review): changedForeign is set when memcmp reports the arrays EQUAL, and
			// foreignFlags is compared even when it was not read above - confirm both are intended
			if (D->foreignFlags && !memcmp(D->foreignFlags,&foreignFlags,languageCount * sizeof(WORDP)))
			{
				changedForeign = true;
			}
			else if (newproperties == oldproperties && newflags == oldflags && newintbits == oldintbits && newlength == oldlength && oldsubstituteindex == newsubstituteindex && newheader == oldheader) continue; // unchanged old
		}
		else newlyAdded = true; // word came from this build layer

		if (deleted && newlyAdded) continue; // newly deleted, dont need to write

		if (IsQuery(D) && D->internalBits & build )
		{
			fprintf(dictout, (char*)"+query `%s` `%s` \r\n", D->word, D->w.userValue); // query defn , not a rename
			continue;
		}

		// scripts may not alter their properties and they are language independent
		// (disabled in the original; the collapsed source makes the exact extent of this comment uncertain)
		// if (*D->word == '~' && !( D->systemFlags & NOCONCEPTLIST) ) continue; // dont write topic names or concept names, let keywords do that and no variables
		if ((*D->word == '_' || *D->word == '@' || *D->word == '#' ) && D->internalBits & RENAMED)
		{
			if (!newlyAdded) continue; // written out before
		}
		// old word has no characteristics OR facts
		else if (!newlyAdded && (oldproperties == newproperties && oldflags == newflags && oldintbits == newintbits && oldlength == newlength) && oldsubstituteindex == newsubstituteindex && !GetSubjectNondeadHead(D) && !GetVerbNondeadHead(D) && !GetObjectNondeadHead(D)) continue; // ignore pattern words, etc EXCEPT when field of a fact or with different sys or prop

		// not checking words without ~ if the ONLY change is an existing word got made into a concept, dont write it out anymore
		{
			// line prefix: change marker plus the word, with ~l<language index> when multidict applies
			char word[MAX_WORD_SIZE];
			char change = (SUPERCEDED(D)) ? '-' : '^';
			if (multidict && GET_LANGUAGE_INDEX(D)) sprintf(word, (char*)"%c `%s~l%u` ", change, D->word, GET_LANGUAGE_INDEX(D));
			else sprintf(word, (char*)"%c `%s` ", change, D->word);
			if (SUPERCEDED(D)) fprintf(dictout, "`%s`\r\n", word);
			else
			{
				// each field is "@ " (same as snapshot), "0 " (empty) or hex digits
				char p1[20];
				char p2[20];
				char s1[20];
				char s2[20];
				char i1[20];
				if (newproperties == oldproperties && !newlyAdded) // inherit from before
				{
					strcpy(p1, "@ ");
					*p2 = 0;
				}
				else if (!newproperties)
				{
					strcpy(p1, "0 ");
					*p2 = 0;
				}
				else // 64 bits written as two 32-bit halves
				{
					sprintf(p1, "x%08x ", (unsigned int)(D->properties >> 32));
					sprintf(p2, "%08x ", (unsigned int)(D->properties & 0x00000000ffffffff));
				}

				if (newflags == oldflags && !newlyAdded) // inherit from before
				{
					strcpy(s1, "@ ");
					*s2 = 0;
				}
				else if (!newflags)
				{
					strcpy(s1, "0 ");
					*s2 = 0;
				}
				else if (newflags == PATTERN_WORD) // shorthand for the single PATTERN_WORD flag
				{
					strcpy(s1, "P ");
					*s2 = 0;
				}
				else
				{
					sprintf(s1, "x%08x ", (unsigned int)(D->systemFlags >> 32));
					sprintf(s2, "%08x ", (unsigned int)(D->systemFlags & 0x00000000ffffffff));
				}

				if (newintbits == oldintbits && !newlyAdded) // inherit from before
					strcpy(i1, "@ ");
				else if (!newintbits) strcpy(i1, "0 ");
				else sprintf(i1, "x%08x ", newintbits);

				char parse[40];
				if (D->parseBits == 0) strcpy(parse, "0");
				else sprintf(parse, "x%08x ", D->parseBits);

				char actualsub[MAX_WORD_SIZE];
				strcpy(actualsub, "``"); // empty backquotes when there is no substitute
				if (newsubstituteindex)
				{
					sprintf(actualsub, "`%s`", Index2Word(newsubstituteindex)->word);
				}

				char header[20];
				*header = 0;
				if (newheader) sprintf(header, "%u", newheader);

				char bigheader[MAX_WORD_SIZE];
				*bigheader = 0;
				if (changedForeign) // "FF " followed by one word index per language
				{
					char* ptr = bigheader;
					strcpy(ptr, "FF ");
					ptr += 3;
					for (unsigned int i = 0; i < languageCount; ++i)
					{
						sprintf(ptr, "%u ",Word2Index(D->foreignFlags[i]));
						ptr += strlen(ptr);
					}
				}
				fprintf(dictout, "%s %s %s %s %s %s %s %s %s %s\r\n", word, p1, p2, s1, s2, i1,parse, actualsub,header,bigheader);
			}
		}
	}
	fclose(in); // dont use Fclose
	seeAllFacts = false;
}

// WriteExtendedFacts - write the per-layer variables, dictionary changes and facts files.
//   factout, dictout, varout - open output files; nothing is written if any is NULL
//   build                    - BUILD0 or BUILD1
// factout is closed before returning on the normal path; dictout and varout are left open.
// NOTE(review): on the early return below none of the streams is closed.
static void WriteExtendedFacts(FILE* factout,FILE* dictout,FILE* varout, unsigned int build)
{
	if (!factout || !dictout || !varout) return;

	// every file starts with the raw check stamp
	fprintf(dictout, "%d %s\r\n", CHECKSTAMPRAW,language_list);
	fprintf(factout, "%d %s\r\n", CHECKSTAMPRAW,language_list);
	fprintf(varout, "%d\r\n", CHECKSTAMPRAW);
	seeAllFacts = true;

	// save global bot vars to file
	char* buffer = AllocateBuffer();
	bool oldshared = shared;
	shared = false;
	char* ptr = WriteUserVariables(buffer,false,true,NULL);
	shared = oldshared;
	fwrite(buffer,ptr-buffer,1,varout);
	FreeBuffer();

	char buildchar = (build == BUILD0) ? '0' : '1'; // not used in the remainder of this function

	WriteDictionaryChange(dictout,build);
	seeAllFacts = true; // WriteDictionaryChange turns it off again

	// we only write out new facts created this level. One cannot change old facts
	if (build == BUILD0) WriteFacts(factout,factsPreBuild[LAYER_0]);
	else if (build == BUILD1) WriteFacts(factout,factsPreBuild[LAYER_1]);
	if (multidict)
	{
		fwrite("--dead\r\n", 8, 1, factout);
		WriteDeadFacts(factout); // word converting to universal may kill old facts.
fwrite("--lang\r\n", 8, 1, factout); WriteLanguageAdjustedFacts(factout); // guy who became universal, his facts may change language to universal } if (factout) fclose(factout); seeAllFacts = false; } static void DumpErrors() { if (errorIndex) Log(ECHOUSERLOG,(char*)"\r\n ERROR SUMMARY: \r\n"); for (unsigned int i = 0; i < errorIndex; ++i) Log(ECHOUSERLOG,(char*)" %s",errors[i]); } static void DumpWarnings() { if (warnIndex) Log(USERLOG,"\r\nWARNING SUMMARY: \r\n"); *currentFilename = 0; for (unsigned int i = 0; i < warnIndex; ++i) { if (strstr(warnings[i],(char*)"is not a known word")) {} else if (strstr(warnings[i],(char*)" changes ")) {} else if (strstr(warnings[i],(char*)"is unknown as a word")) {} else if (strstr(warnings[i],(char*)"in opposite case")){} else if (strstr(warnings[i],(char*)"a function call")){} else if (strstr(warnings[i], (char*)"multiple spellings")) {} else Log(USERLOG," %s\r\n",warnings[i]); } } static void EmptyVerify(char* name, uint64 junk) { char* x = strstr(name,(char*)"-b"); if (!x) return; char c = (buildID == BUILD0) ? '0' : '1'; if (x[2] == c) unlink(name); } static int CompileCleanup(char* output,uint64 oldtokenControl, unsigned int build) { if (build == BUILD1) { for (unsigned int i = 0; i < undefinedFunctionIndex; ++i) { WORDP D = undefinedFunction[i]; if (!(D->internalBits & IS_OUTPUT_MACRO)) { WARNSCRIPT("*** Function used in Layer 0 never defined here in Layer 1: %s\r\n", D->word); } } } EndScriptCompiler(); build0Requested = build0Requested = false; buildID = 0; int resultcode = 0; numberOfTopics = 0; tokenControl = oldtokenControl; currentRuleOutputBase = currentOutputBase = ourMainOutputBuffer; compiling = NOT_COMPILING; jumpIndex = -1; testOutput = output; // allow summary to go out the server if (hasErrors) { EraseTopicFiles(build, baseName); DumpErrors(); if (missingFiles) Log(ECHOUSERLOG, (char*)"%d topic files were missing.\r\n", missingFiles); Log(ECHOUSERLOG, (char*)"\r\n%d errors - press Enter to quit. 
Then fix and try again.\r\n", hasErrors); if (!server && !commandLineCompile) ReadALine(readBuffer, stdin); resultcode = 4; // error } else if (hasWarnings) { DumpWarnings(); if (missingFiles) Log(USERLOG,"%d topic files were missing.\r\n", missingFiles); Log(USERLOG,"%d serious warnings, %d function warnings, %d spelling warnings, %d case warnings, %d substitution warnings\r\n ", hasWarnings - badword - substitutes - cases, functionCall, badword, cases, substitutes); } else { if (missingFiles) Log(ECHOUSERLOG, (char*)"%d topic files were missing.\r\n", missingFiles); Log(ECHOUSERLOG, (char*)"No errors or warnings\r\n\r\n"); } Log(ECHOUSERLOG, (char*)"\r\n\r\nFinished compile\r\n\r\n"); Rebegin(0, argc, argv); // reload dict and layers as needed echo = true; if (userlogFile) { fclose(userlogFile); userlogFile = NULL; } return resultcode; } void InitBuild(unsigned int build) { conditionalCompiledIndex = 0; build0jid = 0; build1jid = 0; buildtransientjid = 0; languageadjustedfactsList = 0; myBot = 0; // default deadfactsList = userVariableThreadList = kernelVariableThreadList = botVariableThreadList = NULL; buildId = build; *scopeBotName = 0; ClearWordMaps(); // :build char filex[200]; sprintf(filex, "%s/missingSets.txt", topicfolder); remove(filex); // precautionary sprintf(filex, "%s/missingLabel.txt", topicfolder); remove(filex); if (buildId == BUILD0) EraseTopicBin(BUILD1, "1"); // rebuilding underneath layer 1 MakeDirectory((char*)"TOPIC"); if (buildId == BUILD0) MakeDirectory((char*)"TOPIC/BUILD0"); else if (buildId == BUILD1) MakeDirectory((char*)"TOPIC/BUILD1"); } int ReadTopicFiles(char* name,unsigned int build,int spell) { currentBuild = build; propertyRedefines = NULL; flagsRedefines = NULL; char filename[SMALL_WORD_SIZE]; nospellcheck = false; isDescribe = false; *scopeBotName = 0; myBot = 0; globalBotScope = false; if (build == BUILD1) { strcpy(baseName,(char*)"1"); char dir[200]; sprintf(dir,"%s/BUILD1", topicfolder); MakeDirectory(dir); } else { char 
dir[200]; sprintf(dir,"%s/BUILD0", topicfolder); MakeDirectory(dir); strcpy(baseName,(char*)"0"); } char* output = testOutput; testOutput = NULL; FILE* in = FopenReadNormal(name); // default was top level chatscript if (!in) { char file[SMALL_WORD_SIZE]; if (*buildfiles) { sprintf(file, (char*)"%s/%s", buildfiles, name); // 2nd default is rawdata itself in = FopenReadNormal(file); } if (!in) { sprintf(file, (char*)"RAWDATA/%s", name); // 2nd default is rawdata itself in = FopenReadNormal(file); } if (!in) { sprintf(file,(char*)"private/%s",name); // 3rd default is private in = FopenReadNormal(file); if (!in) { sprintf(file,(char*)"../%s",name); // 4th default is just above chatscript folder in = FopenReadNormal(file); if (!in) { (*printer)((char*)"%s not found\r\n",name); return 4; } } } } lastDeprecation = 0; hasPlans = 0; char word[MAX_WORD_SIZE]; buildID = build; // build 0 or build 1 *duplicateTopicName = 0; // an example of a repeated topic name found missingFiles = 0; spellCheck = spell; // what spell checking to perform EraseTopicFiles(build,baseName); char file[SMALL_WORD_SIZE]; sprintf(file,(char*)"%s/missingLabel.txt", topicfolder); remove(file); sprintf(file,(char*)"%s/missingSets.txt", topicfolder); remove(file); WalkDirectory((char*)"VERIFY",EmptyVerify,0,false); // clear verification of this level compiling = FULL_COMPILE; errorIndex = warnIndex = hasWarnings = hasErrors = 0; substitutes = cases = functionCall = badword = 0; sprintf(filename,(char*)"%s/BUILD%s/map%s.txt", topicfolder,baseName,baseName); mapFile = FopenUTF8Write(filename); fprintf(mapFile, "\r\n"); // so bytemark not with data fprintf(mapFile, "# file: 0 full_path_to_file optional_botid\r\n"); // so bytemark not with data fprintf(mapFile, "# macro: start_line_in_file name_of_macro optional_botid (definition of user function)\r\n"); // so bytemark not with data fprintf(mapFile, "# line: start_line_in_file offset_byte_in_script (action unit in output) \r\n"); // so bytemark not with data 
fprintf(mapFile, "# concept: start_line_in_file name_of_concept optional_botid (concept definition) \r\n"); // so bytemark not with data fprintf(mapFile, "# topic: start_line_in_file name_of_topic optional_botid (topic definition) \r\n"); // so bytemark not with data fprintf(mapFile, "# rule: start_line_in_file full_rule_tag_with_possible_label rule_kind (rule definition) \r\n"); // so bytemark not with data fprintf(mapFile, "# Complexity of name_of_macro complexity_metric (complexity metric for function) \r\n"); // so bytemark not with data fprintf(mapFile, "# Complexity of rule full_rule_tag_with_possible_label rule_kind complexity_metric (complexity metric for rule) \r\n"); // so bytemark not with data fprintf(mapFile, "# bot: name_of_macro_it_happens_in botid (possible bot macro) \r\n"); // so bytemark not with data fprintf(mapFile, "\r\n"); // so bytemark not with data AllocateOutputBuffer(); sprintf(filename,(char*)"%s/BUILD%s/map%s.json", topicfolder,baseName,baseName); mapFileJson = FopenBinaryWrite(filename); // no UTF if want to be parsed by jsonopen fprintf(mapFileJson, "[\r\n"); mapTopicFileCount = 0; mapItemCount = 0; mapRuleCount = 0; // init the script output file sprintf(filename,(char*)"%s/BUILD%s/script%s.txt", topicfolder,baseName,baseName); FILE* out = FopenUTF8Write(filename); if (strlen(name) > 100) name[99] = 0; if (!strnicmp(name,(char*)"files",5)) name += 5; // dont need the prefix char* at = strchr(name,'.'); *at = 0; fprintf(out,(char*)"0 %s %s %s\r\n",GetMyTime(time(0)),name,version); // reserve 5-digit count for number of topics + timestamp (AFTER BOM) fclose(out); // dont use fclose uint64 oldtokenControl = tokenControl; tokenControl = 0; topicCount = 0; StartScriptCompiler(true); // read topic files? 
jumpIndex = -1; if (setjmp(scriptJump[++jumpIndex])) // base of a compile is 0 { return CompileCleanup(output,oldtokenControl, build); } // read file list to service, may also have bot: commands while (ReadALine(readBuffer,in) >= 0) { char* at = ReadCompiledWord(readBuffer,word); if (*word == '#' || !*word) continue; if (!stricmp(word,(char*)"stop") || !stricmp(word,(char*)"exit")) break; // fast abort if (!stricmp(word,"bot:")) { globalBotScope = true; // lasts til changed ReadBot(at); continue; } size_t len = strlen(word); char output[MAX_WORD_SIZE]; if (word[len-1] == '/') // directory request { Log(USERLOG,"\r\n>>Reading folder %s\r\n",word); bool recurse = word[len - 2] == '/'; if (recurse) word[len - 2] = 0; WalkDirectory(word,ReadTopicFile,build,recurse); // read all files in folder (top level) Log(USERLOG,"\r\n<>Setting language %s\r\n", word); } } else ReadTopicFile(word,build|FROM_FILE); // was explicitly named } if (in) fclose(in); fclose(mapFile); fprintf(mapFileJson, "]\r\n"); fclose(mapFileJson); StartFile((char*)"Post compilation Verification"); nospellcheck = false; SetLanguage("UNIVERSAL"); // see all dict and fact values // verify errors across all files DoubleCheckSetOrTopic(); // prove all sets/topics he used were defined DoubleCheckReuse(); // see if jump labels are defined DoubleCheckDefinition(build,topicfolder, baseName); *currentFilename = 0; if (*duplicateTopicName) WARNSCRIPT((char*)"At least one duplicate topic name, i.e., %s, which may intended if bot restrictions differ or concept collision.\r\n",duplicateTopicName) WalkDictionary(ClearBeenHere,0); // write out compiled data // write how many topics were found (for when we preload during normal startups) sprintf(filename,(char*)"%s/BUILD%s/script%s.txt", topicfolder,baseName,baseName); out = FopenUTF8WriteAppend(filename,(char*)"rb+"); if (out) { fseek(out,0,SEEK_SET); sprintf(word,(char*)"%05d",topicCount); unsigned char bom[3]; bom[0] = 0xEF; bom[1] = 0xBB; bom[2] = 0xBF; 
fwrite(bom,1,3,out); fwrite(word,1,5 * sizeof(char),out); fclose(out); // dont use Fclose } if (hasPlans) { sprintf(filename,(char*)"%s/BUILD%s/plans%s.txt", topicfolder,baseName,baseName); out = FopenUTF8WriteAppend(filename,(char*)"rb+"); if (out) { char word[MAX_WORD_SIZE]; fseek(out,0,SEEK_SET); sprintf(word,(char*)"%05u",hasPlans); fwrite(word,1,5 * sizeof(char),out); fclose(out); // dont use FClose } } // we delay writing out keywords til now, allowing multiple accumulation across tables and concepts WalkDictionary(WriteConcepts, build); WalkDictionary(ClearBeenHere, 0); // dump variables, dictionary changes, topic facts sprintf(filename,(char*)"%s/BUILD%s/facts%s.txt", topicfolder,baseName,baseName); char filename1[MAX_WORD_SIZE]; sprintf(filename1,(char*)"%s/BUILD%s/dict%s.txt", topicfolder,baseName,baseName); char filename2[MAX_WORD_SIZE]; sprintf(filename2, (char*)"%s/BUILD%s/variables%s.txt", topicfolder, baseName, baseName); FILE* dictout = FopenUTF8Write(filename1); FILE* factout = FopenUTF8Write(filename); FILE* varout = FopenUTF8Write(filename2); WriteExtendedFacts(factout,dictout, varout, build); fclose(dictout); // dont use FClose fclose(varout); // dont use FClose // FClose(factout); closed from within writeextendedfacts FreeOutputBuffer(); return CompileCleanup(output,oldtokenControl, build); } char* CompileString(char* ptr) // incoming is: ^"xxx" or ^'xxxx' { char tmp[MAX_WORD_SIZE * 2]; strcpy(tmp,ptr); // protect copy from multiple readcalls size_t len = strlen(tmp); if (tmp[len-1] != '"' && tmp[len-1] != '\'') BADSCRIPT((char*)"STRING-1 String not terminated with doublequote %s\r\n",tmp) tmp[len-1] = 0; // remove trailing quote // flip the FUNCTION marker inside the string static char data[MAX_WORD_SIZE * 2]; char* pack = data; *pack++ = '"'; *pack++ = FUNCTIONSTRING; *pack++ = ':'; // a internal marker that is has in fact been compiled - otherwise it is a format string whose spaces count but cant fully execute if (tmp[2] == '(') 
ReadPattern(tmp+2,NULL,pack,false,false); // incoming is: ^"(xxx" else ReadOutput(false,false,tmp+2,NULL,pack,NULL,NULL,NULL); TrimSpaces(data,false); len = strlen(data); data[len] = '"'; // put back closing quote data[len+1] = 0; return data; } #endif #endif