June 01, 1993
URL:http://drdobbs.com/natural-language-expansions-for-tense-an/184402726
Sentence: Jim should have run in the race
Response: OK
Sentence: Jim had been running in the race
Response: OK
Sentence: Jim is running in the race
Response: OK
Sentence: Where did Jim run
Response: Jim had been running in the race
Sentence: Where would Jim run
Response: Jim had been running in the race
Sentence: Where should Jim run
Response: I'm not sure, but Jim should have run in the race
Sentence: Where will Jim run
Response: I'm not sure, but Jim is running in the race
field               columns
-----               -------
word                1-24
word type           25-28
usage               30
tense               31-34
number              42-43
auxiliary meaning   45-47
verb root           47-60
/* Copyright (c) 1993 Russell Suereth */

#include "natural.h"

void check_aux_verb(void);
void check_number(void);
void make_response(void);
void make_answer(int prev_sentence);
void get_verb(char pass_tense, char pass_number, char pass_usage);
int match_verb(char pass_tense, char pass_number, char pass_usage);

/* Text of the reply; filled in by make_response() and make_answer() */
char response[200];

/* Verb information, copied from the dictionary by match_record() */
unsigned char verb_tense[5];
unsigned char verb_number[5];
unsigned char verb_usage;

/* Auxiliary information, copied from the dictionary by match_aux() */
unsigned char aux_tense[5];
unsigned char aux_number[5];
unsigned char aux_usage;

/* Number of the sentence subject (set for PRON and NAME subjects) */
unsigned char subject_number;

/* Per-sentence information, indexed by sentence */
unsigned char tenses[20];
unsigned char numbers[20];
unsigned char usages[20];
unsigned char subjects_type[20];
unsigned char aux_meaning[20][5];
char auxiliaries[20][25];

/*****************************************************/
/* Compare the passed word with the word in the      */
/* current dictionary record. If they are the same,  */
/* then extract the type (NOUN, VERB, etc.). If the  */
/* type is PRON, then extract pronoun information.   */
/* If the type is VERB, then extract verb            */
/* information.                                      */
/*                                                   */
/* pass_word  word from the input sentence           */
/* types      index of the type slot to fill for the */
/*            current word in type_array             */
/*                                                   */
/* Returns 0 when the record matches the word (the   */
/* type and any PRON/VERB information have been      */
/* stored), 1 when it does not.                      */
/*                                                   */
/* Record offsets read here (0-based): type 24-27,   */
/* usage 29, tense 30-33, number 41-42.              */
/*****************************************************/
int match_record(char *pass_word, int types)
{
    int  i, j;
    char *root;
    char *dic_word;

    dic_word = extract_word();

    /* Check if passed word equals dictionary word */
    if (strcmpi(pass_word, dic_word) != 0)
        return(1);

    /* Word found, get the type.  The cast keeps isspace() */
    /* defined for characters with the high bit set.       */
    for (i=24,j=0; i<28; i++) {
        if (isspace((unsigned char) dic_record[i]))
            break;
        type_array[word_ct][types][j++] = dic_record[i];
    }
    /* Trim the type */
    type_array[word_ct][types][j] = '\0';

    /* A pronoun supplies the subject number */
    if (strcmp(type_array[word_ct][types], "PRON") == 0)
        subject_number = dic_record[41];

    /* A verb supplies its root, usage, tense(s) and number(s) */
    if (strcmp(type_array[word_ct][types], "VERB") == 0) {
        root = extract_root();
        strcpy(root_array[word_ct], root);

        verb_usage = dic_record[29];

        for (i=30,j=0; i<34; i++,j++) {
            if (isspace((unsigned char) dic_record[i]))
                break;
            verb_tense[j] = dic_record[i];
        }
        /* Trim the tense */
        verb_tense[j] = '\0';

        for (i=41,j=0; i<43; i++,j++) {
            if (isspace((unsigned char) dic_record[i]))
                break;
            verb_number[j] = dic_record[i];
        }
        /* Trim the number */
        verb_number[j] = '\0';
    }

    return(0);
}

/*****************************************************/
/* Determine if the
input sentence contains a known,                     */
/* underlying structure. If it does, then assign the */
/* correct types and phrases for the words.          */
/*****************************************************/
/* The known structures are tried in table order.    */
/* For the first one whose word types all pass       */
/* check_type(), the types are copied to prime_types */
/* and the phrase names to phrases.  If the          */
/* structure contains auxiliaries, they are joined   */
/* with single blanks into auxiliaries[sentence] and */
/* get_aux() is called.                              */
/*                                                   */
/* Returns 0 when a structure matched, 1 otherwise.  */
/*****************************************************/
int check_underlying()
{
    /* One word position: the type the word must have and */
    /* the phrase it is assigned when the structure fits. */
    struct slot {
        const char *type;
        const char *phrase;
    };
    struct structure {
        int slot_ct;            /* number of words in the structure     */
        int aux_ct;             /* consecutive AUX words, from word i+1 */
        struct slot slots[8];
    };
    static const struct structure known[] = {
        /* Structure WH-AUX-PRON-VERB */
        { 4, 1, { { "WH",   "WHQUESTION" },
                  { "AUX",  "VERBPHRASE" },
                  { "PRON", "NOUNPHRASE" },
                  { "VERB", "VERBPHRASE" } } },
        /* Structure PRON-AUX-VERB-PREP-DET-NOUN */
        { 6, 1, { { "PRON", "NOUNPHRASE" },
                  { "AUX",  "VERBPHRASE" },
                  { "VERB", "VERBPHRASE" },
                  { "PREP", "PREPPHRASE" },
                  { "DET",  "PREPPHRASE" },
                  { "NOUN", "PREPPHRASE" } } },
        /* Structure WH-AUX-NAME-VERB */
        { 4, 1, { { "WH",   "WHQUESTION" },
                  { "AUX",  "VERBPHRASE" },
                  { "NAME", "NOUNPHRASE" },
                  { "VERB", "VERBPHRASE" } } },
        /* Structure NAME-AUX-AUX-AUX-VERB-PREP-DET-NOUN */
        { 8, 3, { { "NAME", "NOUNPHRASE" },
                  { "AUX",  "VERBPHRASE" },
                  { "AUX",  "VERBPHRASE" },
                  { "AUX",  "VERBPHRASE" },
                  { "VERB", "VERBPHRASE" },
                  { "PREP", "PREPPHRASE" },
                  { "DET",  "PREPPHRASE" },
                  { "NOUN", "PREPPHRASE" } } },
        /* Structure NAME-AUX-AUX-VERB-PREP-DET-NOUN */
        { 7, 2, { { "NAME", "NOUNPHRASE" },
                  { "AUX",  "VERBPHRASE" },
                  { "AUX",  "VERBPHRASE" },
                  { "VERB", "VERBPHRASE" },
                  { "PREP", "PREPPHRASE" },
                  { "DET",  "PREPPHRASE" },
                  { "NOUN", "PREPPHRASE" } } },
        /* Structure NAME-AUX-VERB-PREP-DET-NOUN */
        { 6, 1, { { "NAME", "NOUNPHRASE" },
                  { "AUX",  "VERBPHRASE" },
                  { "VERB", "VERBPHRASE" },
                  { "PREP", "PREPPHRASE" },
                  { "DET",  "PREPPHRASE" },
                  { "NOUN", "PREPPHRASE" } } },
        /* Structure NAME-VERB-PREP-DET-NOUN */
        { 5, 0, { { "NAME", "NOUNPHRASE" },
                  { "VERB", "VERBPHRASE" },
                  { "PREP", "PREPPHRASE" },
                  { "DET",  "PREPPHRASE" },
                  { "NOUN", "PREPPHRASE" } } }
    };
    const int known_ct = (int) (sizeof(known) / sizeof(known[0]));
    const struct structure *st;
    int i = 0;                  /* structures start at the first word */
    int s, k;

    for (s = 0; s < known_ct; s++) {
        st = &known[s];

        /* Every word must carry the type the structure expects. */
        /* check_type() only reads its argument, so dropping the */
        /* const qualifier for the call is safe.                 */
        for (k = 0; k < st->slot_ct; k++) {
            if (check_type((char *) st->slots[k].type, i+k) != 0)
                break;
        }
        if (k < st->slot_ct)
            continue;           /* a word had the wrong type */

        for (k = 0; k < st->slot_ct; k++) {
            strcpy(prime_types[i+k], st->slots[k].type);
            strcpy(phrases[i+k], st->slots[k].phrase);
        }

        /* Join the auxiliary words, e.g. "could have been", and */
        /* look the combination up in the dictionary.            */
        if (st->aux_ct > 0) {
            strcpy(auxiliaries[sentence], word_array[i+1]);
            for (k = 2; k <= st->aux_ct; k++) {
                strcat(auxiliaries[sentence], " ");
                strcat(auxiliaries[sentence], word_array[i+k]);
            }
            get_aux();
        }
        return(0);
    }

    return(1);
}

/*****************************************************/
/* Compare the passed type with all the types for    */
/* this word in the type_array. If the type is       */
/* found, then return 0. The pass_number parameter   */
/* identifies the word in the input sentence.
*/ /*****************************************************/ int check_type(char *pass_type, int pass_number) { int i; for (i=0; type_array[pass_number][i][0]; i++) { if (strcmp(type_array[pass_number][i], pass_type) == 0) /* Passed type is found in array */ return (0); } /* Passed type is not found in array */ return(1); } /*****************************************************/ /* If the correct type is "NAME" or "PRON" then the */ /* word refers to a subject so copy the word to the */ /* subjects array. */ /*****************************************************/ void check_subject() { int i; for (i=0; i<word_ct; i++) { if (strcmp(prime_types[i], "NAME") == 0) { strcpy(subjects[sentence], word_array[i]); subject_number = SINGULAR; subjects_type[sentence] = NAME; break; } if (strcmp(prime_types[i], "PRON") == 0) { strcpy(subjects[sentence], word_array[i]); subjects_type[sentence] = PRONOUN; break; } } return; } /*****************************************************/ /* Determine the sentence tense and usage by */ /* matching auxiliary and verb information, or by */ /* matching previous sentence information. 
*/
/*****************************************************/
/* Results go to tenses[sentence] and                */
/* usages[sentence].                                 */
/*                                                   */
/* With an auxiliary: usage must equal the verb      */
/* usage, and the tenses shared by auxiliary and     */
/* verb are collected.  No shared tense (or a usage  */
/* mismatch) gives UNKNOWN tense and usage; exactly  */
/* one shared tense is the sentence tense.           */
/*                                                   */
/* Without an auxiliary: usage is the verb usage,    */
/* and a verb that can be PAST makes the sentence    */
/* PAST.                                             */
/*                                                   */
/* In the remaining cases the up to three preceding  */
/* sentences are searched, newest first, for one     */
/* with the same subject and action, a non-empty     */
/* place, and a tense among the candidates; its      */
/* tense is taken.  Otherwise the tense is PRESENT.  */
/*****************************************************/
void check_aux_verb()
{
    const char *candidates;     /* tenses the sentence may still have  */
    const char *hit;
    char shared[5];             /* tenses common to auxiliary and verb */
    int  shared_ct = 0;
    int  oldest;
    int  k, prev;

    if (strlen(auxiliaries[sentence]) > 0) {
        /*************************************************/
        /* Auxiliary in sentence                         */
        /*************************************************/
        if (aux_usage != verb_usage) {
            usages[sentence] = UNKNOWN;
            tenses[sentence] = UNKNOWN;
            return;
        }

        for (k = 0; aux_tense[k] != '\0'; k++) {
            hit = strchr((const char *) verb_tense, aux_tense[k]);
            if (hit != NULL)
                shared[shared_ct++] = *hit;
        }
        shared[shared_ct] = '\0';

        if (shared_ct == 0) {
            usages[sentence] = UNKNOWN;
            tenses[sentence] = UNKNOWN;
            return;
        }

        usages[sentence] = aux_usage;

        if (shared_ct == 1) {
            tenses[sentence] = shared[0];
            return;
        }

        candidates = shared;
    }
    else {
        /*************************************************/
        /* No auxiliary in sentence                      */
        /*************************************************/
        usages[sentence] = verb_usage;

        if (strchr((const char *) verb_tense, PAST) != NULL) {
            tenses[sentence] = PAST;
            return;
        }

        /* Verb tense is present or future */
        candidates = (const char *) verb_tense;
    }

    /*****************************************************/
    /* More than one possible tense: use the context of  */
    /* at most the three previous sentences.             */
    /*****************************************************/
    oldest = sentence - 3;
    if (oldest < 0)
        oldest = 0;

    for (prev = sentence - 1; prev >= oldest; prev--) {
        if (strcmpi(subjects[prev], subjects[sentence]) != 0)
            continue;
        if (strcmpi(actions[prev], actions[sentence]) != 0)
            continue;
        if (strchr(candidates, tenses[prev]) == NULL)
            continue;
        if (strlen(places[prev]) == 0)
            continue;

        tenses[sentence] = tenses[prev];
        return;
    }

    tenses[sentence] = PRESENT;
}

/*****************************************************/
/* Match the subject, verb, and auxiliary number.    */
/* If the match is successful, then the sentence     */
/* number is the matched number.
*/ /*****************************************************/ void check_number() { if (strchr(verb_number, subject_number) == NULL) { numbers[sentence] = UNKNOWN; return; } if ((strten(auxiliaries[sentence]) > 0) && (strchr(aux_number, subject_number) == NULL)) { numbers[sentence] = UNKNOWN; return; } numbers[sentence] = subject_number; return; } /*****************************************************/ /* Read the dictionary to extract the auxiliary */ /* information. */ /*****************************************************/ void get_aux() { rewind(infile); fgets(dic_record, 80, infile); while (! feof(infile)) { if (match_aux() == 0) return; fgets(dic_record, 80, infile); } return; } /*****************************************************/ /* If the sentence auxiliary matches the word in the */ /* current dictionary record, then extract the */ /* auxiliary information from the dictionary. */ /*****************************************************/ int match_aux() { int i,j; char *dic_word; dic_word = extract_word(); if (strcmpi(auxiliaries[sentence], dic_word) != 0) return (1); aux_usage = dic_record[29]; for (i=30,j=0; i<34; i++,j++) { if (isspace(dic_record[i])) break; aux_tense[j] = dic_record[i]; } /* Trim the tense */ aux_tense[j] = '\0'; for (i=41,j=0; i<43; i++,j++) { if (isspace(dic_record[i])) break; aux_number[j] = dic_record[i]; } /* Trim the number*/ aux_number[j] = '\0'; for (i=44,j=0; i<47; i++,j++) { if (isspace(dic_record[i])) break; aux_meaning [sentence] [j] = dicrecord [i]; } return(0); } /*****************************************************/ /* Generate a response with information from a */ /* matching, previous sentence. */ /*****************************************************/ void make_response() { int i; /***************************************************/ /* Input sentence is not asking for information. 
*/ /***************************************************/ if (strcmpi(word_array[0], "where") != 0) { strcpy(response, "OK"); return; } /***************************************************/ /* Match subject, action, tense, and meaning. */ /***************************************************/ for (i=sentence-1; i>=0; i--) { if ((strcmpi(subjects[i],subjects[sentence])==0) && (strcmpi(actions[i], actions[sentence]) ==0) && (strlen(places[i]) > 0) && (tenses[i] == tenses[sentence]) && (strpbrk(aux_meaning[i],aux_meaning[sentence]) != NULL)) { make_answer(i); return; } } /***************************************************/ /* Match subject, action, and tense. */ /***************************************************/ for (i=sentence-1; i>=0; i--) { if ((strcmpi(subjects[i],subjects[sentence])==0) && (strcmpi(actions[i], actions[sentence]) ==0) && (strlen(places[i]) > 0) && (tenses[i] == tenses[sentence])) { make_answer (i); return; } } /***************************************************/ /*Match subject, action, and meaning. */ /***************************************************/ for (i=sentence-1; i>=0; i--) { if ((strcmpi(subjects[i],subjects[sentence])==0) && (strcmpi(actions[i], actions[sentence]) ==0) && (strlen(places[i]) > 0) && (strpbrk(aux_meaning[i],aux_meaning[sentence]) != NULL)) { strcpy(response, 'I'm not sure, but "); make_answer(i); return; } } /***************************************************/ /* Match subject and action. */ /***************************************************/ for (i=sentence-1; i>=0; i--) { if ((strcmpi(subjects[i],subjects[sentence])==0) && (strcmpi(actions[i], actions[sentence]) ==0) && (strlen(places[i]) > 0)) { strcpy(response, 'I'm not sure, but "); make_answer(i); return; } } strcpy(response, "I don't know"); return; } /*****************************************************/ /* Move information from a previous sentence to the */ /* response. 
*/ /*****************************************************/ void make_answer(int prev_sentence) { if (subjects_type[prev_sentence] == PRONOUN) { if (strlen(response) == 0) { subjects[prev_sentence][0] = (char) toupper(subjects [prev_sentence] [0] ); } else { subjects[prev_sentence] [0] = (char) tolower(subjects[prev_sentence][0]); } } strcat(response, subjects[prev_sentence]); strcat(response, " "); if (strlen(auxiliaries[prev_sentence]) > 0) { strcat(response, auxiliaries[prev_sentence]); strcat(response, " "); } get_verb(tenses[prev_sentence], numbers[prev_sentence], usages[prev_sentence]); strcat(response, places[prev_sentence]); return; } /*****************************************************/ /* Get the correct verb from the dictionary. */ /*****************************************************/ void get_verb(char pass_tense, char pass_number, char pass_usage) { rewind{infile); fgets(dic_record, 80, infile); while (! feof(infile)) { if (match_verb(pass_tense, pass_number, pass_usage) == 0) break; fgets(dic_record, 80, infile); } return; } /*****************************************************/ /* If the verb information in the current record */ /* matches the passed information, then move the */ /* correct verb to the response. */ /*****************************************************/ int match_verb(char pass_tense, char pass_number, char pass_usage) { int i; char *root; char *dic_word; root = extract_root(); /* Match verb with root */ if (strcmpi(actions[sentence], root) == 0) { /* Match verb with tense */ for (i=301 i<34; i++) { if (isspace(dic_record[i])) return(1); if (dic_record[i] -= pass_tense) break; } /* Match verb with number */ for (i=41; i<43; i++) { if (isspace(dic_record[i])) return(1); if (dic_record[i] == pass_number) break; } /* Match verb with usage */ if (dic record[29] == pass_usage) { dic_word = extract_word(); strcat(response, dic_word); return(0); } } return(1); } /* End of File */
/* natural.h Copyright (c) 1993 Russell Suereth */

#ifndef NATURAL_H
#define NATURAL_H

/* Values that are not dictionary characters */
#define UNKNOWN         200
#define PRONOUN         208
#define NAME            209
#define AUX_VERB_ERROR  210

/*****************************************************/
/* The following definitions are dictionary values.  */
/* Each value is the character code of the character */
/* shown in the comment beside it.                   */
/*****************************************************/

/* Usage */
#define ING      73     /* I */
#define NOAUX    78     /* N */
#define ROOT     82     /* R */

/* Tense */
#define PAST     48     /* 0 */
#define PRESENT  54     /* 6 */
#define FUTURE   57     /* 9 */

/* Number */
#define SINGULAR 64     /* @ */
#define PLURAL   36     /* $ */

/* Meaning for auxiliaries.  OBLIGATION has the same  */
/* value as the usage ING above.                      */
#define LIMITED_DURATION         65     /* A */
#define PARTICULAR_POINT_OF_TIME 66     /* B */
#define UP_TO_PRESENT            67     /* C */
#define NOT_COMPLETED            68     /* D */
#define CAN                      69     /* E */
#define COULD                    70     /* F */
#define MAY                      71     /* G */
#define POSSIBILITY              72     /* H */
#define OBLIGATION               73     /* I */
#define WILL                     74     /* J */
#define WOULD                    75     /* K */
#define MUST                     76     /* L */
#define FIXED_PLAN               77     /* M */

#endif /* NATURAL_H */

/* End of File */
a DET the DET house NOUN street NOUN store NOUN race NOUN we PRON $ he PRON @ she PRON @ it PRON @ they PRON $ run VERB R069 @$ run runs VERB N6 @ run running VERB I069 @$ run ran VERB N0 @$ run walk VERB R069 @$ walk walks VERB N6 @ walk walking VERB I069 @$ walk walked VERB N0 @$ walk to PREP in PREP on PREP where WH was AUX I0 @ D were AUX I0 $ D did AUX R0 @$ B had AUX R0 @$ B has AUX R06 @ C have AUX R06 $ C had been AUX I0 @$ B has been AUX I0 @ C have been AUX I0 $ C could AUX R09 @$ F would AUX R0 @$ K could have AUX R0 @$ F would have AUX R0 @$ K could have been AUX I0 @$ F would have been AUX I0 @$ K should have AUX R0 @$ I should have been AUX I0 @$ I may have AUX R0 @$ G might have AUX R0 @$ H may have been AUX I0 @$ G might have been AUX I0 @$ H must have been AUX I0 @$ L is AUX I69 @ AM be AUX been AUX are AUX I69 $ AM can AUX R69 @$ E can be AUX I69 @$ E will AUX R9 @$ J will be AUX I9 @$ J could be AUX I69 @$ F should AUX R9 @$ I should be AUX I9 @$ I would be AUX I9 @$ K must AUX R9 @$ L must be AUX I9 @$ L may AUX R9 @$ G may be AUX I69 @$ G might AUX R9 @$ H might be AUX I9 @$ H /* End of File */
Russell Suereth has been consulting for over 12 years in the New York City and Boston areas. He started designing and coding systems on IBM mainframes and now also builds PC software systems. You can write to Russell at 84 Old Denville Rd, Boonton, NJ 07005, or call him at (201) 334-0051.
This article expands the natural language processor presented in "A Natural Language Processor" (CUJ, April 1993) to include tense and number. Tense and number help determine the grammatical usage of auxiliaries and verbs, and derive meaning from the sentence. Tense indicates the time of the action or state: past, present, or future. Number indicates how many: one or more than one. These simple meanings help identify similar information between sentences. The processor uses similar information in sentences to generate an appropriate response.
I added and expanded several processes to implement tense and number. This natural language processor can now accept input sentences with auxiliary combinations, identify tense and number, and use meaning to generate grammatical responses. These expansions enable the natural language processor to process tense and number, and to respond more correctly.
The processor uses auxiliary and verb information from the dictionary to help identify tense and number. Auxiliaries, words such as is, have, and would, can be combined to create other auxiliaries such as would have and would have been. Underlying structures identify auxiliary combinations in the input sentence.
Underlying structures define the kinds of input sentences that can be processed. The original processor had two underlying structures that defined two kinds of input sentences. In this article, I expand the underlying structures to accept auxiliary combinations. For instance, the structure NAME-AUX-VERB-PREP-DET-NOUN accepted Jim could run in the race. I expanded the structure to NAME-AUX-AUX-VERB-PREP-DET-NOUN and NAME-AUX-AUX-AUX-VERB-PREP-DET-NOUN. These structures accept Jim could be running in the race and Jim could have been running in the race.
Listing 1 and Listing 2 show only the expanded code for the original processor. This expanded code can be a model for processing tense and number, or added to the original processor. You can get the complete processor code, including the expansions for tense and number from the various sources for CUJ online source code (see page 6).
check_underlying contains the expanded structures. check_type matches the underlying structure to the input sentence. If the input sentence has the underlying structure, check_underlying concatenates the auxiliary words to the auxiliaries array. Then get_aux is called to find the auxiliary in the dictionary.
The dictionary contains the auxiliary tense, number, meaning, and usage. get_aux reads each dictionary record and calls match_aux to match the auxiliary with the dictionary word. If the match is successful, match_aux extracts the tense, number, meaning, and usage for later use. The natural language processor uses this information to match the input sentence auxiliary with the verb, and to retrieve the correct verb when generating a response.
The existence of a matching, underlying structure determines the processor's success in understanding an input sentence. The coded, underlying structures could be expanded further with code for phrase structures such as AUX, AUX-AUX, and AUX-AUX-AUX. Such an expansion would reduce the number of coded, underlying structures but would accept more kinds of input sentences.
The processor also extracts verb and pronoun information from the dictionary to later help determine that an input sentence is grammatical, and retrieve the correct verb for a generated response. match_record extracts the verb and pronoun information according to the word type. That is, for the word type VERB, the processor extracts the verb root, usage, tense, and number. For the word type PRON, it extracts only the pronoun number.
The dictionary in this expanded processor has new words and information for processing tense and number, and a new layout for the new information. Listing 3 shows the new dictionary. Figure 2 shows the dictionary layout.
The processor determines tense primarily by matching auxiliary tenses with verb tenses. For example, in the sentence Jim had run in the race, the processor matches the tenses of the auxiliary had with the tenses of the verb run. The auxiliary had is defined in the dictionary as past tense. The verb run is defined as past, present, and future. The past tense matches successfully and so the sentence is past tense. If no match is successful, then the auxiliary and verb don't agree and the sentence is in error.
check_aux_verb matches the auxiliary with the verb. The routine first processes sentences that have an auxiliary. If the auxiliary and verb usage don't match, then the sentence tense and usage are unknown. If the auxiliary and verb usage match, then each auxiliary tense is matched with each verb tense. If the number of successful tense matches is one, then the matched tense and usage are assigned to the sentence tense and usage.
The usage helps identify the correct verb that can be used with the auxiliary. Some kinds of verbs can't be used with a specific auxiliary. A correct verb is in was running and an incorrect verb is in was run. The verb run has a ROOT usage, was and running have an ING usage. The ING usage identifies a verb that ends with ing, NOAUX identifies a verb with no auxiliary, and ROOT identifies the main form of the verb. If the auxiliary and verb usage match, then that verb may be used with the auxiliary.
Some auxiliary and verb combinations have more than one tense match. Consider, for example, the sentence Jim can run in the race. The auxiliary can is in the dictionary as present and future tense. The verb run is in the dictionary as past, present, and future tense. This auxiliary and verb combination may be present or future tense. More than one tense match causes the tense to be unclear. In this case, you must use an alternative method to determine tense.
Input sentence context determines tense when auxiliary and verb tenses have more than one match. The processor determines sentence context by analyzing information in previous sentences. Look at the sentences Jim will run in the finals, and Jim can run in the first lane. The second sentence tense is unclear when only that sentence is analyzed. The auxiliary can is in the dictionary as present and future tense, and run is in the dictionary as past, present, and future. The second sentence may be present or future tense. But when the tense of the first sentence is also analyzed, then the second sentence tense can be determined and becomes future tense.
However, a previous sentence tense may be irrelevant or forgotten if that previous sentence occurred long ago. For example, the speaker says Jim will run in the finals and fifteen sentences later says Jim can run in the first lane. The listener may not be sure first lane refers to the finals. The sentence about the finals may even be forgotten. When no recent sentence indicates the tense, the listener assumes the tense is present.
check_aux_verb looks at the previous three sentences when the number of successful tense matches is greater than one. The current sentence's subject, action, and possible tenses are matched with the previous three sentences' subject, action, and tense. If a match is successful, then the matching sentence tense is assigned to the current sentence tense. If no match is successful, then the current sentence tense is present.
Some sentences don't have an auxiliary. In these sentences, the processor uses the verb to determine the input sentence tense. When the verb is past tense, then the sentence is past tense. The sentence Jim ran in the race is past tense. When the verb is not past tense, then the sentence tense is unclear. The sentence Jim runs in the race may be present or future tense. The previous three sentences are used to determine the tense when the tense is unclear.
check_aux_verb also processes tense when the input sentence has no auxiliary. The current sentence's subject, action, and possible verb tenses are matched with the previous three sentences' subject, action, and tense. If a match is successful, then the previous sentence tense is assigned to the current sentence tense. If no match is successful, then the current sentence tense is present.
The sentence subject determines number. The number can be singular or plural depending on whether the subject refers to one or more than one. The sentence He runs in the race shows a singular subject, They run in the race shows a plural subject. The auxiliary, verb, and subject number in a grammatical sentence must be all singular or all plural.
Many verb forms are both singular and plural. For example, running is singular in He is running in the race, and plural in They are running in the race. Many auxiliaries also can be singular and plural. For example, could be is singular in He could be running in the race, and plural in They could be running in the race. But some auxiliaries and verbs can be only singular or plural. check_number matches the auxiliary, verb, and subject number. If the match is successful, the matched number is assigned to the sentence number. If the match is unsuccessful, the sentence number is unknown.
Number is used to identify the correct verb in a generated response. Using number enables the correct verb to be extracted from the dictionary. The sentence Jim runs in the race has a correct verb because Jim and runs are singular. A subject and verb that agree help make the response grammatical and effective.
The original processor generated only two kinds of responses. It returned a simple OK when given a statement, and someone's location when given a question. The previous sentence that matched the same subject and action words gave the location for the response. Previous sentences were matched by the same words to identify similar information. But similar information is often determined by meaning rather than by the same words.
The expanded response process uses tense and the auxiliary's meaning to help identify similar information, making the natural language processor sound more human. Figure 1 shows an example. The processor responded with correct information because it used tense and auxiliary meaning when it matched the sentences. The tense matched because the two sentences refer to past tense. The auxiliary meaning matched because did and had been mean PARTICULAR_POINT_OF_TIME.
The processor currently assigns auxiliary meaning only when the auxiliary exists. Auxiliary meaning helps match similar information between sentences. A sentence without an auxiliary meaning can't be properly matched to another sentence. A further processor expansion can derive an auxiliary meaning from the sentence when no auxiliary exists. The derived auxiliary meaning will allow a sentence without an auxiliary to be properly matched.
When the processor generates a response, it searches in previous sentences for similar information. The processor uses the most accurate information in the response. There are four separate matches the processor uses to find information for the response. The first match has the highest probability that the information is accurate, the last match has the lowest probability that the information is accurate. People in conversation respond in a similar manner. A person may not have enough knowledge for a correct response. But that person may create an alternative response to show knowledge about the information.
The first match criterion is subject, action, tense, and auxiliary meaning. Subject and action are always in the match criteria for the response. This ensures that all matched sentences have the same subject and action. The sentences Jim had been running in the race and Where did Jim run? match because had been and did have the same tense and meaning. make_response matches information in the current sentence with information in previous sentences. In the first match, the subjects, actions, tenses, and aux_meaning arrays match the current sentence to a previous sentence. When a match is successful, make_answer generates a response with information from the previous, matched sentence (see Figure 1).
The second match criterion is subject, action, and tense. The subjects, actions, and tenses arrays match the current sentence to a previous sentence. Here, the sentences Jim had been running in the race and Where would Jim run? match because had been and would have the same tense.
The third match criterion is subject, action, and auxiliary meaning. The subjects, actions, and aux_meaning arrays match the current sentence to a previous sentence. The sentences Jim should have run in the race and Where should Jim run? match because should and should have have the same meaning. The tense is different, and so the response may not give the correct information. Because of this, the processor prefaces the response with I'm not sure, but.
The fourth match criterion is the same as in the original processor. Only subject and action are matched. The subjects and actions arrays match the current sentence to a previous sentence. The sentences "Jim is running in the race" and "Where will Jim run?" match because they have the same subject and action. But the tense and auxiliary meaning don't match, so the response may be incorrect. Because of this, the processor prefaces the response with "I'm not sure, but". When all four matches are unsuccessful, the processor can't find any similar information. Then make_response moves the statement "I don't know" to the response.
The correct verb helps ensure a grammatical response. make_answer generates a response by moving appropriate words to the response variable. The appropriate subject and auxiliary are first moved to the response. Then get_verb is called to extract the correct verb and move it to the response. get_verb reads each record in the dictionary and calls match_verb to find the correct verb. match_verb matches the passed tense, number, and usage with the tense, number, and usage of the current dictionary record. When a match is successful, the correct verb is extracted from the dictionary and moved to the response. After the verb is extracted, the place where the action occurred is also moved to the response.
The processor compares several word types to determine that the sentence agrees in tense and number. Another word type that must agree in number is the pronoun, words such as he, she, and they. A pronoun can replace the name in the sentence, and be the sentence subject. Pronouns must be defined in the underlying structures, be identified as singular or plural, and be used properly in a response.
The structure PRON-AUX-VERB-PREP-DET-NOUN (Listing 1) is for a statement, and WH-AUX-PRON-VERB (Listing 1) is for a question. These structures allow the input sentences "He is running in the race" and "Where did he run?"
Number is extracted from the dictionary and identifies whether the pronoun is singular or plural. match_record extracts the pronoun number and assigns it to the subject number. Subject number is used to determine number agreement and to extract the correct verb.
Pronouns in a generated response must have their first letter changed to uppercase or lowercase. If the pronoun is the first word in the response, then its first letter must be uppercase. If the pronoun is not the first word, then its first letter must be lowercase. make_answer has the code for the pronoun in a response. The subjects_type array has an entry for each sentence. Each entry identifies whether the subject is a name or a pronoun. If the response's subject is a pronoun, then the subject's first letter is changed. check_subject assigns values to the subjects_type array. Pronoun letter change helps the processor use the pronoun properly in a response.
Several processes are required to process tense and number. The processes described in this article extract auxiliary and verb information; determine tense from the auxiliary and verb, a previous sentence, or with no auxiliary; determine number; match sentences for the response; extract the correct verb; and process the pronoun. Each of these processes is an expansion of the original natural language processor.
Further expansions to the processor can process time features. These expansions identify time words such as last week, match time meaning with auxiliary meaning, generate a response from time meaning, and generate a response to explain time and number errors.
The processes described in this article expand the natural language processor to include tense and number. Tense and number are used to extract the correct verb for a grammatical response. Tense and the auxiliary are used to match similar information between sentences. This match enables the processor to generate a response based on word meaning. These expansions provide the processor with more accurate responses.
(C) Copyright 1993 Russell Suereth.
Liles, Bruce L. 1971. An Introductory Transformational Grammar. Englewood Cliffs: Prentice-Hall.
Quirk, Randolph, and Sidney Greenbaum. 1973. A Concise Grammar of Contemporary English. San Diego: Harcourt Brace Jovanovich.
Suereth, Russell. April 1993. "A Natural Language Processor." The C Users Journal. Lawrence, KS: R&D Publications.
Terms of Service | Privacy Statement | Copyright © 2024 UBM Tech, All rights reserved.