//
//  MPExpressionTokenizer.m
//  MathPad
//
//  Created by Kim Wittenburg on 19.09.14.
//  Copyright (c) 2014 Kim Wittenburg. All rights reserved.
//

#import "MPExpressionTokenizer.h"

#import "MPExpression.h"
#import "MPExpression.h"
#import "MPToken.h"

#import "NSRegularExpression+MPParsingAdditions.h"

// YES if the range is an actual range, i.e. its location is not NSNotFound.
// The argument is parenthesized so that arbitrary expressions can be passed.
#define MPRangeExists(range) ((range).location != NSNotFound)

// Token types indexed by the capture groups of the tokenizer pattern: capture
// group n (1-based) produces the token type stored at index n - 1. The order
// must be kept in sync with the pattern built in +tokenRegularExpression.
static const MPTokenType MPTokenTypesByCaptureGroup[] = {
    MPMultiplicationSymbolToken, // group 1
    MPOperatorListToken,         // group 2
    MPDeformedNumberToken,       // group 3
    MPNumberToken,               // group 4
    MPElementaryFunctionToken,   // group 5
    MPVariableToken,             // group 6
    MPFactorialToken,            // group 7
    MPEqualsToken,               // group 8
    MPWhitespaceToken            // group 9
};

static const NSUInteger MPTokenCaptureGroupCount =
    sizeof(MPTokenTypesByCaptureGroup) / sizeof(MPTokenTypesByCaptureGroup[0]);

// Returns the range of the single character starting at index. This is one
// UTF-16 unit, or two if index is the start of a surrogate pair, so that a
// character outside the Basic Multilingual Plane is never cut in half.
static NSRange MPRangeOfCharacterAtIndex(NSString *string, NSUInteger index)
{
    NSRange range = NSMakeRange(index, 1);
    if (index + 1 < string.length
        && CFStringIsSurrogateHighCharacter([string characterAtIndex:index])
        && CFStringIsSurrogateLowCharacter([string characterAtIndex:index + 1])) {
        range.length = 2;
    }
    return range;
}

@implementation MPExpressionTokenizer

/**
 Converts the elements of an expression into a flat list of tokens.

 Elements that answer YES to -isFunction are added to the result unchanged.
 Every other element is treated as a string and split into MPToken instances
 by +tokenizeElement:elementSymbolIndex:.

 @param expression The expression whose elements are tokenized.

 @return An array containing the function elements and the tokens of the
         string elements, in the order of the elements in the expression.
 */
+ (NSArray *)tokenizeExpression:(MPExpression *)expression
{
    NSMutableArray *tokens = [[NSMutableArray alloc] init];
    // Sum of the lengths of all elements before the current one. It is used
    // to offset the token ranges of string elements.
    NSUInteger symbolIndex = 0;
    NSUInteger elementCount = [expression countItemsInReferenceFrame:MPElementReferenceFrame];
    for (NSUInteger index = 0; index < elementCount; index++) {
        id element = [expression itemAtIndex:index
                              referenceFrame:MPElementReferenceFrame];
        if ([element isFunction]) {
            [tokens addObject:element];
        } else {
            [tokens addObjectsFromArray:[self tokenizeElement:(NSString *)element
                                           elementSymbolIndex:symbolIndex]];
        }
        // Property (dot) syntax is not available on a receiver of type id, so
        // -length is sent as a message. The cast only tells the compiler
        // which declaration of -length to use; the message is still
        // dispatched dynamically, so function elements answer it themselves.
        symbolIndex += [(NSString *)element length];
    }
    return tokens;
}

/**
 Builds the regular expression that recognizes a single token at the start of
 the searched range. The decimal separator of the current locale is escaped
 and substituted into the number patterns.

 @return The compiled regular expression, or nil if the pattern could not be
         compiled (this raises an assertion in builds with assertions
         enabled).
 */
+ (NSRegularExpression *)tokenRegularExpression
{
    NSString *decimalSeparator = [NSRegularExpression escapedPatternForString:[[NSLocale currentLocale] objectForKey:NSLocaleDecimalSeparator]];
    NSString *regexStringFormat = @"\\A(?:"
                                  @"([\\*∙⋅])|"                  // 1: multiplication symbol
                                  @"([+-](?:\\s*[+-])*)|"        // 2: operator list
                                  @"((?:\\d+%@(?!\\d+))|(?:(?:\\d*%@){2,}\\d*)|%@(?!\\d+))|" // 3: deformed number. Substitute with decimal separator 3 times
                                  @"((?:\\d+(?:%@\\d+)?)|(?:%@\\d+))|" // 4: number. Substitute with decimal separator 2 times
                                  @"(sin|cos|tan|asin|arcsin|acos|arccos|atan|arctan|lg|log|ln)|" // 5: elementary function
                                  @"([A-Za-z])|"                 // 6: variable
                                  @"(!)|"                        // 7: factorial
                                  @"(=)|"                        // 8: equals sign
                                  @"(\\s+)"                      // 9: whitespace
                                  @")";
    NSString *regexString = [NSString stringWithFormat:regexStringFormat,
                             decimalSeparator,
                             decimalSeparator,
                             decimalSeparator,
                             decimalSeparator,
                             decimalSeparator];
    NSError *error = nil;
    NSRegularExpression *regex = [NSRegularExpression regularExpressionWithPattern:regexString
                                                                           options:0
                                                                             error:&error];
    NSAssert(regex != nil, @"The tokenizer pattern could not be compiled: %@", error);
    return regex;
}

/**
 Splits a string element into tokens.

 The string is consumed from left to right. At every position the tokenizer
 pattern is applied; the first capture group that took part in the match
 determines the type and the range of the token. If nothing matches, a single
 character is emitted as a MPUnidentifiedToken. Whitespace is not dropped but
 emitted as MPWhitespaceToken.

 @param element     The string to be tokenized.
 @param symbolIndex The offset that is added to the location of every token
                    range. The string value of a token is not affected by it.

 @return An array of MPToken instances covering the whole string without
         gaps. The array is empty if element is empty.
 */
+ (NSArray *)tokenizeElement:(NSString *)element
          elementSymbolIndex:(NSUInteger)symbolIndex
{
    NSRegularExpression *regex = [self tokenRegularExpression];
    NSMutableArray *tokens = [[NSMutableArray alloc] init];
    NSUInteger lexLocation = 0;
    while (lexLocation < element.length) {
        NSTextCheckingResult *match = [regex firstMatchInString:element
                                                      fromIndex:lexLocation];
        NSRange range = NSMakeRange(NSNotFound, 0);
        MPTokenType tokenType = MPUnidentifiedToken;
        if (match) {
            // The capture groups are alternatives of one another. The first
            // one that exists identifies the token.
            for (NSUInteger group = 1; group <= MPTokenCaptureGroupCount; group++) {
                NSRange groupRange = [match rangeAtIndex:group];
                if (MPRangeExists(groupRange)) {
                    range = groupRange;
                    tokenType = MPTokenTypesByCaptureGroup[group - 1];
                    break;
                }
            }
        }
        if (!MPRangeExists(range)) {
            // Nothing was recognized at this location: consume exactly one
            // character (keeping surrogate pairs intact) as an unidentified
            // token so that the loop always makes progress.
            range = MPRangeOfCharacterAtIndex(element, lexLocation);
            tokenType = MPUnidentifiedToken;
        }
        lexLocation = NSMaxRange(range);
        NSString *tokenStringValue = [element substringWithRange:range];
        // The string value was taken relative to the element. The range that
        // is stored in the token is offset by the caller supplied index.
        range.location += symbolIndex;
        [tokens addObject:[[MPToken alloc] initWithTokenType:tokenType
                                                       range:range
                                                 stringValue:tokenStringValue]];
    }
    return tokens;
}

@end