/* * Copyright (C) 2019-2022 Scoopta * This file is part of Wofi * Wofi is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Wofi is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Wofi. If not, see . */ #include #include #include // leading gap #define SCORE_GAP_LEADING -0.005 // trailing gap #define SCORE_GAP_TRAILING -0.005 // gap in the middle #define SCORE_GAP_INNER -0.01 // we matched the characters consecutively #define SCORE_MATCH_CONSECUTIVE 1.0 // we got a consecutive match, but insensitive is on // and we didn't match the case. #define SCORE_MATCH_NOT_MATCH_CASE 0.9 // we are matching after a slash #define SCORE_MATCH_SLASH 0.9 // we are matching after a space dash or hyphen #define SCORE_MATCH_WORD 0.8 // we are matching a camel case letter #define SCORE_MATCH_CAPITAL 0.7 // we are matching after a dot #define SCORE_MATCH_DOT 0.6 #define SWAP(x, y, T) \ do { \ T SWAP = x; \ x = y; \ y = SWAP; \ } while(0) #define max(a, b) (((a) > (b)) ? (a) : (b)) // matching static bool contains_match(const char* filter, const char* text, bool insensitive) { if(filter == NULL || strcmp(filter, "") == 0) { return true; } if(text == NULL) { return false; } if(insensitive) { return strcasestr(text, filter) != NULL; } else { return strstr(text, filter) != NULL; } } static char* strcasechr(const char* s,char c, bool insensitive) { if(insensitive) { const char accept[3] = {tolower(c), toupper(c), 0}; return strpbrk(s, accept); } else { return strchr(s, c); } } static bool fuzzy_match(const char* filter, const char* text, bool insensitive) { if(filter == NULL || strcmp(filter, "") == 0) { return true; } if(text == NULL) { return false; } // we just check that all the characters (ignoring case) are in the // search text possibly case insensitively in the correct order while(*filter != 0) { char nch = *filter++; if(!(text = strcasechr(text, nch, insensitive))) { return false; } text++; } return true; } static bool multi_contains_match(const char* filter, const char* text, bool insensitive) { if(filter == NULL || strcmp(filter, "") == 0) { return true; } if(text == NULL) { return false; } char new_filter[MAX_MULTI_CONTAINS_FILTER_SIZE]; strncpy(new_filter, filter, sizeof(new_filter)); new_filter[sizeof(new_filter) - 1] = '\0'; char* token; char* rest = new_filter; while((token = strtok_r(rest, " ", &rest))) { if(contains_match(token, text, insensitive) == false) { return false; } } return true; } bool match_for_matching_mode(const char* filter, const char* text, enum matching_mode matching, bool insensitive) { bool retval; switch(matching) { case MATCHING_MODE_MULTI_CONTAINS: retval = multi_contains_match(filter, text, insensitive); break; case MATCHING_MODE_CONTAINS: retval = contains_match(filter, text, insensitive); break; case MATCHING_MODE_FUZZY: retval = fuzzy_match(filter, text, insensitive); break; default: return false; } return retval; } // end matching // fuzzy matching static void precompute_bonus(const char* haystack, score_t* match_bonus) { /* Which positions are beginning of words */ int m = strlen(haystack); char last_ch = '\0'; for(int i = 0; i < m; i++) { char ch = haystack[i]; score_t score = 0; if(isalnum(ch)) { if(!last_ch || last_ch == '/') { score = SCORE_MATCH_SLASH; } else if(last_ch == '-' || last_ch == '_' || last_ch == ' ') { score = SCORE_MATCH_WORD; } else if(last_ch >= 'a' && last_ch <= 'z' && ch >= 'A' && ch <= 'Z') { /* CamelCase */ score = SCORE_MATCH_CAPITAL; } else if(last_ch == '.') { score = SCORE_MATCH_DOT; } } match_bonus[i] = score; last_ch = ch; } } static inline bool match_with_case(char a, char b, bool insensitive) { if(insensitive) { return tolower(a) == tolower(b); } else { return a == b; } } static inline void match_row(int row, score_t* curr_D, score_t* curr_M, const score_t* last_D, const score_t* last_M, const char* needle, const char* haystack, int n, int m, score_t* match_bonus, bool insensitive) { int i = row; score_t prev_score = SCORE_MIN; score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER; for(int j = 0; j < m; j++) { if(match_with_case(needle[i], haystack[j], insensitive)) { score_t score = SCORE_MIN; if(!i) { // first line we fill in a row for non-matching score = (j * SCORE_GAP_LEADING) + match_bonus[j]; } else if(j) { /* i > 0 && j > 0*/ // we definitely match case insensitively already so if // our character isn't the same then we have a different case score_t consecutive_bonus = needle[i] == haystack[j] ? SCORE_MATCH_CONSECUTIVE : SCORE_MATCH_NOT_MATCH_CASE; score = max(last_M[j - 1] + match_bonus[j], /* consecutive match, doesn't stack with match_bonus */ last_D[j - 1] + consecutive_bonus); } curr_D[j] = score; curr_M[j] = prev_score = max(score, prev_score + gap_score); } else { curr_D[j] = SCORE_MIN; curr_M[j] = prev_score = prev_score + gap_score; } } } // Fuzzy matching scoring. Adapted from // https://github.com/jhawthorn/fzy/blob/master/src/match.c and // https://github.com/jhawthorn/fzy/blob/master/ALGORITHM.md // For a fuzzy match string needle being searched for in haystack we provide a // number score for how well we match. // We create two matrices of size needle_len (n) by haystack_len (m). // The first matrix is the score matrix. Each position (i,j) within this matrix // consists of the score that corresponds to the score that would be generated // by matching the first i characters of the needle with the first j // characters of the haystack. Gaps have a fixed penalty for having a gap along // with a linear penalty for gap size (c.f. gotoh's algorithm). // matches give a positive score, with a slight weight given to matches after // certain special characters (i.e. the first character after a `/` will be // "almost" consecutive but lower than an actual consecutive match). // Our second matrix is our diagonal matrix where we store the best match // that ends at a match. This allows us to calculate our gap penalties alongside // our consecutive match scores. // In addition, since we only rely on the current, and previous row of the // matrices and we only want to compute the score, we only store those scores // and reuse the previous rows (rather than storing the entire (n*m) matrix). // In addition we've simplified some of the algorithm compared to fzy to // improve legibility. (Can reimplement lookup tables later if wanted.) // Also, the reference algorithm does not take into account case sensitivity // which has been implemented here. static score_t fuzzy_score(const char* haystack, const char* needle, bool insensitive) { if(*needle == 0) return SCORE_MIN; int n = strlen(needle); int m = strlen(haystack); score_t match_bonus[m]; precompute_bonus(haystack, match_bonus); if(m > MATCH_FUZZY_MAX_LEN || n > m) { /* * Unreasonably large candidate: return no score * If it is a valid match it will still be returned, it will * just be ranked below any reasonably sized candidates */ return SCORE_MIN; } else if(n == m) { /* Since this method can only be called with a haystack which * matches needle. If the lengths of the strings are equal the * strings themselves must also be equal (ignoring case). */ return SCORE_MAX; } /* * D[][] Stores the best score for this position ending with a match. * M[][] Stores the best possible score at this position. */ score_t D[2][MATCH_FUZZY_MAX_LEN], M[2][MATCH_FUZZY_MAX_LEN]; score_t* last_D, *last_M; score_t* curr_D, *curr_M; last_D = D[0]; last_M = M[0]; curr_D = D[1]; curr_M = M[1]; for(int i = 0; i < n; i++) { match_row(i, curr_D, curr_M, last_D, last_M, needle, haystack, n, m, match_bonus, insensitive); SWAP(curr_D, last_D, score_t *); SWAP(curr_M, last_M, score_t *); } return last_M[m - 1]; } // end fuzzy matching // sorting static int fuzzy_sort(const char* text1, const char* text2, const char* filter, bool insensitive) { bool match1 = fuzzy_match(filter, text1, insensitive); bool match2 = fuzzy_match(filter, text2, insensitive); // both filters match do fuzzy scoring if(match1 && match2) { score_t dist1 = fuzzy_score(text1, filter, insensitive); score_t dist2 = fuzzy_score(text2, filter, insensitive); if(dist1 == dist2) { // same same return 0; } else if(dist1 > dist2) { // highest score wins. // text1 goes first return -1; } else { // text2 goes first return 1; } } else if(match1) { // text1 goes first return -1; } else if(match2) { // text2 goes first return 1; } else { // same same. return 0; } } // we sort based on how early in the string all the matches are. // if there are matches for each. static int multi_contains_sort(const char* text1, const char* text2, const char* filter, bool insensitive) { // sum of string positions of each match int t1_count = 0; int t2_count = 0; // does this string match with mult-contains bool t1_match = true; bool t2_match = true; char new_filter[MAX_MULTI_CONTAINS_FILTER_SIZE]; strncpy(new_filter, filter, sizeof(new_filter)); new_filter[sizeof(new_filter) - 1] = '\0'; char* token; char* rest = new_filter; while((token = strtok_r(rest, " ", &rest))) { char* str1, *str2; if(insensitive) { str1 = strcasestr(text1, token); str2 = strcasestr(text2, token); } else { str1 = strstr(text1, token); str2 = strstr(text2, token); } t1_match = t1_match && str1 != NULL; t2_match = t2_match && str2 != NULL; if(str1 != NULL) { int pos1 = str1 - text1; t1_count += pos1; } if(str2 != NULL) { int pos2 = str2 - text2; t2_count += pos2; } } if(t1_match && t2_match) { // both match // return the one with the smallest count. return t1_count - t2_count; } else if(t1_match) { return -1; } else if(t2_match) { return 1; } else { return 0; } } static int contains_sort(const char* text1, const char* text2, const char* filter, bool insensitive) { char* str1, *str2; if(insensitive) { str1 = strcasestr(text1, filter); str2 = strcasestr(text2, filter); } else { str1 = strstr(text1, filter); str2 = strstr(text2, filter); } bool tx1 = str1 == text1; bool tx2 = str2 == text2; bool txc1 = str1 != NULL; bool txc2 = str2 != NULL; if(tx1 && tx2) { return 0; } else if(tx1) { return -1; } else if(tx2) { return 1; } else if(txc1 && txc2) { return 0; } else if(txc1) { return -1; } else if(txc2) { return 1; } else { return 0; } } int sort_for_matching_mode(const char* text1, const char* text2, int fallback, enum matching_mode match_type, const char* filter, bool insensitive) { int primary = 0; switch(match_type) { case MATCHING_MODE_MULTI_CONTAINS: primary = multi_contains_sort(text1, text2, filter, insensitive); break; case MATCHING_MODE_CONTAINS: primary = contains_sort(text1, text2, filter, insensitive); break; case MATCHING_MODE_FUZZY: primary = fuzzy_sort(text1, text2, filter, insensitive); break; default: return 0; } if(primary == 0) { return fallback; } return primary; } // end sorting