/********************************************************************** * File: reject.h (Formerly reject.h) * Description: Rejection functions used in tessedit * Author: Phil Cheatle * Created: Wed Sep 23 16:50:21 BST 1992 * * (C) Copyright 1992, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. * **********************************************************************/ #ifndef REJECT_H #define REJECT_H #include "varable.h" #include "pageres.h" #include "notdll.h" extern INT_VAR_H (tessedit_reject_mode, 5, "Rejection algorithm"); extern INT_VAR_H (tessedit_ok_mode, 5, "Acceptance decision algorithm"); extern BOOL_VAR_H (tessedit_use_nn, TRUE, ""); extern BOOL_VAR_H (tessedit_rejection_debug, FALSE, "Adaption debug"); extern BOOL_VAR_H (tessedit_rejection_stats, FALSE, "Show NN stats"); extern BOOL_VAR_H (tessedit_flip_0O, TRUE, "Contextual 0O O0 flips"); extern double_VAR_H (tessedit_lower_flip_hyphen, 1.5, "Aspect ratio dot/hyphen test"); extern double_VAR_H (tessedit_upper_flip_hyphen, 1.8, "Aspect ratio dot/hyphen test"); extern BOOL_VAR_H (rej_trust_doc_dawg, FALSE, "Use DOC dawg in 11l conf. detector"); extern BOOL_VAR_H (rej_1Il_use_dict_word, FALSE, "Use dictword test"); extern BOOL_VAR_H (rej_1Il_trust_permuter_type, TRUE, "Dont double check"); extern BOOL_VAR_H (one_ell_conflict_default, TRUE, "one_ell_conflict default"); extern BOOL_VAR_H (show_char_clipping, FALSE, "Show clip image window?"); extern BOOL_VAR_H (nn_debug, FALSE, "NN DEBUGGING?"); extern BOOL_VAR_H (nn_reject_debug, FALSE, "NN DEBUG each char?"); extern BOOL_VAR_H (nn_lax, FALSE, "Use 2nd rate matches"); extern BOOL_VAR_H (nn_double_check_dict, FALSE, "Double check"); extern BOOL_VAR_H (nn_conf_double_check_dict, TRUE, "Double check for confusions"); extern BOOL_VAR_H (nn_conf_1Il, TRUE, "NN use 1Il conflicts"); extern BOOL_VAR_H (nn_conf_Ss, TRUE, "NN use Ss conflicts"); extern BOOL_VAR_H (nn_conf_hyphen, TRUE, "NN hyphen conflicts"); extern BOOL_VAR_H (nn_conf_test_good_qual, FALSE, "NN dodgy 1Il cross check"); extern BOOL_VAR_H (nn_conf_test_dict, TRUE, "NN dodgy 1Il cross check"); extern BOOL_VAR_H (nn_conf_test_sensible, TRUE, "NN dodgy 1Il cross check"); extern BOOL_VAR_H (nn_conf_strict_on_dodgy_chs, TRUE, "Require stronger NN match"); extern double_VAR_H (nn_dodgy_char_threshold, 0.99, "min accept score"); extern INT_VAR_H (nn_conf_accept_level, 4, "NN accept dodgy 1Il matches? "); extern INT_VAR_H (nn_conf_initial_i_level, 3, "NN accept initial Ii match level "); extern BOOL_VAR_H (no_unrej_dubious_chars, TRUE, "Dubious chars next to reject?"); extern BOOL_VAR_H (no_unrej_no_alphanum_wds, TRUE, "Stop unrej of non A/N wds?"); extern BOOL_VAR_H (no_unrej_1Il, FALSE, "Stop unrej of 1Ilchars?"); extern BOOL_VAR_H (rej_use_tess_accepted, TRUE, "Individual rejection control"); extern BOOL_VAR_H (rej_use_tess_blanks, TRUE, "Individual rejection control"); extern BOOL_VAR_H (rej_use_good_perm, TRUE, "Individual rejection control"); extern BOOL_VAR_H (rej_use_sensible_wd, FALSE, "Extend permuter check"); extern BOOL_VAR_H (rej_alphas_in_number_perm, FALSE, "Extend permuter check"); extern double_VAR_H (rej_whole_of_mostly_reject_word_fract, 0.85, "if >this fract"); extern INT_VAR_H (rej_mostly_reject_mode, 1, "0-never, 1-afterNN, 2-after new xht"); extern double_VAR_H (tessed_fullstop_aspect_ratio, 1.2, "if >this fract then reject"); extern INT_VAR_H (net_image_width, 40, "NN input image width"); extern INT_VAR_H (net_image_height, 36, "NN input image height"); extern INT_VAR_H (net_image_x_height, 22, "NN input image x_height"); extern INT_VAR_H (tessedit_image_border, 2, "Rej blbs near image edge limit"); extern INT_VAR_H (net_bl_nodes, 20, "Number of baseline nodes"); extern double_VAR_H (nn_reject_threshold, 0.5, "NN min accept score"); extern double_VAR_H (nn_reject_head_and_shoulders, 0.6, "top scores sep factor"); extern STRING_VAR_H (ok_single_ch_non_alphanum_wds, "-?\075", "Allow NN to unrej"); extern STRING_VAR_H (ok_repeated_ch_non_alphanum_wds, "-?*\075", "Allow NN to unrej"); extern STRING_VAR_H (conflict_set_I_l_1, "Il1[]", "Il1 conflict set"); extern STRING_VAR_H (conflict_set_S_s, "Ss$", "Ss conflict set"); extern STRING_VAR_H (conflict_set_hyphen, "-_~", "hyphen conflict set"); extern STRING_VAR_H (dubious_chars_left_of_reject, "!'+`()-./\\<>;:^_,~\"", "Unreliable chars"); extern STRING_VAR_H (dubious_chars_right_of_reject, "!'+`()-./\\<>;:^_,~\"", "Unreliable chars"); extern INT_VAR_H (min_sane_x_ht_pixels, 8, "Reject any x-ht lt or eq than this"); void set_done( //set done flag WERD_RES *word, inT16 pass); void make_reject_map( //make rej map for wd //detailed results WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices, ROW *row, inT16 pass //1st or 2nd? ); void reject_blanks(WERD_RES *word); void reject_I_1_L(WERD_RES *word); //detailed results void reject_poor_matches(WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices); float compute_reject_threshold( //compute threshold //detailed results BLOB_CHOICE_LIST_CLIST *blob_choices); int sort_floats( //qsort function const void *arg1, //ptrs to floats const void *arg2); void reject_edge_blobs(WERD_RES *word); BOOL8 one_ell_conflict(WERD_RES *word_res, BOOL8 update_map); inT16 first_alphanum_offset(const char *word, const char *word_lengths); inT16 first_alphanum_index(const char *word, const char *word_lengths); inT16 alpha_count(const char *word, const char *word_lengths); BOOL8 word_contains_non_1_digit(const char *word, const char *word_lengths); BOOL8 test_ambig_word( //test for ambiguity WERD_RES *word); //original word BOOL8 ambig_word(const char *start_word, char *temp_word, //alterable copy inT16 test_char_pos //idx to char to alter ); const char *char_ambiguities(char c); #ifndef EMBEDDED void test_ambigs(const char *word); #endif void nn_recover_rejects(WERD_RES *word, ROW *row); void nn_match_word( //Match a word WERD_RES *word, ROW *row); //of character inT16 nn_match_char(IMAGE &scaled_image, float baseline_pos, //rel to scaled_image BOOL8 dict_word, //part of dict wd? BOOL8 checked_dict_word, //part of dict wd? BOOL8 sensible_word, //part acceptable str? BOOL8 centre, //not at word ends? BOOL8 good_quality_word, //initial segmentation char tess_ch //confirm this? ); inT16 evaluate_net_match(char top, float top_score, char next, float next_score, char tess_ch, BOOL8 dict_word, BOOL8 checked_dict_word, BOOL8 sensible_word, BOOL8 centre, BOOL8 good_quality_word); void dont_allow_dubious_chars(WERD_RES *word); void dont_allow_1Il(WERD_RES *word); inT16 count_alphanums( //how many alphanums WERD_RES *word); void reject_mostly_rejects( //rej all if most rejectd WERD_RES *word); BOOL8 repeated_nonalphanum_wd(WERD_RES *word, ROW *row); BOOL8 repeated_ch_string(const char *rep_ch_str, const char *lengths); inT16 safe_dict_word(const char *s); void flip_hyphens(WERD_RES *word); void flip_0O(WERD_RES *word); BOOL8 non_O_upper(const char* str, int length); BOOL8 non_0_digit(const char* str, int length); #endif