Coverage for cc_modules/cc_nlp.py: 15%

26 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-11-08 23:14 +0000

1#!/usr/bin/env python 

2 

3""" 

4camcops_server/cc_modules/cc_nlp.py 

5 

6=============================================================================== 

7 

8 Copyright (C) 2012, University of Cambridge, Department of Psychiatry. 

9 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

10 

11 This file is part of CamCOPS. 

12 

13 CamCOPS is free software: you can redistribute it and/or modify 

14 it under the terms of the GNU General Public License as published by 

15 the Free Software Foundation, either version 3 of the License, or 

16 (at your option) any later version. 

17 

18 CamCOPS is distributed in the hope that it will be useful, 

19 but WITHOUT ANY WARRANTY; without even the implied warranty of 

20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

21 GNU General Public License for more details. 

22 

23 You should have received a copy of the GNU General Public License 

24 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>. 

25 

26=============================================================================== 

27 

28**Natural language processing functions (of sorts).** 

29 

30""" 

31 

32from typing import Dict 

33 

34# ============================================================================= 

35# Processing names 

36# ============================================================================= 

37 

# Honorifics recognised as a name prefix (compared in upper case, without
# full stops).
TITLES = ["DR", "PROF", "MR", "MISS", "MRS", "MS", "SR"]


def guess_name_components(s: str, uppercase: bool = True) -> Dict[str, str]:
    """
    Takes a string such as 'Dr James T. Smith, M.D.' and returns parts.

    This will not be perfect! If it isn't reasonably sure, it returns
    everything in the surname field.

    The heuristic is:

    - the string is split on whitespace;
    - the first token is taken as the prefix if it contains a full stop or
      (ignoring case) is one of ``TITLES``;
    - if exactly two tokens remain, they are read as ``SURNAME, FORENAME``
      when the first ends in a comma (the comma is not part of the returned
      surname), and as ``FORENAME SURNAME`` otherwise;
    - in every other case, all remaining tokens are joined with single
      spaces and returned as the surname.

    Examples it will fail on:

    - Nurse Specialist Jones

    Args:
        s: the name to split; a false value (``""``, ``None``) yields three
            empty strings
        uppercase: convert all returned components to upper case?

    Returns:
        dict: dictionary with keys "surname", "forename", "prefix"

    """
    # Hard.
    # http://stackoverflow.com/questions/4276905/

    prefix = ""
    forename = ""

    # 1. Separate on whitespace, chucking any blanks. A bare split() also
    #    copes with tabs, newlines and leading/trailing whitespace.
    parts = s.split() if s else []

    # 2. Prefix?
    if parts:
        candidate = parts[0]
        # If the token contains no full stop, there is nothing to strip
        # before comparing it to the known titles.
        if "." in candidate or candidate.upper() in TITLES:
            prefix = candidate
            parts = parts[1:]

    # 3. Forename, surname
    # Default: no idea, really; shove it all in the surname component.
    surname = " ".join(parts)
    if len(parts) == 2:
        first, second = parts
        if first.endswith(","):  # SURNAME, FORENAME
            bare_surname = first.rstrip(",")
            # A token made only of commas is not a surname; keep the
            # "not sure" default in that case.
            if bare_surname:
                surname = bare_surname
                forename = second
        else:  # FORENAME SURNAME
            forename = first
            surname = second

    if uppercase:
        surname = surname.upper()
        forename = forename.upper()
        prefix = prefix.upper()
    return dict(surname=surname, forename=forename, prefix=prefix)
87 surname = surname.upper() 

88 forename = forename.upper() 

89 prefix = prefix.upper() 

90 return dict(surname=surname, forename=forename, prefix=prefix)