Coverage for src/django_audit_log/migrations/0007_merge_duplicate_paths.py: 44%

32 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-05-02 11:43 +0700

1from django.db import migrations 

2from urllib.parse import urlparse 

3from collections import defaultdict 

4 

5 

def normalize_path(url):
    """Reduce a URL (or bare path) to its path component.

    The value is parsed with ``urllib.parse.urlparse`` and only the ``path``
    attribute is kept, so scheme, host, port, path parameters, query string
    and fragment are all dropped. A value that is already a bare path comes
    back as that path. Note that a leading HTTP method token is *not*
    stripped: ``"GET /x"`` has no scheme or netloc and is returned as-is.

    Args:
        url: The stored URL or path string. May be empty or ``None``.

    Returns:
        The path component, ``""`` for a falsy input, or the input unchanged
        when it cannot be parsed.
    """
    if not url:
        return ""

    try:
        # Absolute URLs and bare paths are handled identically: in both
        # cases the parsed ``path`` attribute is the value we want.
        return urlparse(url).path
    except ValueError:
        # urlparse rejects some malformed authorities (for example an
        # unbalanced "[" in the host). Logged URLs come from requests and
        # are therefore untrusted, so leave such values untouched rather
        # than letting one bad row abort the whole migration.
        return url

20 

21 

def merge_duplicate_paths(apps, schema_editor):
    """
    Merge LogPath records that share the same normalized path.

    Every LogPath row is grouped by ``normalize_path(row.path)``. For each
    group with more than one row, the row with the lowest primary key is
    kept, every AccessLog reference (``path``, ``referrer``,
    ``response_url``) to the other rows is repointed to it, the other rows
    are deleted, and finally the kept row's ``path`` is set to the
    normalized value. Groups with a single row are left untouched.

    Args:
        apps: Historical app registry supplied by the migration framework.
        schema_editor: Schema editor supplied by the migration framework;
            its connection alias selects the database for every query.
    """
    LogPath = apps.get_model('django_audit_log', 'LogPath')
    AccessLog = apps.get_model('django_audit_log', 'AccessLog')
    db_alias = schema_editor.connection.alias

    # Group paths by their normalized version. Ordering by pk makes the
    # choice of the surviving record deterministic across runs/backends.
    path_groups = defaultdict(list)
    for log_path in LogPath.objects.using(db_alias).order_by('pk'):
        path_groups[normalize_path(log_path.path)].append(log_path)

    access_logs = AccessLog.objects.using(db_alias)
    # AccessLog fields that reference LogPath and must be repointed.
    reference_fields = ('path', 'referrer', 'response_url')

    for normalized_path, paths in path_groups.items():
        if len(paths) < 2:
            continue

        # Keep the first path instance and merge the others into it.
        primary_path, *duplicate_paths = paths

        for duplicate in duplicate_paths:
            for field in reference_fields:
                access_logs.filter(**{field: duplicate}).update(
                    **{field: primary_path}
                )
            # All references are gone, so the duplicate can be removed.
            duplicate.delete(using=db_alias)

        # Rename the survivor only after the duplicates are deleted: one of
        # them may already hold the normalized value, which would collide
        # if ``path`` carries a unique constraint.
        if primary_path.path != normalized_path:
            primary_path.path = normalized_path
            primary_path.save(using=db_alias)

65 

66 

class Migration(migrations.Migration):
    """Data migration that runs ``merge_duplicate_paths`` when applied."""

    dependencies = [
        ("django_audit_log", "0006_loguseragent_operating_system_version"),
    ]

    operations = [
        migrations.RunPython(
            code=merge_duplicate_paths,
            # This is a one-way data cleanup: merged rows cannot be split
            # again, so reversing the migration deliberately does nothing.
            reverse_code=migrations.RunPython.noop,
        ),
    ]