Skip to content

Usage examples

from comparisonframe import ComparisonFrame

1. Creating validation set

1.1 Initialize comparison class

comparer = ComparisonFrame(
    # optionally
    ## mocker default parameters
    mocker_params = {
        'file_path' : "./comparisonframe_storage",
         'persist' : True},

    ## scores to calculate
    compare_scores = ['word_count_diff','semantic_similarity'],
    aggr_scores = ['median']
)
/home/kyriosskia/miniforge3/envs/testenv/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884
  warnings.warn(

1.2 Recording queries and expected responses (validation set)

comparer.record_queries(
    queries = ["Black metal", 
               "Tribulation"],
    expected_texts = ["Black metal is an extreme subgenre of heavy metal music.",
    "Tribulation are a Swedish heavy metal band from Arvika that formed in 2005."],
    metadata = {'name' : 'metal_bands'})

2. Comparing newly generated data with expected results

2.1 Initialize new comparison class

comparer = ComparisonFrame(
    # optionally
    ## mocker default parameters
    mocker_params = {
        'file_path' : "./comparisonframe_storage",
         'persist' : True},

    ## scores to calculate
    compare_scores = ['word_count_diff','semantic_similarity'],
    aggr_scores = ['median']
)

2.2 Show validation set

untested_queries = comparer.get_all_queries(
    ## optional
    metadata_filters={'name' : 'metal_bands'})
print(untested_queries)
['Black metal', 'Tribulation']
comparer.get_all_records()
[{'expected_text': 'Black metal is an extreme subgenre of heavy metal music.',
  'record_id': '0cc157453395b440f36d1a1aee24aa76a03f5f9ab0a7a8bd7b663c92f2f16e87',
  'query': 'Black metal'},
 {'expected_text': 'Tribulation are a Swedish heavy metal band from Arvika that formed in 2005.',
  'record_id': 'eecd9c2a5b25ee6053891b894157fa30372ed694763385e1ada1dc9ad8e41625',
  'query': 'Tribulation'}]
comparer.get_all_records_df()
expected_text record_id query
0 Black metal is an extreme subgenre of heavy me... 0cc157453395b440f36d1a1aee24aa76a03f5f9ab0a7a8... Black metal
1 Tribulation are a Swedish heavy metal band fro... eecd9c2a5b25ee6053891b894157fa30372ed694763385... Tribulation

2.3 Insert newly generated runs

valid_answer_query_1 = "Black metal is an extreme subgenre of heavy metal music."
very_similar_answer_query_1 = "Black metal is a subgenre of heavy metal music."
unexpected_answer_query_1 = "Black metals are beautiful and are often used in jewelry design."
comparer.record_runs(queries = ["Black metal"],
                     provided_texts = [valid_answer_query_1,
                                      very_similar_answer_query_1,
                                      unexpected_answer_query_1],
                    metadata={'desc' : 'definitions'})
comparer.get_all_runs()
[{'query': 'Black metal',
  'provided_text': 'Black metal is an extreme subgenre of heavy metal music.',
  'run_id': 'faf5aab28ee8d460cbb69c6f434bee622aff8cdfb8796282bdc547fff2c1abf8',
  'timestamp': '2024-09-26 01:36:13'},
 {'query': 'Black metal',
  'provided_text': 'Black metal is a subgenre of heavy metal music.',
  'run_id': '9fbd80050d382972c012ffcb4641f48d6220afb2210a20a11da5c7a48664f033',
  'timestamp': '2024-09-26 01:36:13'},
 {'query': 'Black metal',
  'provided_text': 'Black metals are beautiful and are often used in jewelry design.',
  'run_id': 'e4fc3f56c95d4266b6543a306c4305e0d8b960a1e0196d05cfc8ee4ea0bd7129',
  'timestamp': '2024-09-26 01:36:13'}]
df = comparer.get_all_runs_df()
df
query provided_text run_id timestamp
0 Black metal Black metal is an extreme subgenre of heavy me... faf5aab28ee8d460cbb69c6f434bee622aff8cdfb87962... 2024-09-26 01:36:13
1 Black metal Black metal is a subgenre of heavy metal music. 9fbd80050d382972c012ffcb4641f48d6220afb2210a20... 2024-09-26 01:36:13
2 Black metal Black metals are beautiful and are often used ... e4fc3f56c95d4266b6543a306c4305e0d8b960a1e0196d... 2024-09-26 01:36:13

2.4 Comparing runs with records

comparer.compare_runs_with_records()
WARNING:ComparisonFrame:No data was found with applied filters!
comparer.get_all_run_scores()
[{'query': 'Black metal',
  'provided_text': 'Black metal is an extreme subgenre of heavy metal music.',
  'run_id': 'faf5aab28ee8d460cbb69c6f434bee622aff8cdfb8796282bdc547fff2c1abf8',
  'timestamp': '2024-09-26 01:36:13',
  'record_id': '0cc157453395b440f36d1a1aee24aa76a03f5f9ab0a7a8bd7b663c92f2f16e87',
  'word_count_diff': 0,
  'semantic_similarity': 0.9999999403953552,
  'comparison_id': 'cdb16a8d16a95e85d879c29aaf9762c9e2776843f2a01d6ef9154daacd9b732d'},
 {'query': 'Black metal',
  'provided_text': 'Black metal is a subgenre of heavy metal music.',
  'run_id': '9fbd80050d382972c012ffcb4641f48d6220afb2210a20a11da5c7a48664f033',
  'timestamp': '2024-09-26 01:36:13',
  'record_id': '0cc157453395b440f36d1a1aee24aa76a03f5f9ab0a7a8bd7b663c92f2f16e87',
  'word_count_diff': 1,
  'semantic_similarity': 0.9859851002693176,
  'comparison_id': '16472e44ac7d2d74e18ea583490c2f6b8661cc8b48cc9b7480a51dc8c6796c41'},
 {'query': 'Black metal',
  'provided_text': 'Black metals are beautiful and are often used in jewelry design.',
  'run_id': 'e4fc3f56c95d4266b6543a306c4305e0d8b960a1e0196d05cfc8ee4ea0bd7129',
  'timestamp': '2024-09-26 01:36:13',
  'record_id': '0cc157453395b440f36d1a1aee24aa76a03f5f9ab0a7a8bd7b663c92f2f16e87',
  'word_count_diff': 1,
  'semantic_similarity': 0.4940534234046936,
  'comparison_id': '966c1da5e641480e8ccd33a7d0f544d9ec6c4e2e799be11529d2cf7a222deb9a'}]
comparer.get_all_run_scores_df()
query provided_text run_id timestamp record_id word_count_diff semantic_similarity comparison_id
0 Black metal Black metal is an extreme subgenre of heavy me... faf5aab28ee8d460cbb69c6f434bee622aff8cdfb87962... 2024-09-26 01:36:13 0cc157453395b440f36d1a1aee24aa76a03f5f9ab0a7a8... 0 1.000000 cdb16a8d16a95e85d879c29aaf9762c9e2776843f2a01d...
1 Black metal Black metal is a subgenre of heavy metal music. 9fbd80050d382972c012ffcb4641f48d6220afb2210a20... 2024-09-26 01:36:13 0cc157453395b440f36d1a1aee24aa76a03f5f9ab0a7a8... 1 0.985985 16472e44ac7d2d74e18ea583490c2f6b8661cc8b48cc9b...
2 Black metal Black metals are beautiful and are often used ... e4fc3f56c95d4266b6543a306c4305e0d8b960a1e0196d... 2024-09-26 01:36:13 0cc157453395b440f36d1a1aee24aa76a03f5f9ab0a7a8... 1 0.494053 966c1da5e641480e8ccd33a7d0f544d9ec6c4e2e799be1...

3. Calculating aggregate comparison scores

comparer.calculate_aggr_scores(group_by = ['desc'])
WARNING:ComparisonFrame:No data was found with applied filters!
comparer.get_all_aggr_scores()
[{'timestamp': '2024-09-26 01:36:13',
  'comparison_id': ['cdb16a8d16a95e85d879c29aaf9762c9e2776843f2a01d6ef9154daacd9b732d',
   '16472e44ac7d2d74e18ea583490c2f6b8661cc8b48cc9b7480a51dc8c6796c41',
   '966c1da5e641480e8ccd33a7d0f544d9ec6c4e2e799be11529d2cf7a222deb9a'],
  'query': ['Black metal'],
  'grouped_by': ['query'],
  'group': {'query': 'Black metal'},
  'median_word_count_diff': 1.0,
  'median_semantic_similarity': 0.9859851002693176,
  'record_status_id': 'dc1126e128d42f74bb98bad9ce4101fe1a4ea5a46df57d430dea99fdd4b8c628'}]
comparer.get_all_aggr_scores_df(grouped_by = ['desc'])
timestamp comparison_id query grouped_by group median_word_count_diff median_semantic_similarity record_status_id
0 2024-09-26 01:36:13 [cdb16a8d16a95e85d879c29aaf9762c9e2776843f2a01... [Black metal] [desc] {'desc': 'definitions'} 1.0 0.985985 c9d97729c5b03641fbf8fd35d257f2f1024a812f097ffb...

4. Recording test statuses

comparer.calculate_test_statuses(test_query = "median_semantic_similarity > 0.9")

comparer.get_test_statuses()
[{'timestamp': '2024-09-26 01:36:13',
  'record_id': '0cc157453395b440f36d1a1aee24aa76a03f5f9ab0a7a8bd7b663c92f2f16e87',
  'record_status_id': 'dc1126e128d42f74bb98bad9ce4101fe1a4ea5a46df57d430dea99fdd4b8c628',
  'query': 'Black metal',
  'test': 'median_semantic_similarity > 0.9',
  'valid': True}]
comparer.get_test_statuses_df()
timestamp record_id record_status_id query test valid
0 2024-09-26 01:36:13 0cc157453395b440f36d1a1aee24aa76a03f5f9ab0a7a8... dc1126e128d42f74bb98bad9ce4101fe1a4ea5a46df57d... Black metal median_semantic_similarity > 0.9 True

5. Resetting statuses, flushing records and comparison results

comparer.flush_records()
comparer.flush_runs()
comparer.flush_comparison_scores()
comparer.flush_aggregate_scores()
comparer.flush_test_statuses()