kazu.annotation.acceptance_test

Functions

acceptance_criteria()

aggregate_linking_results(class_and_scorers)

aggregate_ner_results(class_and_scorers)

analyse_annotation_consistency(docs)

analyse_full_pipeline(pipeline, docs, ...)

check_annotation_consistency(cfg)

check_ent_class_consistency(...)

Checks to see if any match strings have different entity_class information.

check_ent_mapping_consistency(...)

Checks to see if any entity string matches have inconsistent mapping information.

check_ent_match_abnormalities(...)

Checks to see if any gold standard spans look a bit weird.

check_results_meet_threshold(results, thresholds)

execute_full_pipeline_acceptance_test(cfg)

score_sections(docs)

Score a list of documents by Section.

Classes

AggregatedAccuracyResult

AggregatedAccuracyResult(tp: int = 0, fp: int = 0, fn: int = 0, fp_counter: collections.Counter = <factory>, fn_counter: collections.Counter = <factory>, fp_items_to_tasks: dict[typing.Any, set[str]] = <factory>, fn_items_to_tasks: dict[typing.Any, set[str]] = <factory>)

SectionScorer

Exceptions

exception kazu.annotation.acceptance_test.AcceptanceTestFailure[source]

Bases: Exception

class kazu.annotation.acceptance_test.AggregatedAccuracyResult[source]

Bases: object

AggregatedAccuracyResult(tp: int = 0, fp: int = 0, fn: int = 0, fp_counter: collections.Counter = <factory>, fn_counter: collections.Counter = <factory>, fp_items_to_tasks: dict[typing.Any, set[str]] = <factory>, fn_items_to_tasks: dict[typing.Any, set[str]] = <factory>)

__init__(tp=0, fp=0, fn=0, fp_counter=<factory>, fn_counter=<factory>, fp_items_to_tasks=<factory>, fn_items_to_tasks=<factory>)[source]
Parameters:

tp (int)

fp (int)

fn (int)

fp_counter (Counter)

fn_counter (Counter)

fp_items_to_tasks (dict[Any, set[str]])

fn_items_to_tasks (dict[Any, set[str]])

Return type:

None

add_fn(item, task)[source]
Parameters:

item (Any)

task (str)

Return type:

None

add_fp(item, task)[source]
Parameters:

item (Any)

task (str)

Return type:

None

tasks_for_fn(items)[source]
Parameters:

items (list[Any])

Return type:

Iterable[str]

tasks_for_fp(items)[source]
Parameters:

items (list[Any])

Return type:

Iterable[str]

fn: int = 0
fn_counter: Counter
property fn_info: list[Any]
fn_items_to_tasks: dict[Any, set[str]]
fp: int = 0
fp_counter: Counter
property fp_info: list[Any]
fp_items_to_tasks: dict[Any, set[str]]
property precision: float
property recall: float
tp: int = 0
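
A minimal sketch of using AggregatedAccuracyResult directly; normally these objects are produced for you by aggregate_ner_results / aggregate_linking_results. It assumes add_fp / add_fn increment the corresponding counts and record which task produced the item, and that precision / recall are the usual tp-based ratios. The item strings and task name below are purely illustrative.

    from kazu.annotation.acceptance_test import AggregatedAccuracyResult

    result = AggregatedAccuracyResult(tp=10)

    # assumption: add_fp/add_fn bump the fp/fn counts and remember the offending
    # item together with the annotation task it came from
    result.add_fp("EGFR", "hypothetical_gene_task")
    result.add_fn("p53", "hypothetical_gene_task")

    # assumption: precision = tp / (tp + fp), recall = tp / (tp + fn)
    print(result.precision, result.recall)

    # which annotation tasks produced a given false positive / false negative
    print(list(result.tasks_for_fp(["EGFR"])))
    print(list(result.tasks_for_fn(["p53"])))
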
class kazu.annotation.acceptance_test.SectionScorer[source]

Bases: object

__init__(task, gold_ents, test_ents)[source]
Parameters:
calculate_linking_matches()[source]
calculate_ner_matches()[source]
static group_mappings_by_source(ents)[source]
Parameters:

ents (Iterable[Entity])

Return type:

dict[str, set[tuple[str, str]]]
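
A minimal sketch of driving a SectionScorer by hand; in normal use score_sections() (below) builds one scorer per section for you. The empty entity lists keep the snippet self-contained, real use passes the gold-standard and pipeline-produced Entity objects for a single section, and it is an assumption here that the calculate_* methods must be called explicitly before aggregation. The task name is hypothetical.

    from kazu.annotation.acceptance_test import SectionScorer, aggregate_ner_results

    # empty lists for brevity; in practice these are the section's gold-standard
    # and pipeline-produced kazu Entity objects
    scorer = SectionScorer(
        task="hypothetical_annotation_task",
        gold_ents=[],
        test_ents=[],
    )
    scorer.calculate_ner_matches()
    scorer.calculate_linking_matches()

    # scorers are grouped by entity class and rolled up into AggregatedAccuracyResults
    ner_results = aggregate_ner_results({"gene": [scorer]})
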

kazu.annotation.acceptance_test.acceptance_criteria()[source]
Return type:

dict[str, dict[str, dict[str, float]]]
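
The nested dict returned here is not documented on this page; the literal below is only a guess at its shape (result type, then entity class, then metric name) to illustrate the dict[str, dict[str, dict[str, float]]] type. All keys and numbers are hypothetical; the real values come from kazu's configured acceptance criteria.

    # hypothetical illustration of the dict[str, dict[str, dict[str, float]]] shape
    thresholds = {
        "NER": {
            "gene": {"precision": 0.80, "recall": 0.80},
            "disease": {"precision": 0.75, "recall": 0.75},
        },
        "linking": {
            "gene": {"precision": 0.80, "recall": 0.80},
        },
    }
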

kazu.annotation.acceptance_test.aggregate_linking_results(class_and_scorers)[source]
Parameters:

class_and_scorers (dict[str, list[SectionScorer]])

Return type:

dict[str, AggregatedAccuracyResult]

kazu.annotation.acceptance_test.aggregate_ner_results(class_and_scorers)[source]
Parameters:

class_and_scorers (dict[str, list[SectionScorer]])

Return type:

dict[str, AggregatedAccuracyResult]

kazu.annotation.acceptance_test.analyse_annotation_consistency(docs)[source]
Parameters:

docs (list[Document])

Return type:

None

kazu.annotation.acceptance_test.analyse_full_pipeline(pipeline, docs, acceptance_criteria)[source]
Parameters:
Return type:

None

kazu.annotation.acceptance_test.check_annotation_consistency(cfg)[source]
kazu.annotation.acceptance_test.check_ent_class_consistency(ent_to_task_lookup, ents, match_str, messages)[source]

Checks to see if any match strings have different entity_class information.

Parameters:
Returns:

Return type:

None

kazu.annotation.acceptance_test.check_ent_mapping_consistency(ent_to_task_lookup, ents, match_str, messages)[source]

Checks to see if any entity string matches have inconsistent mapping information.

Parameters:
Returns:

Return type:

None

kazu.annotation.acceptance_test.check_ent_match_abnormalities(ent_to_task_lookup, ents, match_str, messages)[source]

Checks to see if any gold standard spans look a bit weird.

Parameters:
Returns:

Return type:

None

kazu.annotation.acceptance_test.check_results_meet_threshold(results, thresholds)[source]
Parameters:
Return type:

None

kazu.annotation.acceptance_test.execute_full_pipeline_acceptance_test(cfg)[source]
kazu.annotation.acceptance_test.score_sections(docs)[source]

Score a list of documents by Section.

Parameters:

docs (list[Document])

Returns:

dict of entity class to one scorer per section

Return type:

dict[str, list[SectionScorer]]
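
Putting the pieces together, a hedged sketch of how the scoring helpers above might be chained by hand (execute_full_pipeline_acceptance_test and analyse_full_pipeline wrap this kind of flow behind a config): score annotated documents per section, aggregate per entity class, then compare against the configured thresholds. Whether check_results_meet_threshold raises AcceptanceTestFailure on a miss, and the "NER" / "linking" keys used to slice the criteria dict, are assumptions here; the empty docs list is a placeholder for real gold-annotated Documents.

    from kazu.annotation.acceptance_test import (
        AcceptanceTestFailure,
        acceptance_criteria,
        aggregate_linking_results,
        aggregate_ner_results,
        check_results_meet_threshold,
        score_sections,
    )

    # placeholder: in practice, a list[Document] carrying both gold-standard
    # annotations and the pipeline's own entities
    docs = []

    # one SectionScorer per section, keyed by entity class
    class_and_scorers = score_sections(docs)

    ner_results = aggregate_ner_results(class_and_scorers)
    linking_results = aggregate_linking_results(class_and_scorers)

    criteria = acceptance_criteria()
    try:
        # assumption about how the nested acceptance_criteria() dict maps onto the
        # thresholds argument; the "NER" / "linking" keys are hypothetical
        check_results_meet_threshold(ner_results, criteria["NER"])
        check_results_meet_threshold(linking_results, criteria["linking"])
    except AcceptanceTestFailure:
        print("pipeline performance fell below the configured acceptance criteria")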