diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 53ca0d9..8a8d2af 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,12 +1,12 @@ --- repos: - repo: https://github.com/Lucas-C/pre-commit-hooks - rev: v1.2.0 + rev: v1.5.5 hooks: - id: remove-tabs - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.2.0 + rev: v4.6.0 hooks: - id: trailing-whitespace - id: check-merge-conflict @@ -22,12 +22,12 @@ repos: - id: debug-statements - repo: https://github.com/pycqa/pydocstyle.git - rev: 6.1.1 + rev: 6.3.0 hooks: - id: pydocstyle - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.2.0 + rev: v4.6.0 hooks: - id: check-toml - id: check-yaml @@ -35,7 +35,7 @@ repos: - id: trailing-whitespace - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 24.8.0 hooks: - id: black @@ -46,7 +46,7 @@ repos: # - id: check-manifest - repo: https://github.com/s-weigand/flake8-nb - rev: v0.4.0 + rev: v0.5.3 hooks: - id: flake8-nb additional_dependencies: ['pep8-naming'] diff --git a/notebooks/demo2/cascade_mask_rcnn_hrnetv2p_w32_20e_v2.py b/notebooks/demo2/cascade_mask_rcnn_hrnetv2p_w32_20e_v2.py index 44c8d2a..65c5fa5 100644 --- a/notebooks/demo2/cascade_mask_rcnn_hrnetv2p_w32_20e_v2.py +++ b/notebooks/demo2/cascade_mask_rcnn_hrnetv2p_w32_20e_v2.py @@ -1,4 +1,5 @@ """Model settings.""" + model = dict( type="CascadeRCNN", # num_stages=3, diff --git a/notebooks/demo2/config.py b/notebooks/demo2/config.py index 579f042..b27dc2e 100644 --- a/notebooks/demo2/config.py +++ b/notebooks/demo2/config.py @@ -1,4 +1,5 @@ """Default runtime config.""" + import pathlib import os import yaml diff --git a/notebooks/demo2/config_farm_train.py b/notebooks/demo2/config_farm_train.py index 4ad608e..37234bf 100755 --- a/notebooks/demo2/config_farm_train.py +++ b/notebooks/demo2/config_farm_train.py @@ -1,6 +1,5 @@ """Config FARM Training.""" - import pathlib import os from farm.modeling.prediction_head import TextClassificationHead diff --git a/src/components/preprocessing/base_component.py b/src/components/preprocessing/base_component.py index 398d00b..79ba892 100755 --- a/src/components/preprocessing/base_component.py +++ b/src/components/preprocessing/base_component.py @@ -1,6 +1,5 @@ """BaseComponent.""" - from abc import ABC, abstractmethod diff --git a/src/components/preprocessing/base_curator.py b/src/components/preprocessing/base_curator.py index 70f23c2..a9eced5 100755 --- a/src/components/preprocessing/base_curator.py +++ b/src/components/preprocessing/base_curator.py @@ -1,6 +1,5 @@ """BaseCurator.""" - import re from abc import abstractmethod diff --git a/src/components/preprocessing/base_kpi_inference_curator.py b/src/components/preprocessing/base_kpi_inference_curator.py index 1a2e2ae..e1c73ea 100644 --- a/src/components/preprocessing/base_kpi_inference_curator.py +++ b/src/components/preprocessing/base_kpi_inference_curator.py @@ -197,7 +197,7 @@ def return_sliced_squad(self, squad_json, indices): return {} pdf2pars = defaultdict(list) - for (i1, i2) in indices: + for i1, i2 in indices: pdf2pars[i1].append(i2) data = [] diff --git a/src/components/preprocessing/curator.py b/src/components/preprocessing/curator.py index 3b4e519..de73da0 100755 --- a/src/components/preprocessing/curator.py +++ b/src/components/preprocessing/curator.py @@ -1,6 +1,5 @@ """Curator.""" - import glob import logging diff --git a/src/components/preprocessing/extractor.py b/src/components/preprocessing/extractor.py index 04f13d0..f0c8011 100755 --- a/src/components/preprocessing/extractor.py +++ b/src/components/preprocessing/extractor.py @@ -1,6 +1,5 @@ """Extractor class.""" - from .pdf_table_extractor import PDFTableExtractor from .pdf_text_extractor import PDFTextExtractor import logging diff --git a/src/components/preprocessing/kpi_inference_curator.py b/src/components/preprocessing/kpi_inference_curator.py index 4b4b72f..b9da5af 100644 --- a/src/components/preprocessing/kpi_inference_curator.py +++ b/src/components/preprocessing/kpi_inference_curator.py @@ -103,6 +103,7 @@ def clean(self, df): Args: df (A pandas dataframe) """ + # map kpi to question def map_kpi(r): try: diff --git a/src/components/preprocessing/nq_curator.py b/src/components/preprocessing/nq_curator.py index 794fee7..4fb01b5 100755 --- a/src/components/preprocessing/nq_curator.py +++ b/src/components/preprocessing/nq_curator.py @@ -1,6 +1,5 @@ """Processing nq curator.""" - import logging import os from ast import literal_eval diff --git a/src/components/preprocessing/nq_extractor.py b/src/components/preprocessing/nq_extractor.py index 8bb0955..9ecb36f 100755 --- a/src/components/preprocessing/nq_extractor.py +++ b/src/components/preprocessing/nq_extractor.py @@ -1,6 +1,5 @@ """NQExtractor.""" - import json import logging import os diff --git a/src/components/preprocessing/pdf_table_extractor.py b/src/components/preprocessing/pdf_table_extractor.py index 843ff1a..7b262c6 100755 --- a/src/components/preprocessing/pdf_table_extractor.py +++ b/src/components/preprocessing/pdf_table_extractor.py @@ -1,6 +1,5 @@ """PDFTableExtractor.""" - from .base_component import BaseComponent import os from pdf2image import convert_from_path, pdfinfo_from_path diff --git a/src/components/preprocessing/pdf_text_extractor.py b/src/components/preprocessing/pdf_text_extractor.py index debf29e..19dba44 100755 --- a/src/components/preprocessing/pdf_text_extractor.py +++ b/src/components/preprocessing/pdf_text_extractor.py @@ -1,6 +1,5 @@ """PDFTextExtractor.""" - import glob import io import json diff --git a/src/components/preprocessing/text_curator.py b/src/components/preprocessing/text_curator.py index 2c337fe..c33aff5 100755 --- a/src/components/preprocessing/text_curator.py +++ b/src/components/preprocessing/text_curator.py @@ -1,6 +1,5 @@ """TextCurator.""" - import ast import json import logging diff --git a/src/components/utils/cscdtabnet_checkpoint_url.py b/src/components/utils/cscdtabnet_checkpoint_url.py index e2db84a..666a2ea 100755 --- a/src/components/utils/cscdtabnet_checkpoint_url.py +++ b/src/components/utils/cscdtabnet_checkpoint_url.py @@ -1,6 +1,5 @@ """Cscdtabnet checkpoint URLs.""" - checkpoint_url = { "general_model.pth": "https://drive.google.com/uc?id=1-xfq5hDmFdKgbY9FSFTmhSlcb2p13RPn", "icdar_13.pth": "https://drive.google.com/uc?id=1-mVr4UBicFk3mjUz5tsVPjQ4jzRtiT7V", diff --git a/src/components/utils/nq_utils.py b/src/components/utils/nq_utils.py index 639da39..7d5fe97 100755 --- a/src/components/utils/nq_utils.py +++ b/src/components/utils/nq_utils.py @@ -1,6 +1,5 @@ """NQ utils.""" - import re diff --git a/src/components/utils/qa_metrics.py b/src/components/utils/qa_metrics.py index f64cd01..a424e53 100755 --- a/src/components/utils/qa_metrics.py +++ b/src/components/utils/qa_metrics.py @@ -1,6 +1,5 @@ """QA metrics.""" - import numpy as np from farm.evaluation.metrics import squad_EM, squad_f1 from sklearn.metrics import confusion_matrix diff --git a/src/data/__init__.py b/src/data/__init__.py index 7cf612e..2aa025b 100644 --- a/src/data/__init__.py +++ b/src/data/__init__.py @@ -1,2 +1,3 @@ """Data collection module.""" + from .s3_communication import S3FileType, S3Communication # noqa F401 diff --git a/src/data/make_dataset.py b/src/data/make_dataset.py index 60ed23b..758967f 100644 --- a/src/data/make_dataset.py +++ b/src/data/make_dataset.py @@ -1,4 +1,5 @@ """Data collection code.""" + import click import logging from pathlib import Path diff --git a/src/data/s3_communication.py b/src/data/s3_communication.py index 82df3d3..dc6ed18 100644 --- a/src/data/s3_communication.py +++ b/src/data/s3_communication.py @@ -1,4 +1,5 @@ """S3 communication tools.""" + import os import pathlib import os.path as osp diff --git a/src/models/__init__.py b/src/models/__init__.py index c8ba266..a8e69d3 100644 --- a/src/models/__init__.py +++ b/src/models/__init__.py @@ -1,4 +1,5 @@ """Module for models.""" + from .farm_trainer import FARMTrainer from .qa_farm_trainer import QAFARMTrainer from .trainer_optuna import TrainerOptuna diff --git a/src/models/farm_trainer.py b/src/models/farm_trainer.py index 9dfab98..647f55c 100755 --- a/src/models/farm_trainer.py +++ b/src/models/farm_trainer.py @@ -1,6 +1,5 @@ """FARM Trainer.""" - import logging import os diff --git a/src/models/qa_farm_trainer.py b/src/models/qa_farm_trainer.py index 908f902..420d116 100755 --- a/src/models/qa_farm_trainer.py +++ b/src/models/qa_farm_trainer.py @@ -1,6 +1,5 @@ """QA Farm trainer.""" - import logging import json import os diff --git a/src/models/relevance_infer.py b/src/models/relevance_infer.py index 79855e9..4c67358 100755 --- a/src/models/relevance_infer.py +++ b/src/models/relevance_infer.py @@ -1,6 +1,5 @@ """RelevanceInfer.""" - import json import logging import os diff --git a/src/models/text_kpi_infer.py b/src/models/text_kpi_infer.py index ee6f7dc..1c66173 100755 --- a/src/models/text_kpi_infer.py +++ b/src/models/text_kpi_infer.py @@ -1,6 +1,5 @@ """Text KPI Inference.""" - import glob import logging import os @@ -61,9 +60,9 @@ def __init__(self, infer_config, n_best_per_sample=1): self.model.model.prediction_heads[0].n_best_per_sample = n_best_per_sample # If positive, this will boost "No Answer" as prediction. # If negative, this will decrease the model from giving "No Answer" as prediction. - self.model.model.prediction_heads[ - 0 - ].no_ans_boost = self.infer_config.no_ans_boost + self.model.model.prediction_heads[0].no_ans_boost = ( + self.infer_config.no_ans_boost + ) self.result_dir = self.infer_config.result_dir["Text"] if not os.path.exists(self.result_dir): os.makedirs(self.result_dir)