
Commit

co63oc committed Mar 4, 2024
1 parent 614be60 commit 30bae2e
Showing 54 changed files with 101 additions and 101 deletions.
4 changes: 2 additions & 2 deletions modelscope/models/cv/action_detection/modules/resnet.py
@@ -233,7 +233,7 @@ def __init__(self,
ops=ops[sum(layers[:3], 0):][:layers[3]])
if num_classes is not None:
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
- self.sptial_atten = nn.Conv2d(2, 1, kernel_size=7, padding=3)
+ self.spatial_atten = nn.Conv2d(2, 1, kernel_size=7, padding=3)
self.drop = nn.Dropout(0.5)
if reduce_dim > 0:
self.rd_conv = nn.Conv2d(
@@ -308,7 +308,7 @@ def features(self, x):
ftr = torch.cat(
(x.max(dim=1, keepdim=True)[0], x.mean(dim=1, keepdim=True)),
dim=1)
- score = self.sptial_atten(ftr) # N,1,H,W
+ score = self.spatial_atten(ftr) # N,1,H,W
x = x * torch.sigmoid(score) # N,C,H,W
self.score = score

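For context, the renamed `spatial_atten` layer is a CBAM-style spatial attention gate: the feature map is pooled across channels (max and mean), the two maps are fused by a 7×7 conv, and the result gates the input. A minimal standalone sketch (the wrapper class and its name are illustrative, not part of the repo):

```python
import torch
import torch.nn as nn

class SpatialAttention(nn.Module):
    """Channel-pooled spatial attention gate (illustrative wrapper)."""

    def __init__(self, kernel_size=7):
        super().__init__()
        self.spatial_atten = nn.Conv2d(
            2, 1, kernel_size=kernel_size, padding=kernel_size // 2)

    def forward(self, x):
        # Per-pixel max and mean over channels -> (N, 2, H, W)
        ftr = torch.cat(
            (x.max(dim=1, keepdim=True)[0], x.mean(dim=1, keepdim=True)),
            dim=1)
        score = self.spatial_atten(ftr)  # N, 1, H, W
        return x * torch.sigmoid(score)  # N, C, H, W
```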
6 changes: 3 additions & 3 deletions modelscope/models/cv/action_recognition/s3dg.py
@@ -47,7 +47,7 @@ class InceptionBlock3D(nn.Module):
Element constructing the S3D/S3DG.
See models/base/backbone.py L99-186.
- Modifed from https://github.com/TengdaHan/CoCLR/blob/main/backbone/s3dg.py.
+ Modified from https://github.com/TengdaHan/CoCLR/blob/main/backbone/s3dg.py.
"""

def __init__(self, cfg, in_planes, out_planes):
@@ -139,7 +139,7 @@ class STConv3d(nn.Module):
Element constructing the S3D/S3DG.
See models/base/backbone.py L99-186.
- Modifed from https://github.com/TengdaHan/CoCLR/blob/main/backbone/s3dg.py.
+ Modified from https://github.com/TengdaHan/CoCLR/blob/main/backbone/s3dg.py.
"""

def __init__(self,
@@ -213,7 +213,7 @@ def forward(self, x):
class Inception3D(nn.Module):
"""
Backbone architecture for I3D/S3DG.
- Modifed from https://github.com/TengdaHan/CoCLR/blob/main/backbone/s3dg.py.
+ Modified from https://github.com/TengdaHan/CoCLR/blob/main/backbone/s3dg.py.
"""

def __init__(self, cfg):
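S3D/S3DG's `STConv3d` replaces a full 3D convolution with a spatial (1, k, k) conv followed by a temporal (k, 1, 1) conv, which is cheaper than I3D's joint kernel. A rough sketch of the factorization (norm/activation placement is an assumption; see the linked CoCLR s3dg.py for the real module):

```python
import torch.nn as nn

class SeparableSTConv3d(nn.Module):
    """Spatial-then-temporal factorization of a k*k*k 3D conv (sketch)."""

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, padding=1):
        super().__init__()
        self.spatial = nn.Conv3d(
            in_planes, out_planes, (1, kernel_size, kernel_size),
            stride=(1, stride, stride), padding=(0, padding, padding))
        self.temporal = nn.Conv3d(
            out_planes, out_planes, (kernel_size, 1, 1),
            stride=(stride, 1, 1), padding=(padding, 0, 0))
        self.bn = nn.BatchNorm3d(out_planes)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.relu(self.spatial(x))                # mix within each frame
        return self.relu(self.bn(self.temporal(x)))   # then across frames
```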
2 changes: 1 addition & 1 deletion modelscope/models/cv/anydoor/datasets/data_utils.py
@@ -225,7 +225,7 @@ def get_mosaic_mask(image, fg_mask, N=16, ratio=0.5):
return noise_mask


- def extract_canney_noise(image, mask, dilate=True):
+ def extract_canny_noise(image, mask, dilate=True):
h, w = image.shape[0], image.shape[1]
mask = cv2.resize(mask.astype(np.uint8), (w, h)) > 0.5
kernel = np.ones((8, 8), dtype=np.uint8)
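The renamed `extract_canny_noise` builds a Canny edge map restricted to a dilated foreground mask. A hedged sketch of the idea; only the resize/dilate lines mirror the hunk, while the Canny thresholds, the BGR assumption, and the masking step are illustrative:

```python
import cv2
import numpy as np

def canny_edges_in_mask(image, mask, dilate=True):
    """Edge map restricted to an (optionally dilated) foreground mask (sketch)."""
    h, w = image.shape[0], image.shape[1]
    mask = cv2.resize(mask.astype(np.uint8), (w, h)) > 0.5
    if dilate:
        kernel = np.ones((8, 8), dtype=np.uint8)
        mask = cv2.dilate(mask.astype(np.uint8), kernel) > 0.5
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150)          # thresholds are illustrative
    return edges * mask.astype(edges.dtype)   # keep only in-mask edges
```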
6 changes: 3 additions & 3 deletions modelscope/models/cv/anydoor/ldm/modules/attention.py
@@ -14,9 +14,9 @@
try:
import xformers
import xformers.ops
- XFORMERS_IS_AVAILBLE = True
+ XFORMERS_IS_AVAILABLE = True
except Exception:
- XFORMERS_IS_AVAILBLE = False
+ XFORMERS_IS_AVAILABLE = False

_ATTN_PRECISION = os.environ.get('ATTN_PRECISION', 'fp32')

@@ -258,7 +258,7 @@ def __init__(self,
checkpoint=True,
disable_self_attn=False):
super().__init__()
- attn_mode = 'softmax-xformers' if XFORMERS_IS_AVAILBLE else 'softmax'
+ attn_mode = 'softmax-xformers' if XFORMERS_IS_AVAILABLE else 'softmax'
assert attn_mode in self.ATTENTION_MODES
attn_cls = self.ATTENTION_MODES[attn_mode]
self.disable_self_attn = disable_self_attn
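The `XFORMERS_IS_AVAILABLE` flag is the usual optional-dependency pattern: probe the import once, then branch to the memory-efficient kernel when it exists. A minimal sketch of that dispatch (the function name and the plain-softmax fallback are illustrative):

```python
import torch

try:
    import xformers.ops
    XFORMERS_IS_AVAILABLE = True
except Exception:
    XFORMERS_IS_AVAILABLE = False

def scaled_dot_product(q, k, v):
    """Prefer xformers' memory-efficient attention, fall back to plain softmax."""
    if XFORMERS_IS_AVAILABLE:
        return xformers.ops.memory_efficient_attention(q, k, v)
    attn = (q @ k.transpose(-2, -1) * q.shape[-1] ** -0.5).softmax(dim=-1)
    return attn @ v
```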
@@ -12,9 +12,9 @@
try:
import xformers
import xformers.ops
- XFORMERS_IS_AVAILBLE = True
+ XFORMERS_IS_AVAILABLE = True
except Exception:
- XFORMERS_IS_AVAILBLE = False
+ XFORMERS_IS_AVAILABLE = False
print("No module 'xformers'. Proceeding without it.")


@@ -259,7 +259,7 @@ def make_attn(in_channels, attn_type='vanilla', attn_kwargs=None):
'vanilla', 'vanilla-xformers', 'memory-efficient-cross-attn', 'linear',
'none'
], f'attn_type {attn_type} unknown'
- if XFORMERS_IS_AVAILBLE and attn_type == 'vanilla':
+ if XFORMERS_IS_AVAILABLE and attn_type == 'vanilla':
attn_type = 'vanilla-xformers'
print(
f"making attention of type '{attn_type}' with {in_channels} in_channels"
@@ -362,7 +362,7 @@ def count_flops_attn(model, _x, y):

class QKVAttentionLegacy(nn.Module):
"""
- A module which performs QKV attention. Matches legacy QKVAttention + input/ouput heads shaping
+ A module which performs QKV attention. Matches legacy QKVAttention + input/output heads shaping
"""

def __init__(self, n_heads):
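`QKVAttentionLegacy` follows the guided-diffusion convention: the head split happens before the QKV split, and the 1/sqrt(d) scale is applied as sqrt(scale) to q and k separately for numerical stability. A sketch of the forward pass (treat the details as an approximation of the repo's module):

```python
import math
import torch
import torch.nn as nn

class QKVAttentionLegacySketch(nn.Module):
    """QKV attention with legacy input/output heads shaping (sketch)."""

    def __init__(self, n_heads):
        super().__init__()
        self.n_heads = n_heads

    def forward(self, qkv):
        # qkv: (N, 3 * heads * ch, T) packed query/key/value
        bs, width, length = qkv.shape
        ch = width // (3 * self.n_heads)
        q, k, v = qkv.reshape(bs * self.n_heads, ch * 3, length).split(ch, dim=1)
        scale = 1 / math.sqrt(math.sqrt(ch))  # (q*s)(k*s) == qk / sqrt(ch)
        weight = torch.einsum('bct,bcs->bts', q * scale, k * scale)
        weight = torch.softmax(weight, dim=-1)
        a = torch.einsum('bts,bcs->bct', weight, v)
        return a.reshape(bs, -1, length)
```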
4 changes: 2 additions & 2 deletions modelscope/models/cv/body_3d_keypoints/__init__.py
@@ -4,11 +4,11 @@
from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
- from .cannonical_pose import BodyKeypointsDetection3D
+ from .canonical_pose import BodyKeypointsDetection3D
from .hdformer import HDFormerDetector
else:
_import_structure = {
- 'cannonical_pose': ['BodyKeypointsDetection3D'],
+ 'canonical_pose': ['BodyKeypointsDetection3D'],
'hdformer': ['HDFormerDetector'],
}

@@ -10,7 +10,7 @@
from modelscope.metainfo import Models
from modelscope.models.base.base_torch_model import TorchModel
from modelscope.models.builder import MODELS
- from modelscope.models.cv.body_3d_keypoints.cannonical_pose.canonical_pose_modules import (
+ from modelscope.models.cv.body_3d_keypoints.canonical_pose.canonical_pose_modules import (
TemporalModel, TransCan3Dkeys)
from modelscope.utils.config import Config
from modelscope.utils.constant import ModelFile, Tasks
@@ -218,17 +218,17 @@ def get_abs_2d_pts(self, input_video_frame_num, pose2d_rr,
w = input_video_frame_num - pad * 2

lst_pose2d_rr = []
- lst_pose2d_cannoical = []
+ lst_pose2d_canonical = []
for i in range(pad, w + pad):
lst_pose2d_rr.append(pose2d_rr[:, i - pad:i + pad + 1])
- lst_pose2d_cannoical.append(pose2d_canonical[:,
+ lst_pose2d_canonical.append(pose2d_canonical[:,
i - pad:i + pad + 1])

- input_pose2d_rr = torch.cat(lst_pose2d_cannoical, axis=0)
- input_pose2d_cannoical = torch.cat(lst_pose2d_cannoical, axis=0)
+ input_pose2d_rr = torch.cat(lst_pose2d_canonical, axis=0)
+ input_pose2d_canonical = torch.cat(lst_pose2d_canonical, axis=0)

if self.cfg.model.MODEL.USE_CANONICAL_COORDS:
- input_pose2d_abs = input_pose2d_cannoical.clone()
+ input_pose2d_abs = input_pose2d_canonical.clone()
else:
input_pose2d_abs = input_pose2d_rr.clone()
input_pose2d_abs[:, :, 1:] += input_pose2d_abs[:, :, :1]
@@ -238,8 +238,8 @@ def canonicalize_2Ds(self, pos2d, f, c):
def canonicalize_2Ds(self, pos2d, f, c):
cs = np.array([c[0], c[1]]).reshape(1, 1, 2)
fs = np.array([f[0], f[1]]).reshape(1, 1, 2)
- canoical_2Ds = (pos2d - cs) / fs
- return canoical_2Ds
+ canonical_2Ds = (pos2d - cs) / fs
+ return canonical_2Ds

def normalize_screen_coordinates(self, X, w, h):
assert X.shape[-1] == 2
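`canonicalize_2Ds` removes the camera intrinsics from pixel coordinates: x' = (x - c) / f, so poses from different cameras live in one canonical image plane. A self-contained version with a worked example:

```python
import numpy as np

def canonicalize_2ds(pos2d, f, c):
    """Map pixel coordinates to canonical, intrinsics-free coordinates."""
    cs = np.array([c[0], c[1]]).reshape(1, 1, 2)  # principal point
    fs = np.array([f[0], f[1]]).reshape(1, 1, 2)  # focal lengths
    return (pos2d - cs) / fs

# A joint at the principal point maps to the origin; one 100 px away
# maps to 0.1 for a focal length of 1000 px.
pts = np.array([[[640.0, 360.0], [740.0, 460.0]]])
print(canonicalize_2ds(pts, f=(1000, 1000), c=(640, 360)))
# [[[0.  0. ] [0.1 0.1]]]
```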
@@ -58,7 +58,7 @@ def load_model(self, load_to_cpu=False):
self.net.eval()

def preprocess(self, input: Dict[str, Any]) -> Dict[str, Any]:
"""Proprocess of 2D input joints.
"""Preprocess of 2D input joints.
Args:
input (Dict[str, Any]): [NUM_FRAME, NUM_JOINTS, 2], input 2d human body keypoints.
@@ -7,7 +7,7 @@


def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
"""Rezise the sample to ensure the given size. Keeps aspect ratio.
"""Resize the sample to ensure the given size. Keeps aspect ratio.
Args:
sample (dict): sample
@@ -133,7 +133,7 @@ def get_size(self, width, height):
# fit height
scale_width = scale_height
elif self.__resize_method == 'minimal':
- # scale as least as possbile
+ # scale as least as possible
if abs(1 - scale_width) < abs(1 - scale_height):
# fit width
scale_height = scale_width
@@ -198,7 +198,7 @@ def __call__(self, sample):


class NormalizeImage(object):
"""Normlize image by given mean and std.
"""Normalize image by given mean and std.
"""

def __init__(self, mean, std):
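`apply_min_size` guarantees a lower bound on both sides while preserving aspect ratio: compute the scale each side would need, take the larger one, and resize once. A compact sketch (the enlarge-only guard is an assumption):

```python
import cv2

def apply_min_size_sketch(image, min_size, interp=cv2.INTER_AREA):
    """Resize so height >= min_size[0] and width >= min_size[1], keeping aspect."""
    h, w = image.shape[:2]
    scale = max(min_size[0] / h, min_size[1] / w)  # the binding constraint wins
    if scale > 1.0:  # assumed: never shrink below the original
        image = cv2.resize(image, (round(w * scale), round(h * scale)),
                           interpolation=interp)
    return image
```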
@@ -13,7 +13,7 @@
from torch.nn import functional as F


- def deccode_output_score_and_ptss(tpMap, topk_n=200, ksize=5):
+ def decode_output_score_and_ptss(tpMap, topk_n=200, ksize=5):
'''
tpMap:
center: tpMap[1, 0, :, :]
@@ -61,7 +61,7 @@ def pred_lines(image,

batch_image = torch.from_numpy(batch_image).float().cuda()
outputs = model(batch_image)
- pts, pts_score, vmap = deccode_output_score_and_ptss(outputs, 200, 3)
+ pts, pts_score, vmap = decode_output_score_and_ptss(outputs, 200, 3)
start = vmap[:, :, :2]
end = vmap[:, :, 2:]
dist_map = np.sqrt(np.sum((start - end)**2, axis=-1))
@@ -116,7 +116,7 @@ def pred_squares(image, model, input_shape=[512, 512], params=params_glob):
batch_image = torch.from_numpy(batch_image).float().cuda()
outputs = model(batch_image)

- pts, pts_score, vmap = deccode_output_score_and_ptss(outputs, 200, 3)
+ pts, pts_score, vmap = decode_output_score_and_ptss(outputs, 200, 3)
start = vmap[:, :, :2] # (x, y)
end = vmap[:, :, 2:] # (x, y)
dist_map = np.sqrt(np.sum((start - end)**2, axis=-1))
@@ -268,7 +268,7 @@ def pred_squares(image, model, input_shape=[512, 512], params=params_glob):
| dist(inter,0), dist(inter,0), dist(inter,0), ... |
| dist(inter,1), dist(inter,1), dist(inter,1), ... |
...
- dist_inter_to_semgnet2:
+ dist_inter_to_segment2:
| dist(inter,0), dist(inter,1), dist(inter,2), ... |
| dist(inter,0), dist(inter,1), dist(inter,2), ... |
...
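The renamed `decode_output_score_and_ptss` follows the standard heatmap decoding recipe (as in CenterNet/M-LSD): suppress non-maxima with a k×k max-pool, then take the top-k scoring pixels as candidate points. A sketch of just that step (the tensor layout is an assumption):

```python
import torch
import torch.nn.functional as F

def decode_center_points(tp_map, topk_n=200, ksize=5):
    """tp_map: (1, 1, H, W) center heatmap -> top-k (x, y) points and scores."""
    hmax = F.max_pool2d(tp_map, ksize, stride=1, padding=ksize // 2)
    heat = (tp_map * (hmax == tp_map).float()).flatten()  # keep local maxima
    scores, idx = torch.topk(heat, topk_n)
    w = tp_map.shape[-1]
    ys = torch.div(idx, w, rounding_mode='floor')
    xs = idx % w
    return torch.stack([xs, ys], dim=-1), scores
```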
@@ -130,7 +130,7 @@ def __call__(self, oriImg):
limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9],
[9, 10], [10, 11], [2, 12], [12, 13], [13, 14], [2, 1],
[1, 15], [15, 17], [1, 16], [16, 18], [3, 17], [6, 18]]
- # the middle joints heatmap correpondence
+ # the middle joints heatmap correspondence
mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44],
[19, 20], [21, 22], [23, 24], [25, 26], [27, 28], [29, 30],
[47, 48], [49, 50], [53, 54], [51, 52], [55, 56], [37, 38],
4 changes: 2 additions & 2 deletions modelscope/models/cv/crowd_counting/hrnet_aspp_relu.py
@@ -556,10 +556,10 @@ def forward(self, x):
x = x + F.relu_(aspp_out[i] * 0.25) * pred_attn_list[i]

bz = x.size(0)
- # -- Besides, we also need to let the prediction attention be close to visable domain
+ # -- Besides, we also need to let the prediction attention be close to visible domain
# -- Calculate the domain distance and get the weights
# - First, detach domains
- G_all_d = self.G_all.detach() # use detached G_all for calulcating
+ G_all_d = self.G_all.detach() # use detached G_all for calculating
pred_attn_d = pred_attn.detach().view(bz, 512, 1, 1)

if self.cosine == 1:
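The surrounding logic compares the detached prediction attention against stored domain vectors (`G_all`) to weight domains by similarity. A loose sketch of the cosine branch (the shapes and the softmax normalization are assumptions; the repo may combine these differently):

```python
import torch
import torch.nn.functional as F

def domain_weights(G_all, pred_attn):
    """Cosine similarity of per-sample attention to each domain vector (sketch).

    Assumed shapes: G_all (num_domains, 512), pred_attn (bz, 512).
    """
    G_all_d = G_all.detach()            # domains receive no gradient here
    pred_attn_d = pred_attn.detach()
    sim = F.cosine_similarity(
        pred_attn_d.unsqueeze(1), G_all_d.unsqueeze(0), dim=-1)  # (bz, D)
    return torch.softmax(sim, dim=1)    # per-sample weights over domains
```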
6 changes: 3 additions & 3 deletions modelscope/models/cv/face_detection/mogface/models/resnet.py
@@ -1,6 +1,6 @@
- # The implementation is modified from original resent implementaiton, which is
- # also open-sourced by the authors as Yang Liu,
- # and is available publicly on https://github.com/damo-cv/MogFace
+ # The implementation is modified from original resent implementation, which is
+ # also open-sourced by the authors as Yang Liu,
+ # and is available publicly on https://github.com/damo-cv/MogFace

import torch.nn as nn

@@ -27,7 +27,7 @@ def __init__(self,
"""
Any ReLU-CNN Backbone
Args:
- plainet_struct: (obj: str):
+ plainnet_struct: (obj: str):
Str of network topology structure.
no_reslink: (obj:bool):
no use residual structure.
@@ -1,5 +1,5 @@
"""
- The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at
+ The implementation here is modified based on insightface, originally MIT license and publicly available at
https://github.com/deepinsight/insightface/blob/master/detection/scrfd/mmdet/models/detectors/base.py
"""
from abc import ABCMeta, abstractmethod
@@ -1,5 +1,5 @@
"""
- The implementation here is modified based on insightface, originally MIT license and publicly avaialbe at
+ The implementation here is modified based on insightface, originally MIT license and publicly available at
https://github.com/deepinsight/insightface/blob/master/detection/scrfd/mmdet/models/detectors/single_stage.py
"""
import torch
2 changes: 1 addition & 1 deletion modelscope/models/cv/face_emotion/efficient/utils.py
@@ -207,7 +207,7 @@ def forward(self, x):

class Conv2dStaticSamePadding(nn.Conv2d):
"""2D Convolutions like TensorFlow's 'SAME' mode, with the given input image size.
- The padding mudule is calculated in construction function, then used in forward.
+ The padding module is calculated in construction function, then used in forward.
"""

def __init__(self,
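`Conv2dStaticSamePadding` reproduces TensorFlow's 'SAME' behavior in PyTorch: given a fixed input size, the total padding that makes the output ceil(input/stride) is computed once in the constructor and applied as a `ZeroPad2d` before the conv. A sketch under those assumptions (names and the square-image default are illustrative):

```python
import math
import torch.nn as nn
import torch.nn.functional as F

class Conv2dSamePaddingSketch(nn.Conv2d):
    """TF-style 'SAME' padding precomputed from a fixed input size (sketch)."""

    def __init__(self, in_ch, out_ch, kernel_size, stride=1, image_size=224, **kwargs):
        super().__init__(in_ch, out_ch, kernel_size, stride, **kwargs)
        ih = iw = image_size
        k_h, k_w = self.weight.shape[-2:]
        s_h, s_w = self.stride
        # Total padding so the output size is ceil(input / stride), as in TF.
        pad_h = max((math.ceil(ih / s_h) - 1) * s_h
                    + (k_h - 1) * self.dilation[0] + 1 - ih, 0)
        pad_w = max((math.ceil(iw / s_w) - 1) * s_w
                    + (k_w - 1) * self.dilation[1] + 1 - iw, 0)
        self.static_padding = nn.ZeroPad2d(
            (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2))

    def forward(self, x):
        x = self.static_padding(x)  # padding decided once, in the constructor
        return F.conv2d(x, self.weight, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)
```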
4 changes: 2 additions & 2 deletions modelscope/models/cv/face_human_hand_detection/ghost_pan.py
@@ -186,7 +186,7 @@ class GhostBlocks(nn.Module):
out_channels (int): Number of output channels.
expand (int): Expand ratio of GhostBottleneck. Default: 1.
kernel_size (int): Kernel size of depthwise convolution. Default: 5.
- num_blocks (int): Number of GhostBottlecneck blocks. Default: 1.
+ num_blocks (int): Number of GhostBottleneck blocks. Default: 1.
use_res (bool): Whether to use residual connection. Default: False.
activation (str): Name of activation function. Default: LeakyReLU.
"""
@@ -242,7 +242,7 @@ class GhostPAN(nn.Module):
blocks. Default: False
kernel_size (int): Kernel size of depthwise convolution. Default: 5.
expand (int): Expand ratio of GhostBottleneck. Default: 1.
- num_blocks (int): Number of GhostBottlecneck blocks. Default: 1.
+ num_blocks (int): Number of GhostBottleneck blocks. Default: 1.
use_res (bool): Whether to use residual connection. Default: False.
num_extra_level (int): Number of extra conv layers for more feature levels.
Default: 0.
@@ -7,7 +7,7 @@


def initialize_weights(modules):
""" Weight initilize, conv2d and linear is initialized with kaiming_normal
""" Weight initialize, conv2d and linear is initialized with kaiming_normal
"""
for m in modules:
if isinstance(m, nn.Conv2d):
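The corrected docstring describes the common Kaiming initialization sweep. A sketch of what such a helper typically does (the fan mode and the BatchNorm constants are assumptions):

```python
import torch.nn as nn

def initialize_weights_sketch(modules):
    """Kaiming-normal for conv/linear weights, constants for norm layers."""
    for m in modules:
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, nn.BatchNorm2d):
            nn.init.ones_(m.weight)
            nn.init.zeros_(m.bias)
```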
@@ -104,7 +104,7 @@ def __init__(self,
zfar=opt.z_far,
rasterize_size=int(2 * opt.center))

- self.comupte_color_loss = photo_loss
+ self.compute_color_loss = photo_loss

def set_device(self, device):
self.device = device
@@ -444,7 +444,7 @@ def forward(self, visualize=False):
self.facemodel_front.face_buf, self.bfm_UVs.clone(),
pred_color_high)

- loss_color_high = self.w_color * self.comupte_color_loss(
+ loss_color_high = self.w_color * self.compute_color_loss(
pred_face_high, self.input_img_for_tex,
self.pred_mask.detach())
loss_smooth = TVLoss()(texture_offset) * self.w_tex_smooth
4 changes: 2 additions & 2 deletions modelscope/models/cv/face_reconstruction/models/losses.py
@@ -49,7 +49,7 @@ def perceptual_loss(id_featureA, id_featureB):
# image level loss
def photo_loss(imageA, imageB, mask, eps=1e-6):
"""
- l2 norm (with sqrt, to ensure backward stabililty, use eps, otherwise Nan may occur)
+ l2 norm (with sqrt, to ensure backward stability, use eps, otherwise Nan may occur)
Parameters:
imageA --torch.tensor (B, 3, H, W), range (0, 1), RGB order
imageB --same as imageA
@@ -170,7 +170,7 @@ def _tensor_size(self, t):

def photo_loss_sum(imageA, imageB, mask, eps=1e-6):
"""
- l2 norm (with sqrt, to ensure backward stabililty, use eps, otherwise Nan may occur)
+ l2 norm (with sqrt, to ensure backward stability, use eps, otherwise Nan may occur)
Parameters:
imageA --torch.tensor (B, 3, H, W), range (0, 1), RGB order
imageB --same as imageA
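The docstring's point about `eps` is worth spelling out: d/dx sqrt(x) = 1/(2 sqrt(x)) is infinite at x = 0, so a pixelwise-zero difference would produce NaN gradients; adding eps inside the sqrt keeps the backward pass finite. A sketch of the masked photometric loss (the mask-normalized reduction is an assumption):

```python
import torch

def photo_loss_sketch(imageA, imageB, mask, eps=1e-6):
    """Masked l2 photometric loss; eps keeps sqrt differentiable at zero."""
    diff = torch.sqrt(eps + ((imageA - imageB) ** 2).sum(dim=1, keepdim=True))
    return (diff * mask).sum() / torch.clamp(mask.sum(), min=1.0)
```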
@@ -322,7 +322,7 @@ def get_target_tensor(self, prediction, target_is_real):
"""Create label tensors with the same size as the input.
Parameters:
- prediction (tensor) - - tpyically the prediction from a discriminator
+ prediction (tensor) - - typically the prediction from a discriminator
target_is_real (bool) - - if the ground truth label is for real images or fake images
Returns:
@@ -336,10 +336,10 @@ def get_target_tensor(self, prediction, target_is_real):
return target_tensor.expand_as(prediction)

def __call__(self, prediction, target_is_real):
"""Calculate loss given Discriminator's output and grount truth labels.
"""Calculate loss given Discriminator's output and ground truth labels.
Parameters:
- prediction (tensor) - - tpyically the prediction output from a discriminator
+ prediction (tensor) - - typically the prediction output from a discriminator
target_is_real (bool) - - if the ground truth label is for real images or fake images
Returns:
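`get_target_tensor` exists because a discriminator's output shape varies (e.g. a PatchGAN emits a score map), so the scalar real/fake label has to be broadcast to match. A compact sketch in the pix2pix style (the loss choice is an assumption):

```python
import torch
import torch.nn as nn

class GANLossSketch(nn.Module):
    """Vanilla GAN loss with labels expanded to the prediction's shape."""

    def __init__(self, real_label=1.0, fake_label=0.0):
        super().__init__()
        self.register_buffer('real_label', torch.tensor(real_label))
        self.register_buffer('fake_label', torch.tensor(fake_label))
        self.loss = nn.BCEWithLogitsLoss()

    def get_target_tensor(self, prediction, target_is_real):
        target = self.real_label if target_is_real else self.fake_label
        return target.expand_as(prediction)  # same size as the prediction

    def forward(self, prediction, target_is_real):
        return self.loss(prediction,
                         self.get_target_tensor(prediction, target_is_real))
```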
@@ -121,5 +121,5 @@ def optimize_parameters(self):
self.set_requires_grad(
self.netD, False) # D requires no gradients when optimizing G
self.optimizer_G.zero_grad() # set G's gradients to zero
- self.backward_G() # calculate graidents for G
- self.optimizer_G.step() # udpate G's weights
+ self.backward_G() # calculate gradients for G
+ self.optimizer_G.step() # update G's weights
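The `set_requires_grad` call above freezes the discriminator so the generator's backward pass doesn't accumulate gradients in D. The helper conventionally looks like this (a sketch following the pix2pix convention the hunk echoes):

```python
def set_requires_grad(nets, requires_grad=False):
    """Toggle requires_grad for one network or a list of networks."""
    if not isinstance(nets, (list, tuple)):
        nets = [nets]
    for net in nets:
        if net is not None:
            for param in net.parameters():
                param.requires_grad = requires_grad
```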