_temporal.py 906 B

1234567891011121314151617181920212223242526
  1. from typing import Any, Dict
  2. import torch
  3. from torchvision.transforms.v2 import functional as F, Transform
  4. class UniformTemporalSubsample(Transform):
  5. """Uniformly subsample ``num_samples`` indices from the temporal dimension of the video.
  6. Videos are expected to be of shape ``[..., T, C, H, W]`` where ``T`` denotes the temporal dimension.
  7. When ``num_samples`` is larger than the size of temporal dimension of the video, it
  8. will sample frames based on nearest neighbor interpolation.
  9. Args:
  10. num_samples (int): The number of equispaced samples to be selected
  11. """
  12. _transformed_types = (torch.Tensor,)
  13. def __init__(self, num_samples: int):
  14. super().__init__()
  15. self.num_samples = num_samples
  16. def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
  17. return self._call_kernel(F.uniform_temporal_subsample, inpt, self.num_samples)