Class: Google::Cloud::Vision::V1::TextAnnotation

Inherits:
Object
  • Object
show all
Defined in:
lib/google/cloud/vision/v1/doc/google/cloud/vision/v1/text_annotation.rb

Overview

TextAnnotation contains a structured representation of OCR extracted text. The hierarchy of an OCR extracted text structure is like this: TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol. Each structural component, starting from Page, may further have its own properties. Properties describe detected languages, breaks, etc. Please refer to the TextAnnotation::TextProperty message definition below for more detail.

Defined Under Namespace

Classes: DetectedBreak, DetectedLanguage, TextProperty

Instance Attribute Summary collapse

Instance Attribute Details

#pages ⇒ Array<Google::Cloud::Vision::V1::Page>

Returns a list of pages detected by OCR.

Returns:

  • (Array<Google::Cloud::Vision::V1::Page>)

    List of pages detected by OCR.



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/google/cloud/vision/v1/doc/google/cloud/vision/v1/text_annotation.rb', line 33

class TextAnnotation
  # Detected language for a structural component.
  # @!attribute [rw] language_code
  #   @return [String]
  #     The BCP-47 language code, such as "en-US" or "sr-Latn". For more
  #     information, see
  #     http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
  # @!attribute [rw] confidence
  #   @return [Float]
  #     Confidence of detected language. Range [0, 1].
  class DetectedLanguage; end

  # Detected start or end of a structural component.
  # @!attribute [rw] type
  #   @return [Google::Cloud::Vision::V1::TextAnnotation::DetectedBreak::BreakType]
  #     Detected break type.
  # @!attribute [rw] is_prefix
  #   @return [true, false]
  #     True if break prepends the element.
  class DetectedBreak
    # Enum to denote the type of break found. New line, space etc.
    module BreakType
      # Unknown break label type.
      UNKNOWN = 0

      # Regular space.
      SPACE = 1

      # Sure space (very wide).
      SURE_SPACE = 2

      # Line-wrapping break.
      EOL_SURE_SPACE = 3

      # End-line hyphen that is not present in text; does not co-occur with
      # +SPACE+, +LEADER_SPACE+, or +LINE_BREAK+.
      HYPHEN = 4

      # Line break that ends a paragraph.
      LINE_BREAK = 5
    end
  end

  # Additional information detected on the structural component.
  # @!attribute [rw] detected_languages
  #   @return [Array<Google::Cloud::Vision::V1::TextAnnotation::DetectedLanguage>]
  #     A list of detected languages together with confidence.
  # @!attribute [rw] detected_break
  #   @return [Google::Cloud::Vision::V1::TextAnnotation::DetectedBreak]
  #     Detected start or end of a text segment.
  class TextProperty; end
end

#text ⇒ String

Returns UTF-8 text detected on the pages.

Returns:

  • (String)

    UTF-8 text detected on the pages.



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/google/cloud/vision/v1/doc/google/cloud/vision/v1/text_annotation.rb', line 33

class TextAnnotation
  # Detected language for a structural component.
  # @!attribute [rw] language_code
  #   @return [String]
  #     The BCP-47 language code, such as "en-US" or "sr-Latn". For more
  #     information, see
  #     http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
  # @!attribute [rw] confidence
  #   @return [Float]
  #     Confidence of detected language. Range [0, 1].
  class DetectedLanguage; end

  # Detected start or end of a structural component.
  # @!attribute [rw] type
  #   @return [Google::Cloud::Vision::V1::TextAnnotation::DetectedBreak::BreakType]
  #     Detected break type.
  # @!attribute [rw] is_prefix
  #   @return [true, false]
  #     True if break prepends the element.
  class DetectedBreak
    # Enum to denote the type of break found. New line, space etc.
    module BreakType
      # Unknown break label type.
      UNKNOWN = 0

      # Regular space.
      SPACE = 1

      # Sure space (very wide).
      SURE_SPACE = 2

      # Line-wrapping break.
      EOL_SURE_SPACE = 3

      # End-line hyphen that is not present in text; does not co-occur with
      # +SPACE+, +LEADER_SPACE+, or +LINE_BREAK+.
      HYPHEN = 4

      # Line break that ends a paragraph.
      LINE_BREAK = 5
    end
  end

  # Additional information detected on the structural component.
  # @!attribute [rw] detected_languages
  #   @return [Array<Google::Cloud::Vision::V1::TextAnnotation::DetectedLanguage>]
  #     A list of detected languages together with confidence.
  # @!attribute [rw] detected_break
  #   @return [Google::Cloud::Vision::V1::TextAnnotation::DetectedBreak]
  #     Detected start or end of a text segment.
  class TextProperty; end
end