class String

Public Instance Methods

ascii() click to toggle source
# File lib/sup/util.rb, line 434
# Builds a 7-bit-clean rendering of the receiver, byte by byte.
#
# Bytes with the high bit set are written out as a literal backslash, an "x"
# and the byte's value in hexadecimal; every other byte is copied through
# unchanged. The assembled string is handed to fix_encoding! and that call's
# result is returned.
def ascii
  escaped = each_byte.each_with_object("") do |byte, acc|
    # byte is in 0..255, so "< 128" is the same test as "high bit clear".
    acc << (byte < 128 ? byte.chr : "\\x#{byte.to_s 16}")
  end
  escaped.fix_encoding!
end
camel_to_hyphy() click to toggle source
# File lib/sup/util.rb, line 260
# Converts a camelCase identifier into lower-case hyphenated form.
#
# A hyphen is inserted wherever a lower-case ASCII letter is directly followed
# by an upper-case ASCII letter or a digit; the whole result is then downcased.
# Returns a new string; the receiver is left untouched.
def camel_to_hyphy
  hyphenated = gsub(/([a-z])([A-Z0-9])/, '\1-\2')
  hyphenated.downcase
end
check() click to toggle source
# File lib/sup/util.rb, line 425
# Sanity-checks the receiver's encoding.
#
# Passes silently (returning nil) when the string is tagged UTF-8 or ASCII and
# its bytes are valid for that encoding. Any failure -- including an error
# raised by the probes themselves -- is re-raised as a CheckError carrying the
# original message.
#
# The respond_to? guards skip each probe on objects that lack the
# corresponding method.
def check
  if respond_to?(:encoding)
    acceptable = [Encoding::UTF_8, Encoding::ASCII].include?(encoding)
    fail "unexpected encoding #{encoding}" unless acceptable
  end

  if respond_to?(:valid_encoding?)
    fail "invalid encoding" unless valid_encoding?
  end
rescue => e
  raise CheckError.new(e.message)
end
display_length() click to toggle source
# File lib/sup/util.rb, line 241
# Returns the display width of the receiver, memoised in @display_length.
#
# On the first call the string is run through fix_encoding! (which modifies
# the receiver in place) and measured with Unicode.width. The cached value is
# never invalidated by this method, so later mutations of the string are not
# reflected.
def display_length
  @display_length ||= Unicode.width(fix_encoding!, false)

  # Non-negative widths are usable as they are.
  return @display_length unless @display_length < 0

  # if Unicode.width fails and returns -1, fall back to
  # regular String#length, see pull-request: #256.
  @display_length = length
end
each(&b) click to toggle source
# File lib/sup/util.rb, line 412
# Iterates over the receiver line by line by forwarding the given block
# straight to each_line. Returns whatever each_line returns.
def each(&b)
  each_line(&b)
end
find_all_positions(x) click to toggle source
# File lib/sup/util.rb, line 264
# Collects the start index of every occurrence of +x+ in the receiver.
#
# +x+ is anything String#index accepts (a string or a regexp). The search
# resumes one character after each hit, so overlapping matches are reported.
# Returns an array of integer offsets in ascending order (empty when there is
# no match).
def find_all_positions(x)
  positions = []
  offset = 0
  while offset < length && (hit = index(x, offset))
    positions << hit
    offset = hit + 1
  end
  positions
end
fix_encoding!() click to toggle source

Fix the damn string! make sure it is valid utf-8, then convert to user encoding.

# File lib/sup/util.rb, line 358
# Forces the receiver, in place, into a valid string in the encoding named by
# the global $encoding, replacing anything that cannot be converted.
#
# Returns self. Raises RuntimeError if the string is still invalid after
# either conversion stage.
def fix_encoding!
  # Stage 1: get to UTF-8 from whatever the string is currently tagged as.
  encode!('UTF-8', :invalid => :replace, :undef => :replace)

  # Stage 2: round-trip UTF-8 -> UTF-16 -> UTF-8. This is done even when the
  # string already claims to be UTF-8, so that invalid sequences get replaced.
  [['UTF-16', 'UTF-8'], ['UTF-8', 'UTF-16']].each do |target, source|
    encode!(target, source, :invalid => :replace, :undef => :replace)
  end

  unless valid_encoding?
    fail "Could not create valid UTF-8 string out of: '#{self.to_s}'."
  end

  # Stage 3: convert to the configured target encoding.
  encode!($encoding, :invalid => :replace, :undef => :replace)

  unless valid_encoding?
    fail "Could not create valid #{$encoding.inspect} string out of: '#{self.to_s}'."
  end

  self
end
normalize_whitespace() click to toggle source
# File lib/sup/util.rb, line 400
# Returns a copy of the receiver with every tab expanded to four spaces and
# every carriage return removed.
#
# fix_encoding! is applied to the receiver first, so the receiver itself may
# be re-encoded in place as a side effect.
def normalize_whitespace
  fix_encoding!
  detabbed = gsub(/\t/, "    ")
  detabbed.gsub(/\r/, "")
end
ord() click to toggle source
# File lib/sup/util.rb, line 406
# Returns the element at index 0 of the receiver, exactly as String#[] yields
# it (nil for an empty string).
#
# NOTE(review): on Ruby 1.9 and later String#[] with an integer index returns
# a one-character String rather than an Integer, and String already provides
# a built-in #ord. Presumably this is a shim for older interpreters -- confirm
# that it is only defined when String#ord is missing.
def ord
  slice(0)
end
slice_by_display_length(len) click to toggle source
# File lib/sup/util.rb, line 253
# Returns the longest prefix of the receiver whose accumulated display width
# does not exceed +len+.
#
# Each character's width is taken from its own display_length. Scanning stops
# at the first character that would overflow the budget; previously the whole
# string was walked even after the budget was exhausted, which made repeated
# calls from wrap needlessly expensive. The result is the same because a
# width is never negative (display_length falls back to String#length when
# the measured width is below zero), so once the budget is overdrawn no later
# character can fit.
#
# len - maximum display width (Integer). Zero or negative yields "".
def slice_by_display_length len
  remaining = len
  prefix = ""
  each_char do |c|
    remaining -= c.display_length
    break if remaining < 0
    prefix << c
  end
  prefix
end
split_on_commas() click to toggle source

A very complicated regex found on the internet to split on commas, unless they occur within double quotes.

# File lib/sup/util.rb, line 278
# Splits the receiver on commas (plus any whitespace following them), except
# for commas that sit inside a pair of double quotes.
#
# The string is first passed through normalize_whitespace. The lookahead in
# the pattern only accepts a comma when the text after it contains an even
# number of double quotes, i.e. the comma is not inside a quoted section.
# Returns an array of the pieces.
def split_on_commas
  cleaned = normalize_whitespace
  cleaned.split(/,\s*(?=(?:[^"]*"[^"]*")*(?![^"]*"))/)
end
split_on_commas_with_remainder() click to toggle source

ok, here we do it the hard way. got to have a remainder for purposes of tab-completing full email addresses

# File lib/sup/util.rb, line 284
# Splits the receiver on commas that lie outside double-quoted sections,
# using a small hand-written state machine, and reports any unterminated
# quoted tail separately.
#
# Returns a two-element array [parts, remainder]:
#   parts     - the comma-separated pieces, each with leading/trailing
#               whitespace removed.
#   remainder - when scanning ends inside a double-quoted section, the text
#               from the start of the current piece to the end of the string
#               (leading whitespace removed); nil otherwise.
#
# States: :outstring / :instring track whether we are inside double quotes;
# the :escaped_* variants mean the previous character was a backslash, so the
# character now under the cursor is taken literally.
def split_on_commas_with_remainder
  ret = []
  state = :outstring
  pos = 0
  region_start = 0
  # "<=" rather than "<": one extra pass at pos == length sees char == nil,
  # which flushes the final piece.
  while pos <= length
    # Right after a backslash, look at the very next character whatever it
    # is; otherwise jump straight to the next comma, quote or backslash.
    newpos = case state
      when :escaped_instring, :escaped_outstring then pos
      else index(/[,"\\]/, pos)
    end

    if newpos
      char = self[newpos]
    else
      # Nothing interesting left: treat end-of-string as the final delimiter.
      char = nil
      newpos = length
    end

    case char
    when ?"
      state = case state
        when :outstring then :instring
        when :instring then :outstring
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
      end
    when ?,, nil
      state = case state
        when :outstring, :escaped_outstring then
          ret << self[region_start ... newpos].gsub(/^\s+|\s+$/, "")
          region_start = newpos + 1
          :outstring
        when :instring then :instring
        when :escaped_instring then :instring
      end
    when ?\\
      state = case state
        when :instring then :escaped_instring
        when :outstring then :escaped_outstring
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
      end
    else
      # An ordinary character following a backslash: the escape applies to
      # this character only, so drop back to the unescaped state. Without
      # this, the escape would linger and swallow a later closing quote.
      state = case state
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
        else state
      end
    end
    pos = newpos + 1
  end

  remainder = case state
    when :instring
      self[region_start .. -1].gsub(/^\s+/, "")
    else
      nil
    end

  [ret, remainder]
end
to_set_of_symbols(split_on=nil) click to toggle source

takes a list of words, and returns an array of symbols. typically used in Sup for translating Xapian's representation of a list of labels (a string) to an array of label symbols.

split_on will be passed to String#split, so you can leave this nil for space.

# File lib/sup/util.rb, line 422
# Splits the receiver into words and returns them as a Set of symbols.
#
# split_on - separator handed to String#split; nil (the default) splits on
#            whitespace.
#
# Each piece is stripped of surrounding whitespace before being interned.
def to_set_of_symbols(split_on = nil)
  symbols = split(split_on).map { |word| word.strip.intern }
  Set.new(symbols)
end
transcode(to_encoding, from_encoding) click to toggle source

Transcode the string when the original encoding is known, fixing it if the result is broken.

# File lib/sup/util.rb, line 380
# Re-encodes the receiver in place from +from_encoding+ to +to_encoding+,
# replacing invalid or unconvertible characters.
#
# If the direct conversion leaves an invalid string, the conversion is redone
# via UTF-16 as an intermediate step. If no converter exists for the requested
# pair, the problem is logged with debug and fix_encoding! is used instead.
#
# Returns self. Raises RuntimeError if the result is still not valid.
def transcode(to_encoding, from_encoding)
  begin
    encode!(to_encoding, from_encoding, :invalid => :replace, :undef => :replace)

    if !valid_encoding?
      # Second attempt: hop through UTF-16 so bad sequences get replaced.
      [['UTF-16', from_encoding], [to_encoding, 'UTF-16']].each do |target, source|
        encode!(target, source, :invalid => :replace, :undef => :replace)
      end
    end
  rescue Encoding::ConverterNotFoundError
    debug "Encoding converter not found for #{from_encoding.inspect} or #{to_encoding.inspect}, fixing string: '#{self.to_s}', but expect weird characters."
    fix_encoding!
  end

  unless valid_encoding?
    fail "Could not create valid #{to_encoding.inspect} string out of: '#{self.to_s}'."
  end

  self
end
wrap(len) click to toggle source
# File lib/sup/util.rb, line 340
# Breaks the receiver into lines no wider than +len+ display columns.
#
# Each line is cut at the last whitespace character that fits within +len+
# (that whitespace character is dropped); when no whitespace fits, the line
# is cut hard at the width limit. Returns an array of strings whose last
# element is whatever is left over.
#
# Progress is guaranteed: if not even one character fits within +len+ (for
# example len is 0, or the first character is wider than len), a single
# character is emitted on its own line instead of looping forever, and an
# empty string always ends the loop.
def wrap len
  ret = []
  s = self
  until s.empty? || s.display_length <= len
    # Compute the fitting prefix once and reuse it for both branches.
    head = s.slice_by_display_length(len)
    cut = head.rindex(/\s/)
    if cut
      ret << s[0 ... cut]
      s = s[(cut + 1) .. -1]
    else
      # Nothing fits at all: force one character through so s shrinks.
      head = s[0, 1] if head.empty?
      ret << head
      s = s[head.length .. -1]
    end
  end
  ret << s
end