Lecture 28 - Strings

We've been using strings from the beginning of the semester. We know that they are immutable, but we can form new strings by gluing them together using ++ and by using interpolation: "Some string {expression}".

// The interface of Grace strings.  Strings are immutable; every method that
// "changes" a string returns a new one.  Character positions are numbered from 1.
type String = {
  * (n: Number) -> String
  // returns a string that contains n repetitions of self, so "abc" * 3 = "abcabcabc"

  ++ (other: Object) -> String
  // returns a string that is the concatenation of self and other.asString

  < (other: String) -> Boolean
  // true if self precedes other lexicographically

  <= (other: String) -> Boolean
  // self == other || self < other

  == (other: Object) -> Boolean
  // true if other is a String and is equal to self

  != (other: Object) -> Boolean
  \= (other: Object) -> Boolean
  // (self == other).not

  > (other: String) -> Boolean
  // true if other precedes self lexicographically

  at(index: Number) -> String
  // returns the character in position index (as a string of size 1)

  asLower -> String
  // returns a string like self, except that all letters are in lower case

  asNumber -> Number
  // attempts to parse self as a number;  returns that number, or NaN if it can't

  asString -> String
  // returns self, naturally

  asUpper -> String
  // returns a string like self, except that all letters are in upper case

  capitalized -> String
  // returns a string like self, except that the initial letters of all words are in upper case

  compare(other: String) -> Number
  // a three-way comparison: -1 if (self < other), 0 if (self == other), and +1 if (self > other)
  // This is useful when writing a comparison function for sortBy

  contains(other: String) -> Boolean
  // returns true if other is a contiguous substring of self

  endsWith(possibleSuffix: String) -> Boolean
  // true if self ends with possibleSuffix

  indexOf(sub: String) -> Number
  // returns the leftmost index at which sub appears in self, or 0 if it is not there.

  indexOf(sub: String) ifAbsent(absent: Block0<W>) -> Number | W
  // returns the leftmost index at which sub appears in self; applies absent if it is not there.

  indexOf(pattern: String) startingAt(offset: Number) ifAbsent(action: Block0<W>) -> Number | W
  // like the above, except that it returns the first index >= offset.

  indices -> IteratorFactory
  // an object representing the range of indices of self (1..self.size)

  isEmpty -> Boolean
  // true if self is the empty string

  lastIndexOf(sub: String) -> Number
  // returns the rightmost index at which sub appears in self, or 0 if it is not there.

  lastIndexOf(sub: String) ifAbsent(absent: Block0<W>) -> Number | W
  // returns the rightmost index at which sub appears in self; applies absent if it is not there.

  lastIndexOf(pattern: String) startingAt(offset: Number) ifAbsent(action: Block0<W>) -> Number | W
  // like the above, except that it returns the last index <= offset.

  ord -> Number
  // a numeric representation of the first character of self, or NaN if self is empty.

  replace(pattern: String) with(new: String) -> String
  // a string like self, but with all occurrences of pattern replaced by new

  size -> Number
  // returns the size of self, i.e., the number of characters it contains.

  startsWith(possiblePrefix: String) -> Boolean
  // true when possiblePrefix is a prefix of self

  startsWithDigit -> Boolean
  // true if the first character of self is a (Unicode) digit.

  startsWithLetter -> Boolean
  // true if the first character of self is a (Unicode) letter

  startsWithPeriod -> Boolean
  // true if the first character of self is a period

  startsWithSpace -> Boolean
  // true if the first character of self is a (Unicode) space.

  substringFrom(start: Number) size(max: Number) -> String
  // the substring of self starting at index start and of length max characters,
  // or extending to the end of self if that is less than max.    If start = self.size + 1,
  // the empty string is returned.   If start is outside the range
  // 1..self.size+1, BoundsError is raised.
  // NOTE(review): an earlier version of this comment also listed "stop < start" as an
  // empty-string case, but this variant has no stop parameter; presumably the
  // equivalent condition is max < 1 -- TODO confirm.

  substringFrom(start: Number) to(stop: Number) -> String
  // returns the substring of self starting at index start and extending
  // either to the end of self, or to stop.    If start = self.size + 1, or
  // stop < start, the empty string is returned.   If start is outside the range
  // 1..self.size+1, BoundsError is raised.

  trim -> String
  // a string like self except that leading and trailing spaces are omitted.
}

The most useful methods are size, indexOf (currently broken if item not found), indexOf()ifAbsent, indexOf()startingAt()ifAbsent, asLower, asUpper, replace()with(), and substringFrom()size and substringFrom()to()

Here is a program to count all (non-overlapping) occurrences of a word in a text:

// Counts the non-overlapping occurrences of word in page, scanning left to right.
//   page -- the text to search
//   word -- the string to look for
// Returns the number of occurrences found; 0 if word is empty or never appears.
method wordCount(page:String,word:String)-> Number {
  // An empty word would leave the search offset unchanged after each match,
  // so the loop below could never make progress.
  if (word.isEmpty) then { return 0 }

  var count: Number := 0
  // Use the ifAbsent variant with a 0 sentinel: 0 is never a valid index
  // because string positions are numbered from 1.
  var nextLocn: Number := page.indexOf(word) ifAbsent { 0 }
  while {nextLocn > 0} do {
    count := count + 1
    // Resume just past the match so that occurrences do not overlap.
    nextLocn := page.indexOf(word) startingAt(nextLocn + word.size) ifAbsent { 0 }
  }
  count
}

Or if we want to use the "ifAbsent" variant:

// Counts the non-overlapping occurrences of word in page, using the ifAbsent
// block of indexOf()startingAt()ifAbsent to leave the search loop.
//   page -- the text to search
//   word -- the string to look for
// Returns the number of occurrences found; 0 if word is empty or never appears.
method wordCountIfAbsent(page:String,word:String)-> Number {
  // An empty word would leave the search offset unchanged after each match,
  // so the loop below could never terminate.
  if (word.isEmpty) then { return 0 }

  var count: Number := 0
  // String positions are numbered from 1, so the first search starts at 1.
  var searchFrom: Number := 1
  while {true} do {
    // The return inside the block exits the whole method once no match remains.
    def found = page.indexOf(word) startingAt(searchFrom) ifAbsent {
      return count
    }
    count := count + 1
    // Resume just past the match so that occurrences do not overlap.
    searchFrom := found + word.size
  }
}