...

Source file src/sort/sort.go

Documentation: sort

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:generate go run gen_sort_variants.go
     6  
     7  // Package sort provides primitives for sorting slices and user-defined collections.
     8  package sort
     9  
    10  import "math/bits"
    11  
    12  // An implementation of Interface can be sorted by the routines in this package.
    13  // The methods refer to elements of the underlying collection by integer index.
    14  type Interface interface {
    15  	// Len is the number of elements in the collection.
    16  	Len() int
    17  
    18  	// Less reports whether the element with index i
    19  	// must sort before the element with index j.
    20  	//
    21  	// If both Less(i, j) and Less(j, i) are false,
    22  	// then the elements at index i and j are considered equal.
    23  	// Sort may place equal elements in any order in the final result,
    24  	// while Stable preserves the original input order of equal elements.
    25  	//
    26  	// Less must describe a transitive ordering:
    27  	//  - if both Less(i, j) and Less(j, k) are true, then Less(i, k) must be true as well.
    28  	//  - if both Less(i, j) and Less(j, k) are false, then Less(i, k) must be false as well.
    29  	//
    30  	// Note that floating-point comparison (the < operator on float32 or float64 values)
    31  	// is not a transitive ordering when not-a-number (NaN) values are involved.
    32  	// See Float64Slice.Less for a correct implementation for floating-point values.
    33  	Less(i, j int) bool
    34  
    35  	// Swap swaps the elements with indexes i and j.
    36  	Swap(i, j int)
    37  }
    38  
    39  // Sort sorts data in ascending order as determined by the Less method.
    40  // It makes one call to data.Len to determine n and O(n*log(n)) calls to
    41  // data.Less and data.Swap. The sort is not guaranteed to be stable.
    42  func Sort(data Interface) {
    43  	n := data.Len()
    44  	if n <= 1 {
    45  		return
    46  	}
    47  	limit := bits.Len(uint(n))
    48  	pdqsort(data, 0, n, limit)
    49  }
    50  
    51  type sortedHint int // hint for pdqsort when choosing the pivot
    52  
    53  const (
    54  	unknownHint sortedHint = iota
    55  	increasingHint
    56  	decreasingHint
    57  )
    58  
    59  // xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf
    60  type xorshift uint64
    61  
    62  func (r *xorshift) Next() uint64 {
    63  	*r ^= *r << 13
    64  	*r ^= *r >> 17
    65  	*r ^= *r << 5
    66  	return uint64(*r)
    67  }
    68  
    69  func nextPowerOfTwo(length int) uint {
    70  	shift := uint(bits.Len(uint(length)))
    71  	return uint(1 << shift)
    72  }
    73  
    74  // lessSwap is a pair of Less and Swap function for use with the
    75  // auto-generated func-optimized variant of sort.go in
    76  // zfuncversion.go.
    77  type lessSwap struct {
    78  	Less func(i, j int) bool
    79  	Swap func(i, j int)
    80  }
    81  
    82  type reverse struct {
    83  	// This embedded Interface permits Reverse to use the methods of
    84  	// another Interface implementation.
    85  	Interface
    86  }
    87  
    88  // Less returns the opposite of the embedded implementation's Less method.
    89  func (r reverse) Less(i, j int) bool {
    90  	return r.Interface.Less(j, i)
    91  }
    92  
    93  // Reverse returns the reverse order for data.
    94  func Reverse(data Interface) Interface {
    95  	return &reverse{data}
    96  }
    97  
    98  // IsSorted reports whether data is sorted.
    99  func IsSorted(data Interface) bool {
   100  	n := data.Len()
   101  	for i := n - 1; i > 0; i-- {
   102  		if data.Less(i, i-1) {
   103  			return false
   104  		}
   105  	}
   106  	return true
   107  }
   108  
   109  // Convenience types for common cases
   110  
   111  // IntSlice attaches the methods of Interface to []int, sorting in increasing order.
   112  type IntSlice []int
   113  
   114  func (x IntSlice) Len() int           { return len(x) }
   115  func (x IntSlice) Less(i, j int) bool { return x[i] < x[j] }
   116  func (x IntSlice) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
   117  
   118  // Sort is a convenience method: x.Sort() calls Sort(x).
   119  func (x IntSlice) Sort() { Sort(x) }
   120  
   121  // Float64Slice implements Interface for a []float64, sorting in increasing order,
   122  // with not-a-number (NaN) values ordered before other values.
   123  type Float64Slice []float64
   124  
   125  func (x Float64Slice) Len() int { return len(x) }
   126  
   127  // Less reports whether x[i] should be ordered before x[j], as required by the sort Interface.
   128  // Note that floating-point comparison by itself is not a transitive relation: it does not
   129  // report a consistent ordering for not-a-number (NaN) values.
   130  // This implementation of Less places NaN values before any others, by using:
   131  //
   132  //	x[i] < x[j] || (math.IsNaN(x[i]) && !math.IsNaN(x[j]))
   133  func (x Float64Slice) Less(i, j int) bool { return x[i] < x[j] || (isNaN(x[i]) && !isNaN(x[j])) }
   134  func (x Float64Slice) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
   135  
   136  // isNaN is a copy of math.IsNaN to avoid a dependency on the math package.
   137  func isNaN(f float64) bool {
   138  	return f != f
   139  }
   140  
   141  // Sort is a convenience method: x.Sort() calls Sort(x).
   142  func (x Float64Slice) Sort() { Sort(x) }
   143  
   144  // StringSlice attaches the methods of Interface to []string, sorting in increasing order.
   145  type StringSlice []string
   146  
   147  func (x StringSlice) Len() int           { return len(x) }
   148  func (x StringSlice) Less(i, j int) bool { return x[i] < x[j] }
   149  func (x StringSlice) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
   150  
   151  // Sort is a convenience method: x.Sort() calls Sort(x).
   152  func (x StringSlice) Sort() { Sort(x) }
   153  
   154  // Convenience wrappers for common cases
   155  
   156  // Ints sorts a slice of ints in increasing order.
   157  func Ints(x []int) { Sort(IntSlice(x)) }
   158  
   159  // Float64s sorts a slice of float64s in increasing order.
   160  // Not-a-number (NaN) values are ordered before other values.
   161  func Float64s(x []float64) { Sort(Float64Slice(x)) }
   162  
   163  // Strings sorts a slice of strings in increasing order.
   164  func Strings(x []string) { Sort(StringSlice(x)) }
   165  
   166  // IntsAreSorted reports whether the slice x is sorted in increasing order.
   167  func IntsAreSorted(x []int) bool { return IsSorted(IntSlice(x)) }
   168  
   169  // Float64sAreSorted reports whether the slice x is sorted in increasing order,
   170  // with not-a-number (NaN) values before any other values.
   171  func Float64sAreSorted(x []float64) bool { return IsSorted(Float64Slice(x)) }
   172  
   173  // StringsAreSorted reports whether the slice x is sorted in increasing order.
   174  func StringsAreSorted(x []string) bool { return IsSorted(StringSlice(x)) }
   175  
   176  // Notes on stable sorting:
   177  // The used algorithms are simple and provable correct on all input and use
   178  // only logarithmic additional stack space. They perform well if compared
   179  // experimentally to other stable in-place sorting algorithms.
   180  //
   181  // Remarks on other algorithms evaluated:
   182  //  - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++:
   183  //    Not faster.
   184  //  - GCC's __rotate for block rotations: Not faster.
   185  //  - "Practical in-place mergesort" from  Jyrki Katajainen, Tomi A. Pasanen
   186  //    and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40:
   187  //    The given algorithms are in-place, number of Swap and Assignments
   188  //    grow as n log n but the algorithm is not stable.
   189  //  - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and
   190  //    V. Raman in Algorithmica (1996) 16, 115-160:
   191  //    This algorithm either needs additional 2n bits or works only if there
   192  //    are enough different elements available to encode some permutations
   193  //    which have to be undone later (so not stable on any input).
   194  //  - All the optimal in-place sorting/merging algorithms I found are either
   195  //    unstable or rely on enough different elements in each step to encode the
   196  //    performed block rearrangements. See also "In-Place Merging Algorithms",
   197  //    Denham Coates-Evely, Department of Computer Science, Kings College,
   198  //    January 2004 and the references in there.
   199  //  - Often "optimal" algorithms are optimal in the number of assignments
   200  //    but Interface has only Swap as operation.
   201  
   202  // Stable sorts data in ascending order as determined by the Less method,
   203  // while keeping the original order of equal elements.
   204  //
   205  // It makes one call to data.Len to determine n, O(n*log(n)) calls to
   206  // data.Less and O(n*log(n)*log(n)) calls to data.Swap.
   207  func Stable(data Interface) {
   208  	stable(data, data.Len())
   209  }
   210  
   211  /*
   212  Complexity of Stable Sorting
   213  
   214  
   215  Complexity of block swapping rotation
   216  
   217  Each Swap puts one new element into its correct, final position.
   218  Elements which reach their final position are no longer moved.
   219  Thus block swapping rotation needs |u|+|v| calls to Swaps.
   220  This is best possible as each element might need a move.
   221  
   222  Pay attention when comparing to other optimal algorithms which
   223  typically count the number of assignments instead of swaps:
   224  E.g. the optimal algorithm of Dudzinski and Dydek for in-place
   225  rotations uses O(u + v + gcd(u,v)) assignments which is
   226  better than our O(3 * (u+v)) as gcd(u,v) <= u.
   227  
   228  
   229  Stable sorting by SymMerge and BlockSwap rotations
   230  
   231  SymMerg complexity for same size input M = N:
   232  Calls to Less:  O(M*log(N/M+1)) = O(N*log(2)) = O(N)
   233  Calls to Swap:  O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N))
   234  
   235  (The following argument does not fuzz over a missing -1 or
   236  other stuff which does not impact the final result).
   237  
   238  Let n = data.Len(). Assume n = 2^k.
   239  
   240  Plain merge sort performs log(n) = k iterations.
   241  On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i.
   242  
   243  Thus iteration i of merge sort performs:
   244  Calls to Less  O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n)
   245  Calls to Swap  O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i)
   246  
   247  In total k = log(n) iterations are performed; so in total:
   248  Calls to Less O(log(n) * n)
   249  Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n)
   250     = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n))
   251  
   252  
   253  Above results should generalize to arbitrary n = 2^k + p
   254  and should not be influenced by the initial insertion sort phase:
   255  Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of
   256  size bs at n/bs blocks:  O(bs*n) Swaps and Less during insertion sort.
   257  Merge sort iterations start at i = log(bs). With t = log(bs) constant:
   258  Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n)
   259     = O(n * log(n))
   260  Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n))
   261  
   262  */
   263  

View as plain text