_utils.pxd
# Authors: Gilles Louppe <g.louppe@gmail.com>
#          Peter Prettenhofer <peter.prettenhofer@gmail.com>
#          Arnaud Joly <arnaud.v.joly@gmail.com>
#          Jacob Schreiber <jmschreiber91@gmail.com>
#          Nelson Liu <nelson@nelsonliu.me>
#
# License: BSD 3 clause

# See _utils.pyx for details.

import numpy as np
cimport numpy as np

from ._tree cimport Node
from ..neighbors._quad_tree cimport Cell

ctypedef np.npy_float32 DTYPE_t          # Type of X
ctypedef np.npy_float64 DOUBLE_t         # Type of y, sample_weight
ctypedef np.npy_intp SIZE_t              # Type for indices and counters
ctypedef np.npy_int32 INT32_t            # Signed 32 bit integer
ctypedef np.npy_uint32 UINT32_t          # Unsigned 32 bit integer

cdef enum:
    # Max value for our rand_r replacement (near the bottom).
    # We don't use RAND_MAX because it's different across platforms and
    # particularly tiny on Windows/MSVC.
    RAND_R_MAX = 0x7FFFFFFF

# safe_realloc(&p, n) resizes the allocation of p to n * sizeof(*p) bytes or
# raises a MemoryError. It never calls free, since that's __dealloc__'s job.
#   cdef DTYPE_t *p = NULL
#   safe_realloc(&p, n)
# is equivalent to p = malloc(n * sizeof(*p)) with error checking.
ctypedef fused realloc_ptr:
    # Add pointer types here as needed.
    (DTYPE_t*)
    (SIZE_t*)
    (unsigned char*)
    (WeightedPQueueRecord*)
    (DOUBLE_t*)
    (DOUBLE_t**)
    (Node*)
    (Cell*)
    (Node**)
    (StackRecord*)
    (PriorityHeapRecord*)

cdef realloc_ptr safe_realloc(realloc_ptr* p, size_t nelems) nogil except *
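
# A small usage sketch (hypothetical names, for illustration only): growing a
# SIZE_t buffer from `old_n` to `new_n` elements while keeping its contents.
#
#   cdef SIZE_t* samples = NULL
#   safe_realloc(&samples, old_n)   # initial allocation; raises MemoryError on failure
#   # ... fill samples[0:old_n] ...
#   safe_realloc(&samples, new_n)   # resize; existing elements are preserved
#   free(samples)                   # safe_realloc never frees; the owner must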

cdef np.ndarray sizet_ptr_to_ndarray(SIZE_t* data, SIZE_t size)

cdef SIZE_t rand_int(SIZE_t low, SIZE_t high,
                     UINT32_t* random_state) nogil

cdef double rand_uniform(double low, double high,
                         UINT32_t* random_state) nogil

cdef double log(double x) nogil
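
# A minimal sketch of how these helpers are typically consumed (hypothetical
# names, not declarations from this file): callers keep a UINT32_t state word
# seeded from the estimator's random_state and pass its address around, so
# draws can happen without the GIL.
#
#   cdef UINT32_t rand_r_state = seed   # `seed` is a placeholder
#   cdef SIZE_t f = rand_int(0, n_features, &rand_r_state)         # index in [0, n_features)
#   cdef double t = rand_uniform(min_val, max_val, &rand_r_state)  # value between min_val and max_val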

# =============================================================================
# Stack data structure
# =============================================================================

# A record on the stack for depth-first tree growing
cdef struct StackRecord:
    SIZE_t start
    SIZE_t end
    SIZE_t depth
    SIZE_t parent
    bint is_left
    double impurity
    SIZE_t n_constant_features

cdef class Stack:
    cdef SIZE_t capacity
    cdef SIZE_t top
    cdef StackRecord* stack_

    cdef bint is_empty(self) nogil
    cdef int push(self, SIZE_t start, SIZE_t end, SIZE_t depth, SIZE_t parent,
                  bint is_left, double impurity,
                  SIZE_t n_constant_features) nogil except -1
    cdef int pop(self, StackRecord* res) nogil
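
# A minimal sketch of a depth-first building loop driving this Stack
# (hypothetical; the actual builder lives in _tree.pyx and uses its own
# constants, so the names below are placeholders):
#
#   cdef Stack stack = Stack(initial_capacity)
#   cdef StackRecord record
#   # push a record covering all samples for the root node
#   stack.push(0, n_samples, 0, root_parent, 0, root_impurity, 0)
#   while not stack.is_empty():
#       stack.pop(&record)
#       # split [record.start, record.end), then push the two children with
#       # depth = record.depth + 1 and is_left = 1 / 0 respectively.
#
# push() is declared `except -1`, so a failed reallocation of the internal
# array can propagate a MemoryError even though the call runs without the GIL.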

# =============================================================================
# PriorityHeap data structure
# =============================================================================

# A record on the frontier for best-first tree growing
cdef struct PriorityHeapRecord:
    SIZE_t node_id
    SIZE_t start
    SIZE_t end
    SIZE_t pos
    SIZE_t depth
    bint is_leaf
    double impurity
    double impurity_left
    double impurity_right
    double improvement

cdef class PriorityHeap:
    cdef SIZE_t capacity
    cdef SIZE_t heap_ptr
    cdef PriorityHeapRecord* heap_

    cdef bint is_empty(self) nogil
    cdef void heapify_up(self, PriorityHeapRecord* heap, SIZE_t pos) nogil
    cdef void heapify_down(self, PriorityHeapRecord* heap, SIZE_t pos,
                           SIZE_t heap_length) nogil
    cdef int push(self, SIZE_t node_id, SIZE_t start, SIZE_t end, SIZE_t pos,
                  SIZE_t depth, bint is_leaf, double improvement,
                  double impurity, double impurity_left,
                  double impurity_right) nogil except -1
    cdef int pop(self, PriorityHeapRecord* res) nogil
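
# A minimal sketch of best-first growing with this heap (hypothetical; the
# actual loop lives in _tree.pyx):
#
#   cdef PriorityHeap frontier = PriorityHeap(initial_capacity)
#   cdef PriorityHeapRecord record
#   # push one candidate split per expandable node, then repeatedly take the
#   # candidate with the largest `improvement`, turn it into a real node, and
#   # push its two children until the leaf budget is exhausted:
#   while not frontier.is_empty():
#       frontier.pop(&record)
#       ...
#
# pop() fills *res with the best record and returns 0, or returns -1 when the
# heap is empty.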

# =============================================================================
# WeightedPQueue data structure
# =============================================================================

# A record stored in the WeightedPQueue
cdef struct WeightedPQueueRecord:
    DOUBLE_t data
    DOUBLE_t weight

cdef class WeightedPQueue:
    cdef SIZE_t capacity
    cdef SIZE_t array_ptr
    cdef WeightedPQueueRecord* array_

    cdef bint is_empty(self) nogil
    cdef int reset(self) nogil except -1
    cdef SIZE_t size(self) nogil
    cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except -1
    cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil
    cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil
    cdef int peek(self, DOUBLE_t* data, DOUBLE_t* weight) nogil
    cdef DOUBLE_t get_weight_from_index(self, SIZE_t index) nogil
    cdef DOUBLE_t get_value_from_index(self, SIZE_t index) nogil
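
# A minimal usage sketch (hypothetical values): records are kept ordered by
# `data`, so the index-based accessors walk the samples in sorted order.
#
#   cdef WeightedPQueue queue = WeightedPQueue(8)
#   queue.push(3.0, 1.0)
#   queue.push(1.0, 2.0)
#   # queue.get_value_from_index(0)  -> 1.0   (smallest data value first)
#   # queue.get_weight_from_index(0) -> 2.0
#   queue.remove(3.0, 1.0)           # drops the matching (data, weight) record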

# =============================================================================
# WeightedMedianCalculator data structure
# =============================================================================

cdef class WeightedMedianCalculator:
    cdef SIZE_t initial_capacity
    cdef WeightedPQueue samples
    cdef DOUBLE_t total_weight
    cdef SIZE_t k
    cdef DOUBLE_t sum_w_0_k  # represents sum(weights[0:k])
                             # = w[0] + w[1] + ... + w[k-1]

    cdef SIZE_t size(self) nogil
    cdef int push(self, DOUBLE_t data, DOUBLE_t weight) nogil except -1
    cdef int reset(self) nogil except -1
    cdef int update_median_parameters_post_push(
        self, DOUBLE_t data, DOUBLE_t weight,
        DOUBLE_t original_median) nogil
    cdef int remove(self, DOUBLE_t data, DOUBLE_t weight) nogil
    cdef int pop(self, DOUBLE_t* data, DOUBLE_t* weight) nogil
    cdef int update_median_parameters_post_remove(
        self, DOUBLE_t data, DOUBLE_t weight,
        DOUBLE_t original_median) nogil
    cdef DOUBLE_t get_median(self) nogil
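
# A rough sketch of the intended use and invariant (hypothetical illustration,
# based on the fields declared above): `samples` keeps the (value, weight)
# pairs ordered by value, while `k` and `sum_w_0_k` track how much weight lies
# at or below the current median candidate, so get_median() can return the
# weighted median of everything pushed so far without re-sorting. Streaming
# usage, e.g. from a criterion:
#
#   cdef WeightedMedianCalculator median_calc = WeightedMedianCalculator(16)
#   median_calc.push(2.0, 1.0)
#   median_calc.push(5.0, 1.0)
#   median_calc.push(7.0, 1.0)
#   # median_calc.get_median() -> 5.0 (middle value for equal unit weights)
#   median_calc.remove(7.0, 1.0)
#   # median_calc.get_median() -> 3.5 (even total weight: midpoint of 2.0 and 5.0)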