mdds
aos/block_util.hpp
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*************************************************************************
3 *
4 * Copyright (c) 2021 Kohei Yoshida
5 *
6 * Permission is hereby granted, free of charge, to any person
7 * obtaining a copy of this software and associated documentation
8 * files (the "Software"), to deal in the Software without
9 * restriction, including without limitation the rights to use,
10 * copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following
13 * conditions:
14 *
15 * The above copyright notice and this permission notice shall be
16 * included in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 * OTHER DEALINGS IN THE SOFTWARE.
26 *
27 ************************************************************************/
28
29#ifndef INCLUDED_MDDS_MULTI_TYPE_VECTOR_DIR_AOS_BLOCK_UTIL_HPP
30#define INCLUDED_MDDS_MULTI_TYPE_VECTOR_DIR_AOS_BLOCK_UTIL_HPP
31
32#include "mdds/global.hpp"
33#include "../types.hpp"
34
35namespace mdds { namespace mtv { namespace aos { namespace detail {
36
37template<typename Blks, lu_factor_t F>
39{
40 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
41 {
42 static_assert(invalid_static_int<F>, "The loop-unrolling factor must be one of 0, 4, 8, 16, or 32.");
43 }
44};
45
46template<typename Blks>
47struct adjust_block_positions<Blks, lu_factor_t::none>
48{
49 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
50 {
51 int64_t n = blocks.size();
52
53 if (start_block_index >= n)
54 return;
55
56#if MDDS_USE_OPENMP
57#pragma omp parallel for
58#endif
59 for (int64_t i = start_block_index; i < n; ++i)
60 blocks[i].position += delta;
61 }
62};
63
64template<typename Blks>
65struct adjust_block_positions<Blks, lu_factor_t::lu4>
66{
67 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
68 {
69 int64_t n = blocks.size();
70
71 if (start_block_index >= n)
72 return;
73
74 // Ensure that the section length is divisible by 4.
75 int64_t len = n - start_block_index;
76 int64_t rem = len & 3; // % 4
77 len -= rem;
78 len += start_block_index;
79#if MDDS_USE_OPENMP
80#pragma omp parallel for
81#endif
82 for (int64_t i = start_block_index; i < len; i += 4)
83 {
84 blocks[i].position += delta;
85 blocks[i + 1].position += delta;
86 blocks[i + 2].position += delta;
87 blocks[i + 3].position += delta;
88 }
89
90 rem += len;
91 for (int64_t i = len; i < rem; ++i)
92 blocks[i].position += delta;
93 }
94};
95
96template<typename Blks>
97struct adjust_block_positions<Blks, lu_factor_t::lu8>
98{
99 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
100 {
101 int64_t n = blocks.size();
102
103 if (start_block_index >= n)
104 return;
105
106 // Ensure that the section length is divisible by 8.
107 int64_t len = n - start_block_index;
108 int64_t rem = len & 7; // % 8
109 len -= rem;
110 len += start_block_index;
111#if MDDS_USE_OPENMP
112#pragma omp parallel for
113#endif
114 for (int64_t i = start_block_index; i < len; i += 8)
115 {
116 blocks[i].position += delta;
117 blocks[i + 1].position += delta;
118 blocks[i + 2].position += delta;
119 blocks[i + 3].position += delta;
120 blocks[i + 4].position += delta;
121 blocks[i + 5].position += delta;
122 blocks[i + 6].position += delta;
123 blocks[i + 7].position += delta;
124 }
125
126 rem += len;
127 for (int64_t i = len; i < rem; ++i)
128 blocks[i].position += delta;
129 }
130};
131
132template<typename Blks>
133struct adjust_block_positions<Blks, lu_factor_t::lu16>
134{
135 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
136 {
137 int64_t n = blocks.size();
138
139 if (start_block_index >= n)
140 return;
141
142 // Ensure that the section length is divisible by 16.
143 int64_t len = n - start_block_index;
144 int64_t rem = len & 15; // % 16
145 len -= rem;
146 len += start_block_index;
147#if MDDS_USE_OPENMP
148#pragma omp parallel for
149#endif
150 for (int64_t i = start_block_index; i < len; i += 16)
151 {
152 blocks[i].position += delta;
153 blocks[i + 1].position += delta;
154 blocks[i + 2].position += delta;
155 blocks[i + 3].position += delta;
156 blocks[i + 4].position += delta;
157 blocks[i + 5].position += delta;
158 blocks[i + 6].position += delta;
159 blocks[i + 7].position += delta;
160 blocks[i + 8].position += delta;
161 blocks[i + 9].position += delta;
162 blocks[i + 10].position += delta;
163 blocks[i + 11].position += delta;
164 blocks[i + 12].position += delta;
165 blocks[i + 13].position += delta;
166 blocks[i + 14].position += delta;
167 blocks[i + 15].position += delta;
168 }
169
170 rem += len;
171 for (int64_t i = len; i < rem; ++i)
172 blocks[i].position += delta;
173 }
174};
175
176template<typename Blks>
177struct adjust_block_positions<Blks, lu_factor_t::lu32>
178{
179 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
180 {
181 int64_t n = blocks.size();
182
183 if (start_block_index >= n)
184 return;
185
186 // Ensure that the section length is divisible by 32.
187 int64_t len = n - start_block_index;
188 int64_t rem = len & 31; // % 32
189 len -= rem;
190 len += start_block_index;
191#if MDDS_USE_OPENMP
192#pragma omp parallel for
193#endif
194 for (int64_t i = start_block_index; i < len; i += 32)
195 {
196 blocks[i].position += delta;
197 blocks[i + 1].position += delta;
198 blocks[i + 2].position += delta;
199 blocks[i + 3].position += delta;
200 blocks[i + 4].position += delta;
201 blocks[i + 5].position += delta;
202 blocks[i + 6].position += delta;
203 blocks[i + 7].position += delta;
204 blocks[i + 8].position += delta;
205 blocks[i + 9].position += delta;
206 blocks[i + 10].position += delta;
207 blocks[i + 11].position += delta;
208 blocks[i + 12].position += delta;
209 blocks[i + 13].position += delta;
210 blocks[i + 14].position += delta;
211 blocks[i + 15].position += delta;
212 blocks[i + 16].position += delta;
213 blocks[i + 17].position += delta;
214 blocks[i + 18].position += delta;
215 blocks[i + 19].position += delta;
216 blocks[i + 20].position += delta;
217 blocks[i + 21].position += delta;
218 blocks[i + 22].position += delta;
219 blocks[i + 23].position += delta;
220 blocks[i + 24].position += delta;
221 blocks[i + 25].position += delta;
222 blocks[i + 26].position += delta;
223 blocks[i + 27].position += delta;
224 blocks[i + 28].position += delta;
225 blocks[i + 29].position += delta;
226 blocks[i + 30].position += delta;
227 blocks[i + 31].position += delta;
228 }
229
230 rem += len;
231 for (int64_t i = len; i < rem; ++i)
232 blocks[i].position += delta;
233 }
234};
235
236}}}} // namespace mdds::mtv::aos::detail
237
238#endif
239
240/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: aos/block_util.hpp:39