diff --git a/Cargo.toml b/Cargo.toml
index d5967b0..1533e3f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,8 +3,16 @@ name = "sherpa_ros2_pointcloud_downsampling"
 version = "0.1.0"
 edition = "2024"
 
+[profile.dev]
+opt-level = 1
+
+[profile.release]
+opt-level = 3
+lto = true            # "Fat" link-time optimization across all crates (faster binary, slower build)
+codegen-units = 1     # Single codegen unit: better optimization of the binary, slower compile
+
 [dependencies]
 byteorder = "1.5.0"
 futures = "0.3.31"
 itertools = "0.14.0"
-r2r = "0.9.5"
\ No newline at end of file
+r2r = "0.9.5"
diff --git a/src/main.rs b/src/main.rs
index 675841f..463f0ba 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -7,63 +7,77 @@ use std::f32::consts::PI;
 use std::ops::{Add, Sub};
 use std::time::{Duration, Instant};
 
+/// Lookup of a point field's offset by its name.
+trait GetOffset {
+    /// Returns the offset stored for `field`, or `None` if no such field exists.
+    fn get_offset(&self, field: &str) -> Option<usize>;
+}
+
+impl GetOffset for Vec<PointField> {
+    /// Walks the fields in order and converts the first name match's `offset` to `usize`.
+    fn get_offset(&self, field: &str) -> Option<usize> {
+        self.iter()
+            .find(|candidate| candidate.name == field)
+            .map(|candidate| candidate.offset as usize)
+    }
+}
+
 trait Downsample {
     fn downsample(self) -> Self;
 }
 
 impl Downsample for PointCloud2 {
     fn downsample(self) -> Self {
-        let data: Vec<u8> = self
-            .data
+        let offset_x: usize = self.fields.get_offset("x").unwrap();
+        let offset_y: usize = self.fields.get_offset("y").unwrap();
+        let offset_z: usize = self.fields.get_offset("z").unwrap();
+
+        let mut data: Vec<u8> = Vec::with_capacity(self.data.len());
+        self.data
             .chunks_exact(self.point_step as usize)
-            .map(|chunk| PointF32::from_bytes(chunk))
-            .filter(|point| point.is_valid())
+            .enumerate()
+            .map(|(point_nr, chunk)| {
+                (
+                    point_nr,
+                    match self.is_bigendian {
+                        true => VectorF32::new(
+                            BigEndian::read_f32(&chunk[offset_x..offset_x + 4]),
+                            BigEndian::read_f32(&chunk[offset_y..offset_y + 4]),
+                            BigEndian::read_f32(&chunk[offset_z..offset_z + 4]),
+                        ),
+                        false => VectorF32::new(
+                            LittleEndian::read_f32(&chunk[offset_x..offset_x + 4]),
+                            LittleEndian::read_f32(&chunk[offset_y..offset_y + 4]),
+                            LittleEndian::read_f32(&chunk[offset_z..offset_z + 4]),
+                        ),
+                    },
+                )
+            })
+            .filter(|(_, point)| point.is_valid())
             .tuple_windows::<(_, _, _)>()
-            .filter(|(last, current, next)| {
-                let v1: VectorF32 = last.vec_to(current);
-                let v2: VectorF32 = current.vec_to(next);
+            .filter(|((_, last), (_, current), (_, next))| {
+                let v1: VectorF32 = *last - *current;
+                let v2: VectorF32 = *current - *next;
 
                 let angle_deg: f32 =
                     (v1.dot(&v2).abs() / (v1.abs() * v2.abs())).acos() * 180f32 / PI;
 
                 angle_deg > 30f32
             })
-            .flat_map(|(_, point, _)| point.to_bytes_vec())
-            .collect();
+            .for_each(|(_, (point_nr, _), _)| {
+                let point_idx: usize = point_nr * self.point_step as usize;
+
+                data.extend_from_slice(&self.data[point_idx..point_idx + self.point_step as usize])
+            });
 
         PointCloud2 {
             header: self.header,
             height: 1,
-            width: data.len() as u32 / 16,
-            fields: vec![
-                PointField {
-                    name: "x".to_string(),
-                    offset: 0,
-                    datatype: 7,
-                    count: 1,
-                },
-                PointField {
-                    name: "y".to_string(),
-                    offset: 4,
-                    datatype: 7,
-                    count: 1,
-                },
-                PointField {
-                    name: "z".to_string(),
-                    offset: 8,
-                    datatype: 7,
-                    count: 1,
-                },
-                PointField {
-                    name: "intensity".to_string(),
-                    offset: 12,
-                    datatype: 7,
-                    count: 1,
-                },
-            ],
+            width: data.len() as u32 / self.point_step,
+            fields: self.fields,
             is_bigendian: false,
-            point_step: 16,
-            row_step: 16,
+            point_step: self.point_step,
+            row_step: self.point_step,
             data,
             is_dense: true,
         }
@@ -77,43 +91,11 @@ struct VectorF32 {
     z: f32,
 }
 
-#[derive(Debug, Clone, Copy)]
-struct PointF32 {
-    position: VectorF32,
-    intensity: f32,
-}
-
-impl PointF32 {
-    fn from_bytes(chunk: &[u8]) -> Self {
-        Self {
-            position: VectorF32 {
-                x: LittleEndian::read_f32(&chunk[0..4]),
-                y: LittleEndian::read_f32(&chunk[4..8]),
-                z: LittleEndian::read_f32(&chunk[8..12]),
-            },
-            intensity: LittleEndian::read_f32(&chunk[12..16]),
-        }
-    }
-
-    fn to_bytes_vec(&self) -> Vec<u8> {
-        let mut buf = [0u8; 16];
-        buf[0..4].copy_from_slice(&self.position.x.to_le_bytes());
-        buf[4..8].copy_from_slice(&self.position.y.to_le_bytes());
-        buf[8..12].copy_from_slice(&self.position.z.to_le_bytes());
-        buf[12..16].copy_from_slice(&self.intensity.to_le_bytes());
-        buf.to_vec()
-    }
-
-    fn is_valid(&self) -> bool {
-        self.position.is_valid() && !self.intensity.is_nan()
-    }
-
-    fn vec_to(&self, target: &Self) -> VectorF32 {
-        target.position - self.position
-    }
-}
-
 impl VectorF32 {
+    fn new(x: f32, y: f32, z: f32) -> Self {
+        Self { x, y, z }
+    }
+
     fn is_valid(&self) -> bool {
         !(self.x.is_nan() || self.y.is_nan() || self.z.is_nan())
     }
@@ -171,7 +153,7 @@ fn main() {
                 .for_each(|msg| {
                     let start = Instant::now();
                     let downsampled = msg.downsample();
-                    println!("Duration: {} ms", start.elapsed().as_millis());
+                    println!("Duration: {} us", start.elapsed().as_micros());
                     publisher.publish(&downsampled);
 
                     future::ready(())