diff -ruN '--exclude=.git' squeezelite-1.8.orig/dop.c squeezelite-1.8/dop.c
--- squeezelite-1.8.orig/dop.c	2015-10-07 06:28:19.297316324 +0000
+++ squeezelite-1.8/dop.c	2015-10-16 11:41:41.841468107 +0000
@@ -30,22 +30,22 @@
 #define LOCK_O   mutex_lock(outputbuf->mutex)
 #define UNLOCK_O mutex_unlock(outputbuf->mutex)
 
-// check for 32 dop marker frames to see if this is dop in flac
-// dop is always encoded in 24 bit samples with marker 0x0005xxxx or 0x00FAxxxx
-bool is_flac_dop(u32_t *lptr, u32_t *rptr, frames_t frames) {
+// check for 32 dop marker frames to see if this is a dop stream
+// dop is always encoded in 24 bit samples with markers 0x05 or 0xFA in MSB
+bool is_stream_dop(u8_t *lptr, u8_t *rptr, int step, frames_t frames) {
 	int matched = 0;
 	u32_t next = 0;
 
 	while (frames--) {
-		if (((*lptr & 0x00FF0000) == 0x00050000 && (*rptr & 0x00FF0000) == 0x00050000) ||
-			((*lptr & 0x00FF0000) == 0x00FA0000 && (*rptr & 0x00FF0000) == 0x00FA0000)) {
-			if (*lptr >> 24 == next) {
+		if ((*lptr == 0x05 && *rptr == 0x05) ||
+			(*lptr == 0xFA && *rptr == 0xFA)) {
+			if (*lptr == next) {
 				matched++;
-				next = ( 0x05 + 0xFA ) - next;
 			} else {
-				next = *lptr >> 24;
+				next = *lptr;
 				matched = 1;
 			}
+			next = ( 0x05 + 0xFA ) - next;
 		} else {
 			return false;
 		}
@@ -53,7 +53,7 @@
 			return true;
 		}
 
-		++lptr; ++rptr;
+		lptr+=step; rptr+=step;
 	}
 	return false;
 }
@@ -65,38 +65,22 @@
 	if (!invert) {
 		while (frames--) {
 			u32_t scaled_marker = marker << 24;
-			*ptr = (*ptr & 0x00FFFFFF) | scaled_marker;
+			*ptr = (*ptr & 0x00FFFF00) | scaled_marker;
 			++ptr;
-			*ptr = (*ptr & 0x00FFFFFF) | scaled_marker;
+			*ptr = (*ptr & 0x00FFFF00) | scaled_marker;
 			++ptr;
 			marker = ( 0x05 + 0xFA ) - marker;
 		}
 	} else {
 		while (frames--) {
 			u32_t scaled_marker = marker << 24;
-			*ptr = ((~(*ptr)) & 0x00FFFFFF) | scaled_marker;
+			*ptr = ((~(*ptr)) & 0x00FFFF00) | scaled_marker;
 			++ptr;
-			*ptr = ((~(*ptr)) & 0x00FFFFFF) | scaled_marker;
+			*ptr = ((~(*ptr)) & 0x00FFFF00) | scaled_marker;
 			++ptr;
 			marker = ( 0x05 + 0xFA ) - marker;
 		}
 	}
 }
 
-// fill silence buffer with 10101100 which represents dop silence
-// leave marker zero it will be updated at output, leave lsb zero
-void dop_silence_frames(u32_t *ptr, frames_t frames) {
-	while (frames--) {
-		*ptr++ = 0x00ACAC00;
-		*ptr++ = 0x00ACAC00;
-	}
-}
-
-void dop_init(bool enable, unsigned delay) {
-	LOCK_O;
-	output.has_dop = enable;
-	output.dop_delay = delay;
-	UNLOCK_O;
-}
-
 #endif // DSD
diff -ruN '--exclude=.git' squeezelite-1.8.orig/dsd.c squeezelite-1.8/dsd.c
--- squeezelite-1.8.orig/dsd.c	2015-10-07 06:28:19.297316324 +0000
+++ squeezelite-1.8/dsd.c	2015-10-16 11:41:41.841468107 +0000
@@ -58,11 +58,11 @@
 
 #define BLOCK 4096 // expected size of dsd block
 #define BLOCK_FRAMES BLOCK * BYTES_PER_FRAME
-#define WRAP_BUF_SIZE 16
+#define WRAP_BUF_SIZE 32 // max 4 bytes per frame and 8 channels
 
 typedef enum { UNKNOWN=0, DSF, DSDIFF } dsd_type;
 
-static bool dop = false; // local copy of output.has_dop to avoid holding output lock
+static dsd_format outfmt = PCM; // local copy of output.dsdfmt to avoid holding output lock
 
 struct dsd {
 	dsd_type type;
@@ -212,7 +212,20 @@
 	unsigned bytes = _buf_used(streambuf);
 	unsigned block_left = d->block_size;
 	
-	unsigned bytes_per_frame = dop ? 2 : 1;
+	unsigned bytes_per_frame;
+	switch (outfmt) {
+	case DSD_U32_LE:
+	case DSD_U32_BE:
+		bytes_per_frame = 4;
+		break;
+	case DSD_U16_LE:
+	case DSD_U16_BE:
+	case DOP:
+		bytes_per_frame = 2;
+		break;
+	default:
+		bytes_per_frame = 1;
+	}
 	
 	if (bytes < d->block_size * d->channels) {
 		LOG_INFO("stream too short"); // this can occur when scanning the track
@@ -249,12 +262,14 @@
 
 		frames = min(bytes, d->sample_bytes) / bytes_per_frame;
 		if (frames == 0) {
-			if (dop && d->sample_bytes == 1 && bytes >= 2) {
-				// 1 byte left add a byte of silence and play
-				*(iptrl + 1) = *(iptrr + 1) = 0x69;
+			if (d->sample_bytes && bytes >= (2 * d->sample_bytes)) {
+				// byte(s) left fill frame with silence byte(s) and play
+				int i;
+				for (i = d->sample_bytes; i < bytes_per_frame; i++)
+					*(iptrl + i) = *(iptrr + i) = 0x69;
 				frames = 1;
 			} else {
-				// should not get here due to wrapping m/2 for dop should never result in 0 as header len is always even
+				// should not get here due to wrapping m/2 for dsd should never result in 0 as header len is always even
 				LOG_INFO("frames got to zero");
 				return DECODE_COMPLETE;
 			}
@@ -266,7 +281,123 @@
 		
 		count = frames;
 		
-		if (dop) {
+		switch (outfmt) {
+			
+		case DSD_U32_LE:
+		case DSD_U32_BE:
+			
+			if (d->channels == 1) {
+				if (d->lsb_first) {
+					while (count--) {
+						*(optr++) = dsd2pcm_bitreverse[*(iptrl)] << 24 | dsd2pcm_bitreverse[*(iptrl+1)] << 16
+							| dsd2pcm_bitreverse[*(iptrl+2)] << 8 | dsd2pcm_bitreverse[*(iptrl+3)];
+						*(optr++) = dsd2pcm_bitreverse[*(iptrl)] << 24 | dsd2pcm_bitreverse[*(iptrl+1)] << 16
+							| dsd2pcm_bitreverse[*(iptrl+2)] << 8 | dsd2pcm_bitreverse[*(iptrl+3)];
+						iptrl += 4;
+					}
+				} else {
+					while (count--) {
+						*(optr++) = *(iptrl) << 24 | *(iptrl+1) << 16 | *(iptrl+2) << 8 | *(iptrl+3);
+						*(optr++) = *(iptrl) << 24 | *(iptrl+1) << 16 | *(iptrl+2) << 8 | *(iptrl+3);
+						iptrl += 4;
+					}
+				}
+			} else {
+				if (d->lsb_first) {
+					while (count--) {
+						*(optr++) = dsd2pcm_bitreverse[*(iptrl)] << 24 | dsd2pcm_bitreverse[*(iptrl+1)] << 16
+							| dsd2pcm_bitreverse[*(iptrl+2)] << 8 | dsd2pcm_bitreverse[*(iptrl+3)];
+						*(optr++) = dsd2pcm_bitreverse[*(iptrr)] << 24 | dsd2pcm_bitreverse[*(iptrr+1)] << 16
+							| dsd2pcm_bitreverse[*(iptrr+2)] << 8 | dsd2pcm_bitreverse[*(iptrr+3)];
+						iptrl += 4;
+						iptrr += 4;
+					}
+				} else {
+					while (count--) {
+						*(optr++) = *(iptrl) << 24 | *(iptrl+1) << 16 | *(iptrl+2) << 8 | *(iptrl+3);
+						*(optr++) = *(iptrr) << 24 | *(iptrr+1) << 16 | *(iptrr+2) << 8 | *(iptrr+3);
+						iptrl += 4;
+						iptrr += 4;
+					}
+				}
+			}
+			
+			break;
+
+		case DSD_U16_LE:
+		case DSD_U16_BE:
+			
+			if (d->channels == 1) {
+				if (d->lsb_first) {
+					while (count--) {
+						*(optr++) = dsd2pcm_bitreverse[*(iptrl)] << 24 | dsd2pcm_bitreverse[*(iptrl+1)] << 16;
+						*(optr++) = dsd2pcm_bitreverse[*(iptrl)] << 24 | dsd2pcm_bitreverse[*(iptrl+1)] << 16;
+						iptrl += 2;
+					}
+				} else {
+					while (count--) {
+						*(optr++) = *(iptrl) << 24 | *(iptrl+1) << 16;
+						*(optr++) = *(iptrl) << 24 | *(iptrl+1) << 16;
+						iptrl += 2;
+					}
+				}
+			} else {
+				if (d->lsb_first) {
+					while (count--) {
+						*(optr++) = dsd2pcm_bitreverse[*(iptrl)] << 24 | dsd2pcm_bitreverse[*(iptrl+1)] << 16;
+						*(optr++) = dsd2pcm_bitreverse[*(iptrr)] << 24 | dsd2pcm_bitreverse[*(iptrr+1)] << 16;
+						iptrl += 2;
+						iptrr += 2;
+					}
+				} else {
+					while (count--) {
+						*(optr++) = *(iptrl) << 24 | *(iptrl+1) << 16;
+						*(optr++) = *(iptrr) << 24 | *(iptrr+1) << 16;
+						iptrl += 2;
+						iptrr += 2;
+					}
+				}
+			}
+			
+			break;
+
+		case DSD_U8:
+			
+			if (d->channels == 1) {
+				if (d->lsb_first) {
+					while (count--) {
+						*(optr++) = dsd2pcm_bitreverse[*(iptrl)] << 24;
+						*(optr++) = dsd2pcm_bitreverse[*(iptrl)] << 24;
+						iptrl += 1;
+					}
+				} else {
+					while (count--) {
+						*(optr++) = *(iptrl) << 24;
+						*(optr++) = *(iptrl) << 24;
+						iptrl += 1;
+					}
+				}
+			} else {
+				if (d->lsb_first) {
+					while (count--) {
+						*(optr++) = dsd2pcm_bitreverse[*(iptrl)] << 24;
+						*(optr++) = dsd2pcm_bitreverse[*(iptrr)] << 24;
+						iptrl += 1;
+						iptrr += 1;
+					}
+				} else {
+					while (count--) {
+						*(optr++) = *(iptrl) << 24;
+						*(optr++) = *(iptrr) << 24;
+						iptrl += 1;
+						iptrr += 1;
+					}
+				}
+			}
+			
+			break;
+
+		case DOP:
 			
 			if (d->channels == 1) {
 				if (d->lsb_first) {
@@ -300,7 +431,9 @@
 				}
 			}
 			
-		} else {
+			break;
+
+		case PCM:
 			
 			if (d->channels == 1) {
 				float *iptrf = d->transfer[0];
@@ -329,6 +462,8 @@
 				}
 			}
 			
+			break;
+			
 		}
 		
 		_buf_inc_readp(streambuf, bytes_read);
@@ -385,9 +520,17 @@
 		out = process.max_in_frames;
 	);
 	
-	if (dop) {
+	switch (outfmt) {
+	case DSD_U32_LE:
+	case DSD_U32_BE:
+		bytes_per_frame = d->channels * 4;
+		break;
+	case DSD_U16_LE:
+	case DSD_U16_BE:
+	case DOP:
 		bytes_per_frame = d->channels * 2;
-	} else {
+		break;
+	default:
 		bytes_per_frame = d->channels;
 		out = min(out, BLOCK);
 	}
@@ -404,7 +547,7 @@
 		optr = (u32_t *)process.inbuf;
 	);
 	
-	// handle wrap around end of streambuf and partial dop frame at end of stream
+	// handle wrap around end of streambuf and partial dsd frame at end of stream
 	if (!frames && bytes < bytes_per_frame) {
 		memset(tmp, 0x69, WRAP_BUF_SIZE); // 0x69 = dsd silence
 		memcpy(tmp, streambuf->readp, bytes);
@@ -420,7 +563,67 @@
 	
 	count = frames;
 	
-	if (dop) {
+	switch (outfmt) {
+
+	case DSD_U32_LE:
+	case DSD_U32_BE:
+		
+		if (d->channels == 1) {
+			while (count--) {
+				*(optr++) = *(iptr) << 24 | *(iptr+1) << 16 | *(iptr+2) << 8 | *(iptr+3);
+				*(optr++) = *(iptr) << 24 | *(iptr+1) << 16 | *(iptr+2) << 8 | *(iptr+3);
+				iptr += bytes_per_frame;
+			}
+		} else {
+			while (count--) {
+				*(optr++) = *(iptr  ) << 24 | *(iptr + d->channels)     << 16
+					| *(iptr + 2 * d->channels)     << 8 | *(iptr + 3 * d->channels);
+				*(optr++) = *(iptr+1) << 24 | *(iptr + d->channels + 1) << 16
+					| *(iptr + 2 * d->channels + 1) << 8 | *(iptr + 3 * d->channels + 1);
+				iptr += bytes_per_frame;
+			}
+		}
+		
+		break;
+		
+	case DSD_U16_LE:
+	case DSD_U16_BE:
+		
+		if (d->channels == 1) {
+			while (count--) {
+				*(optr++) = *(iptr) << 24 | *(iptr+1) << 16;
+				*(optr++) = *(iptr) << 24 | *(iptr+1) << 16;
+				iptr += bytes_per_frame;
+			}
+		} else {
+			while (count--) {
+				*(optr++) = *(iptr  ) << 24 | *(iptr + d->channels)     << 16;
+				*(optr++) = *(iptr+1) << 24 | *(iptr + d->channels + 1) << 16;
+				iptr += bytes_per_frame;
+			}
+		}
+		
+		break;
+		
+	case DSD_U8:
+		
+		if (d->channels == 1) {
+			while (count--) {
+				*(optr++) = *(iptr) << 24;
+				*(optr++) = *(iptr) << 24;
+				iptr += bytes_per_frame;
+			}
+		} else {
+			while (count--) {
+				*(optr++) = *(iptr  ) << 24;
+				*(optr++) = *(iptr+1) << 24;
+				iptr += bytes_per_frame;
+			}
+		}
+		
+		break;
+		
+	case DOP:
 		
 		if (d->channels == 1) {
 			while (count--) {
@@ -436,7 +639,9 @@
 			}
 		}
 		
-	} else {
+		break;
+		
+	case PCM:
 		
 		if (d->channels == 1) {
 			float *iptrf = d->transfer[0];
@@ -465,6 +670,8 @@
 			}
 		}
 
+		break;
+		
 	}
 	
 	_buf_inc_readp(streambuf, bytes_read);
@@ -491,6 +698,7 @@
 
 static decode_state dsd_decode(void) {
 	decode_state ret;
+	char *fmtstr;
 
 	LOCK_S;
 
@@ -527,25 +735,53 @@
 		LOG_INFO("setting track_start");
 		output.track_start = outputbuf->writep;
 
-		dop = output.has_dop;
+		outfmt = output.dsdfmt;
 
-		if (dop && d->sample_rate / 16 > output.supported_rates[0]) {
-			LOG_INFO("DOP sample rate too high for device - converting to PCM");
-			dop = false;
+		switch (outfmt) {
+		case DSD_U32_LE:
+			fmtstr = "DSD_U32_LE";
+			output.next_sample_rate = d->sample_rate / 32;
+			break;
+		case DSD_U32_BE:
+			fmtstr = "DSD_U32_BE";
+			output.next_sample_rate = d->sample_rate / 32;
+			break;
+		case DSD_U16_LE:
+			fmtstr = "DSD_U16_LE";
+			output.next_sample_rate = d->sample_rate / 16;
+			break;
+		case DSD_U16_BE:
+			fmtstr = "DSD_U16_BE";
+			output.next_sample_rate = d->sample_rate / 16;
+			break;
+		case DSD_U8:
+			fmtstr = "DSD_U8";
+			output.next_sample_rate = d->sample_rate / 8;
+			break;
+		case DOP:
+			fmtstr = "DOP";
+			output.next_sample_rate = d->sample_rate / 16;
+			break;
+		case PCM:
+			// PCM case after DSD rate check and possible fallback to PCM conversion 
+			break;
 		}
 
-		if (dop) {
-			LOG_INFO("DOP output");
-			output.next_dop = true;
-			output.next_sample_rate = d->sample_rate / 16;
-			output.fade = FADE_INACTIVE;
-		} else {
+		if (outfmt != PCM && output.next_sample_rate > output.supported_rates[0]) {
+			LOG_INFO("DSD sample rate too high for device - converting to PCM");
+			outfmt = PCM;
+		}
+		
+		if (outfmt == PCM) {
 			LOG_INFO("DSD to PCM output");
-			output.next_dop = false;
 			output.next_sample_rate = decode_newstream(d->sample_rate / 8, output.supported_rates);
 			if (output.fade_mode) _checkfade(true);
+		} else {
+			LOG_INFO("DSD%u stream, format: %s, rate: %uHz\n", d->sample_rate / 44100, fmtstr, output.next_sample_rate);
+			output.fade = FADE_INACTIVE;
 		}
-	
+
+		output.next_fmt = outfmt;
 		decode.new_stream = false;
 
 		UNLOCK_O;
@@ -570,6 +806,13 @@
 	return ret;
 }
 
+void dsd_init(dsd_format format, unsigned delay) {
+	LOCK_O;
+	output.dsdfmt = format;
+	output.dsd_delay = delay;
+	UNLOCK_O;
+}
+
 static void dsd_open(u8_t size, u8_t rate, u8_t chan, u8_t endianness) {
 	d->type = UNKNOWN;
 
@@ -625,4 +868,22 @@
 	return &ret;
 }
 
+// invert polarity for frames in the output buffer
+void dsd_invert(u32_t *ptr, frames_t frames) {
+	while (frames--) {
+		*ptr = ~(*ptr);
+		++ptr;
+		*ptr = ~(*ptr);
+		++ptr;
+	}
+}
+
+// fill silence buffer with 10101100 which represents dsd silence
+void dsd_silence_frames(u32_t *ptr, frames_t frames) {
+	while (frames--) {
+		*ptr++ = 0x69696969;
+		*ptr++ = 0x69696969;
+	}
+}
+
 #endif // DSD
diff -ruN '--exclude=.git' squeezelite-1.8.orig/faad.c squeezelite-1.8/faad.c
--- squeezelite-1.8.orig/faad.c	2015-10-07 06:28:19.300649541 +0000
+++ squeezelite-1.8/faad.c	2015-10-16 11:41:41.841468107 +0000
@@ -376,7 +376,7 @@
 			LOCK_O;
 			LOG_INFO("setting track_start");
 			output.next_sample_rate = decode_newstream(samplerate, output.supported_rates);
-			IF_DSD( output.next_dop = false; )
+			IF_DSD( output.next_fmt = PCM; )
 			output.track_start = outputbuf->writep;
 			if (output.fade_mode) _checkfade(true);
 			decode.new_stream = false;
diff -ruN '--exclude=.git' squeezelite-1.8.orig/ffmpeg.c squeezelite-1.8/ffmpeg.c
--- squeezelite-1.8.orig/ffmpeg.c	2015-10-07 06:28:19.300649541 +0000
+++ squeezelite-1.8/ffmpeg.c	2015-10-16 11:41:41.841468107 +0000
@@ -339,7 +339,7 @@
 		LOCK_O;
 		LOG_INFO("setting track_start");
 		output.next_sample_rate = decode_newstream(ff->codecC->sample_rate, output.supported_rates);
-		IF_DSD(	output.next_dop = false; )
+		IF_DSD(	output.next_fmt = PCM; )
 		output.track_start = outputbuf->writep;
 		if (output.fade_mode) _checkfade(true);
 		decode.new_stream = false;
diff -ruN '--exclude=.git' squeezelite-1.8.orig/flac.c squeezelite-1.8/flac.c
--- squeezelite-1.8.orig/flac.c	2015-10-07 06:28:19.300649541 +0000
+++ squeezelite-1.8/flac.c	2015-10-16 11:41:41.841468107 +0000
@@ -118,14 +118,19 @@
 		decode.new_stream = false;
 
 #if DSD
-		if (output.has_dop && bits_per_sample == 24 && is_flac_dop((u32_t *)lptr, (u32_t *)rptr, frames)) {
+#if SL_LITTLE_ENDIAN
+#define MARKER_OFFSET 2
+#else
+#define MARKER_OFFSET 1
+#endif		
+		if (bits_per_sample == 24 && is_stream_dop(((u8_t *)lptr) + MARKER_OFFSET, ((u8_t *)rptr) + MARKER_OFFSET, 4, frames)) {
 			LOG_INFO("file contains DOP");
-			output.next_dop = true;
+			output.next_fmt = DOP;
 			output.next_sample_rate = frame->header.sample_rate;
 			output.fade = FADE_INACTIVE;
 		} else {
 			output.next_sample_rate = decode_newstream(frame->header.sample_rate, output.supported_rates);
-			output.next_dop = false;
+			output.next_fmt = PCM;
 			if (output.fade_mode) _checkfade(true);
 		}
 #else
diff -ruN '--exclude=.git' squeezelite-1.8.orig/mad.c squeezelite-1.8/mad.c
--- squeezelite-1.8.orig/mad.c	2015-10-07 06:28:19.300649541 +0000
+++ squeezelite-1.8/mad.c	2015-10-16 11:41:41.841468107 +0000
@@ -241,7 +241,7 @@
 			LOCK_O;
 			LOG_INFO("setting track_start");
 			output.next_sample_rate = decode_newstream(m->synth.pcm.samplerate, output.supported_rates);
-			IF_DSD(	output.next_dop = false; )
+			IF_DSD(	output.next_fmt = PCM; )
 			output.track_start = outputbuf->writep;
 			if (output.fade_mode) _checkfade(true);
 			decode.new_stream = false;
diff -ruN '--exclude=.git' squeezelite-1.8.orig/main.c squeezelite-1.8/main.c
--- squeezelite-1.8.orig/main.c	2015-10-07 06:28:19.300649541 +0000
+++ squeezelite-1.8/main.c	2015-10-16 11:50:52.822122410 +0000
@@ -91,7 +91,12 @@
 		   "  \t\t\t phase_response = 0-100 (0 = minimum / 50 = linear / 100 = maximum)\n"
 #endif
 #if DSD
-		   "  -D [delay]\t\tOutput device supports DSD over PCM (DoP), delay = optional delay switching between PCM and DoP in ms\n" 
+#if ALSA
+		   "  -D [delay][:format]\tOutput device supports DSD, delay = optional delay switching between PCM and DSD in ms\n"
+		   "  \t\t\t format = dop (default if not specified), u8, u16le, u16be, u32le or u32be.\n"
+#else
+		   "  -D [delay]\t\tOutput device supports DSD over PCM (DoP), delay = optional delay switching between PCM and DoP in ms\n"
+#endif
 #endif
 #if VISEXPORT
 		   "  -v \t\t\tVisualiser support\n"
@@ -215,8 +220,8 @@
 	bool output_mixer_unmute = false;
 #endif
 #if DSD
-	bool dop = false;
-	unsigned dop_delay = 0;
+	unsigned dsd_delay = 0;
+	dsd_format dsd_outfmt = PCM;
 #endif
 #if VISEXPORT
 	bool visexport = false;
@@ -443,9 +448,19 @@
 #endif
 #if DSD
 		case 'D':
-			dop = true;
+			dsd_outfmt = DOP;
 			if (optind < argc && argv[optind] && argv[optind][0] != '-') {
-				dop_delay = atoi(argv[optind++]);
+				char *dstr = next_param(argv[optind++], ':');
+				char *fstr = next_param(NULL, ':');
+				dsd_delay = dstr ? atoi(dstr) : 0;
+				if (fstr) {
+					if (!strcmp(fstr, "dop")) dsd_outfmt = DOP; 
+					if (!strcmp(fstr, "u8")) dsd_outfmt = DSD_U8; 
+					if (!strcmp(fstr, "u16le")) dsd_outfmt = DSD_U16_LE; 
+					if (!strcmp(fstr, "u32le")) dsd_outfmt = DSD_U32_LE; 
+					if (!strcmp(fstr, "u16be")) dsd_outfmt = DSD_U16_BE; 
+					if (!strcmp(fstr, "u32be")) dsd_outfmt = DSD_U32_BE;
+				}
 			}
 			break;
 #endif
@@ -571,7 +586,7 @@
 	}
 
 #if DSD
-	dop_init(dop, dop_delay);
+	dsd_init(dsd_outfmt, dsd_delay);
 #endif
 
 #if VISEXPORT
diff -ruN '--exclude=.git' squeezelite-1.8.orig/mpg.c squeezelite-1.8/mpg.c
--- squeezelite-1.8.orig/mpg.c	2015-10-07 06:28:19.300649541 +0000
+++ squeezelite-1.8/mpg.c	2015-10-16 11:41:41.844801323 +0000
@@ -124,7 +124,7 @@
 			LOG_INFO("setting track_start");
 			LOCK_O_not_direct;
 			output.next_sample_rate = decode_newstream(rate, output.supported_rates);
-			IF_DSD( output.next_dop = false; )
+			IF_DSD( output.next_fmt = PCM; )
 			output.track_start = outputbuf->writep;
 			if (output.fade_mode) _checkfade(true);
 			decode.new_stream = false;
diff -ruN '--exclude=.git' squeezelite-1.8.orig/output_alsa.c squeezelite-1.8/output_alsa.c
--- squeezelite-1.8.orig/output_alsa.c	2015-10-07 06:28:19.300649541 +0000
+++ squeezelite-1.8/output_alsa.c	2015-10-16 11:41:41.844801323 +0000
@@ -44,6 +44,10 @@
 static struct {
 	char device[MAX_DEVICE_LEN + 1];
 	snd_pcm_format_t format;
+#if DSD
+	dsd_format outfmt;
+	snd_pcm_format_t pcmfmt;
+#endif
 	snd_pcm_uframes_t buffer_size;
 	snd_pcm_uframes_t period_size;
 	unsigned rate;
@@ -58,7 +62,7 @@
 
 extern u8_t *silencebuf;
 #if DSD
-extern u8_t *silencebuf_dop;
+extern u8_t *silencebuf_dsd;
 #endif
 
 static log_level loglevel;
@@ -334,7 +338,11 @@
 	return true;
 }
 
+#if DSD
+static int alsa_open(const char *device, unsigned sample_rate, unsigned alsa_buffer, unsigned alsa_period, dsd_format outfmt) {
+#else
 static int alsa_open(const char *device, unsigned sample_rate, unsigned alsa_buffer, unsigned alsa_period) {
+#endif
 	int err;
 	snd_pcm_hw_params_t *hw_params;
 	snd_pcm_hw_params_alloca(&hw_params);
@@ -344,6 +352,9 @@
 
 	// reset params
 	alsa.rate = 0;
+#if DSD
+	alsa.outfmt = PCM;
+#endif
 	alsa.period_size = 0;
 	strcpy(alsa.device, device);
 
@@ -400,6 +411,22 @@
 	}
 
 	// set the sample format
+#if DSD
+	switch (outfmt) {
+	case DSD_U8:
+		alsa.format = SND_PCM_FORMAT_DSD_U8; break;
+	case DSD_U16_LE:
+		alsa.format = SND_PCM_FORMAT_DSD_U16_LE; break;
+	case DSD_U16_BE:
+		alsa.format = SND_PCM_FORMAT_DSD_U16_BE; break;
+	case DSD_U32_LE:
+		alsa.format = SND_PCM_FORMAT_DSD_U32_LE; break;
+	case DSD_U32_BE:
+		alsa.format = SND_PCM_FORMAT_DSD_U32_BE; break;
+	default:
+		alsa.format = alsa.pcmfmt;
+	}
+#endif
 	snd_pcm_format_t *fmt = alsa.format ? &alsa.format : (snd_pcm_format_t *)fmts;
 	do {
 		if (snd_pcm_hw_params_set_format(pcmp, hw_params, *fmt) >= 0) {
@@ -428,6 +455,18 @@
 		output.format = S24_3LE; break;
 	case SND_PCM_FORMAT_S16_LE: 
 		output.format = S16_LE; break;
+#if DSD
+	case SND_PCM_FORMAT_DSD_U32_LE:
+		output.format = U32_LE; break;
+	case SND_PCM_FORMAT_DSD_U32_BE:
+		output.format = U32_BE; break;
+	case SND_PCM_FORMAT_DSD_U16_LE:
+		output.format = U16_LE; break;
+	case SND_PCM_FORMAT_DSD_U16_BE:
+		output.format = U16_BE; break;
+	case SND_PCM_FORMAT_DSD_U8:
+		output.format = U8; break;
+#endif
 	default: 
 		break;
 	}
@@ -512,7 +551,10 @@
 
 	// this indicates we have opened the device ok
 	alsa.rate = sample_rate;
-
+#if DSD
+	alsa.outfmt = outfmt;
+#endif
+	
 	return 0;
 }
 
@@ -548,11 +590,14 @@
 	inputptr = (s32_t *) (silence ? silencebuf : outputbuf->readp);
 
 	IF_DSD(
-		if (output.dop) {
+		if (output.outfmt != PCM) {
 			if (silence) {
-				inputptr = (s32_t *) silencebuf_dop;
+				inputptr = (s32_t *) silencebuf_dsd;
 			}
-			update_dop((u32_t *) inputptr, out_frames, output.invert && !silence);
+			if (output.outfmt == DOP)
+				update_dop((u32_t *) inputptr, out_frames, output.invert && !silence);
+			else if (output.invert && !silence)
+				dsd_invert((u32_t *) inputptr, out_frames);
 		}
 	)
 
@@ -634,17 +679,28 @@
 			probe_device = false;
 		}
 
+#if DSD
+		if (!pcmp || alsa.rate != output.current_sample_rate || alsa.outfmt != output.outfmt ) {
+#else
 		if (!pcmp || alsa.rate != output.current_sample_rate) {
+#endif
 			LOG_INFO("open output device: %s", output.device);
 			LOCK;
 
 			// FIXME - some alsa hardware requires opening twice for a new sample rate to work
 			// this is a workaround which should be removed
 			if (alsa.reopen) {
+#if DSD
+				alsa_open(output.device, output.current_sample_rate, output.buffer, output.period, output.outfmt);
+#else
 				alsa_open(output.device, output.current_sample_rate, output.buffer, output.period);
+#endif
 			}
-
+#if DSD
+			if (!!alsa_open(output.device, output.current_sample_rate, output.buffer, output.period, output.outfmt)) {
+#else
 			if (!!alsa_open(output.device, output.current_sample_rate, output.buffer, output.period)) {
+#endif
 				output.error_opening = true;
 				UNLOCK;
 				sleep(5);
@@ -820,7 +876,11 @@
 
 	alsa.mmap = alsa_mmap;
 	alsa.write_buf = NULL;
+#if DSD
+	alsa.pcmfmt = 0;
+#else
 	alsa.format = 0;
+#endif
 	alsa.reopen = alsa_reopen;
 
 	if (!mixer_unmute) {
@@ -836,10 +896,17 @@
 	output.rate_delay = rate_delay;
 
 	if (alsa_sample_fmt) {
+#if DSD
+		if (!strcmp(alsa_sample_fmt, "32"))	alsa.pcmfmt = SND_PCM_FORMAT_S32_LE;
+		if (!strcmp(alsa_sample_fmt, "24")) alsa.pcmfmt = SND_PCM_FORMAT_S24_LE;
+		if (!strcmp(alsa_sample_fmt, "24_3")) alsa.pcmfmt = SND_PCM_FORMAT_S24_3LE;
+		if (!strcmp(alsa_sample_fmt, "16")) alsa.pcmfmt = SND_PCM_FORMAT_S16_LE;
+#else
 		if (!strcmp(alsa_sample_fmt, "32"))	alsa.format = SND_PCM_FORMAT_S32_LE;
 		if (!strcmp(alsa_sample_fmt, "24")) alsa.format = SND_PCM_FORMAT_S24_LE;
 		if (!strcmp(alsa_sample_fmt, "24_3")) alsa.format = SND_PCM_FORMAT_S24_3LE;
 		if (!strcmp(alsa_sample_fmt, "16")) alsa.format = SND_PCM_FORMAT_S16_LE;
+#endif
 	}
 
 	LOG_INFO("requested alsa_buffer: %u alsa_period: %u format: %s mmap: %u", output.buffer, output.period, 
diff -ruN '--exclude=.git' squeezelite-1.8.orig/output.c squeezelite-1.8/output.c
--- squeezelite-1.8.orig/output.c	2015-10-07 06:28:19.300649541 +0000
+++ squeezelite-1.8/output.c	2015-10-16 11:41:41.844801323 +0000
@@ -32,7 +32,7 @@
 
 u8_t *silencebuf;
 #if DSD
-u8_t *silencebuf_dop;
+u8_t *silencebuf_dsd;
 #endif
 
 #define LOCK   mutex_lock(outputbuf->mutex)
@@ -126,8 +126,8 @@
 					delay = output.rate_delay;
 				}
 				IF_DSD(
-				   if (output.dop != output.next_dop) {
-					   delay = output.dop_delay;
+				   if (output.outfmt != output.next_fmt) {
+					   delay = output.dsd_delay;
 				   }
 				)
 				frames -= size;
@@ -149,7 +149,7 @@
 				output.track_start_time = gettime_ms();
 				output.current_sample_rate = output.next_sample_rate;
 				IF_DSD(
-				   output.dop = output.next_dop;
+				   output.outfmt = output.next_fmt;
 				)
 				if (!output.fade == FADE_ACTIVE || !output.fade_mode == FADE_CROSSFADE) {
 					output.current_replay_gain = output.next_replay_gain;
@@ -163,7 +163,7 @@
 		}
 
 		IF_DSD(
-			if (output.dop) {
+			if (output.outfmt != PCM) {
 				gainL = gainR = FIXED_ONE;
 			}
 		)
@@ -359,12 +359,12 @@
 	memset(silencebuf, 0, MAX_SILENCE_FRAMES * BYTES_PER_FRAME);
 
 	IF_DSD(
-		silencebuf_dop = malloc(MAX_SILENCE_FRAMES * BYTES_PER_FRAME);
-		if (!silencebuf_dop) {
-			LOG_ERROR("unable to malloc silence dop buffer");
+		silencebuf_dsd = malloc(MAX_SILENCE_FRAMES * BYTES_PER_FRAME);
+		if (!silencebuf_dsd) {
+			LOG_ERROR("unable to malloc silence dsd buffer");
 			exit(0);
 		}
-		dop_silence_frames((u32_t *)silencebuf_dop, MAX_SILENCE_FRAMES);
+		dsd_silence_frames((u32_t *)silencebuf_dsd, MAX_SILENCE_FRAMES);
 	)
 
 	LOG_DEBUG("idle timeout: %u", idle);
@@ -415,7 +415,7 @@
 	buf_destroy(outputbuf);
 	free(silencebuf);
 	IF_DSD(
-		free(silencebuf_dop);
+		free(silencebuf_dsd);
 	)
 }
 
diff -ruN '--exclude=.git' squeezelite-1.8.orig/output_pa.c squeezelite-1.8/output_pa.c
--- squeezelite-1.8.orig/output_pa.c	2015-10-07 06:28:19.300649541 +0000
+++ squeezelite-1.8/output_pa.c	2015-10-16 11:41:41.844801323 +0000
@@ -47,7 +47,7 @@
 
 extern u8_t *silencebuf;
 #if DSD
-extern u8_t *silencebuf_dop;
+extern u8_t *silencebuf_dsd;
 #endif
 
 void list_devices(void) {
@@ -313,9 +313,10 @@
 		}
 
 		IF_DSD(
-			if (output.dop) {
+			if (output.outfmt == DOP) {
 				update_dop((u32_t *) outputbuf->readp, out_frames, output.invert);
-			}
+			} else if (output.outfmt != PCM && output.invert)
+				dsd_invert((u32_t *) outputbuf->readp, out_frames);
 		)
 
 		memcpy(optr, outputbuf->readp, out_frames * BYTES_PER_FRAME);
@@ -325,7 +326,7 @@
 		u8_t *buf = silencebuf;
 
 		IF_DSD(
-			if (output.dop) {
+			if (output.outfmt != DOP) {
 				buf = silencebuf_dop;
 				update_dop((u32_t *) buf, out_frames, false); // don't invert silence
 			}
diff -ruN '--exclude=.git' squeezelite-1.8.orig/output_pack.c squeezelite-1.8/output_pack.c
--- squeezelite-1.8.orig/output_pack.c	2015-10-07 06:28:19.300649541 +0000
+++ squeezelite-1.8/output_pack.c	2015-10-16 11:41:41.844801323 +0000
@@ -44,6 +44,100 @@
 
 void _scale_and_pack_frames(void *outputptr, s32_t *inputptr, frames_t cnt, s32_t gainL, s32_t gainR, output_format format) {
 	switch(format) {
+#if DSD
+	case U32_LE:
+		{
+#if SL_LITTLE_ENDIAN
+			memcpy(outputptr, inputptr, cnt * BYTES_PER_FRAME);
+#else
+			u32_t *optr = (u32_t *)(void *)outputptr;
+			while (cnt--) {
+				s32_t lsample = *(inputptr++);
+				s32_t rsample = *(inputptr++);
+				*(optr++) = 
+					(lsample & 0xff000000) >> 24 | (lsample & 0x00ff0000) >> 8 |
+					(lsample & 0x0000ff00) << 8  | (lsample & 0x000000ff) << 24;
+				*(optr++) = 
+					(rsample & 0xff000000) >> 24 | (rsample & 0x00ff0000) >> 8 |
+					(rsample & 0x0000ff00) << 8  | (rsample & 0x000000ff) << 24;
+			}
+#endif
+		}
+		break;
+	case U32_BE:
+		{
+#if SL_LITTLE_ENDIAN
+			u32_t *optr = (u32_t *)(void *)outputptr;
+			while (cnt--) {
+				s32_t lsample = *(inputptr++);
+				s32_t rsample = *(inputptr++);
+				*(optr++) = 
+					(lsample & 0xff000000) >> 24 | (lsample & 0x00ff0000) >> 8 |
+					(lsample & 0x0000ff00) << 8  | (lsample & 0x000000ff) << 24;
+				*(optr++) = 
+					(rsample & 0xff000000) >> 24 | (rsample & 0x00ff0000) >> 8 |
+					(rsample & 0x0000ff00) << 8  | (rsample & 0x000000ff) << 24;
+			}
+#else
+			memcpy(outputptr, inputptr, cnt * BYTES_PER_FRAME);
+#endif
+		}
+		break;
+	case U16_LE:
+		{
+			u32_t *optr = (u32_t *)(void *)outputptr;
+#if SL_LITTLE_ENDIAN
+			while (cnt--) {
+				*(optr++) = (*(inputptr) >> 16 & 0x0000ffff) | (*(inputptr + 1) & 0xffff0000);
+				inputptr += 2;
+			}
+#else
+			while (cnt--) {
+				s32_t lsample = *(inputptr++);
+				s32_t rsample = *(inputptr++);
+				*(optr++) = 
+					(lsample & 0x00ff0000) << 8 | (lsample & 0xff000000) >> 8 |
+					(rsample & 0x00ff0000) >> 8 | (rsample & 0xff000000) >> 24;
+			}
+#endif
+		}
+		break;
+	case U16_BE:
+		{
+			u32_t *optr = (u32_t *)(void *)outputptr;
+#if SL_LITTLE_ENDIAN
+			while (cnt--) {
+				s32_t lsample = *(inputptr++);
+				s32_t rsample = *(inputptr++);
+				*(optr++) = 
+					(lsample & 0xff000000) >> 24 | (lsample & 0x00ff0000) >> 8 |
+					(rsample & 0xff000000) >> 8 | (rsample & 0x00ff0000) << 8;
+			}
+#else
+			while (cnt--) {
+				*(optr++) = (*(inputptr) & 0xffff0000) | (*(inputptr + 1) >> 16 & 0x0000ffff);
+				inputptr += 2;
+			}
+#endif
+		}
+		break;
+	case U8:
+		{
+			u16_t *optr = (u16_t *)(void *)outputptr;
+#if SL_LITTLE_ENDIAN
+			while (cnt--) {
+				*(optr++) = (u16_t)((*(inputptr) >> 24 & 0x000000ff) | (*(inputptr + 1) >> 16 & 0x0000ff00));
+				inputptr += 2;
+			}
+#else
+			while (cnt--) {
+				*(optr++) = (u16_t)((*(inputptr) >> 16 & 0x0000ff00) | (*(inputptr + 1) >> 24 & 0x000000ff));
+				inputptr += 2;
+			}
+#endif
+		}
+		break;
+#endif
 	case S16_LE:
 		{
 			u32_t *optr = (u32_t *)(void *)outputptr;
diff -ruN '--exclude=.git' squeezelite-1.8.orig/output_stdout.c squeezelite-1.8/output_stdout.c
--- squeezelite-1.8.orig/output_stdout.c	2015-10-07 06:28:19.300649541 +0000
+++ squeezelite-1.8/output_stdout.c	2015-10-16 11:41:41.844801323 +0000
@@ -36,7 +36,7 @@
 
 extern u8_t *silencebuf;
 #if DSD
-extern u8_t *silencebuf_dop;
+extern u8_t *silencebuf_dsd;
 #endif
 
 // buffer to hold output data so we can block on writing outside of output lock, allocated on init
@@ -63,11 +63,14 @@
 	}
 
 	IF_DSD(
-		   if (output.dop) {
+		   if (output.outfmt != PCM) {
 			   if (silence) {
-				   obuf = silencebuf_dop;
+				   obuf = silencebuf_dsd;
 			   }
-			   update_dop((u32_t *)obuf, out_frames, output.invert && !silence);
+			   if (output.outfmt == DOP)
+				   update_dop((u32_t *)obuf, out_frames, output.invert && !silence);
+			   else if (output.invert && !silence)
+				   dsd_invert((u32_t *)obuf, out_frames);
 		   }
 	)
 
diff -ruN '--exclude=.git' squeezelite-1.8.orig/pcm.c squeezelite-1.8/pcm.c
--- squeezelite-1.8.orig/pcm.c	2015-10-07 06:28:19.300649541 +0000
+++ squeezelite-1.8/pcm.c	2015-10-16 11:41:41.844801323 +0000
@@ -191,11 +191,24 @@
 	if (decode.new_stream) {
 		LOG_INFO("setting track_start");
 		LOCK_O_not_direct;
-		output.next_sample_rate = decode_newstream(sample_rate, output.supported_rates);
 		output.track_start = outputbuf->writep;
-		IF_DSD( output.next_dop = false; )
-		if (output.fade_mode) _checkfade(true);
 		decode.new_stream = false;
+		output.next_sample_rate = decode_newstream(sample_rate, output.supported_rates);
+#if DSD
+		if (sample_size == 3 &&
+			is_stream_dop(((u8_t *)streambuf->readp) + (bigendian?0:2),
+						  ((u8_t *)streambuf->readp) + (bigendian?0:2) + sample_size,
+						  sample_size * channels, bytes / (sample_size * channels))) {
+			LOG_INFO("file contains DOP");
+			output.next_fmt = DOP;
+			output.fade = FADE_INACTIVE;
+		} else {
+			output.next_fmt = PCM;
+			if (output.fade_mode) _checkfade(true);
+		}
+#else
+		if (output.fade_mode) _checkfade(true);
+#endif
 		UNLOCK_O_not_direct;
 		IF_PROCESS(
 			out = process.max_in_frames;
diff -ruN '--exclude=.git' squeezelite-1.8.orig/squeezelite.h squeezelite-1.8/squeezelite.h
--- squeezelite-1.8.orig/squeezelite.h	2015-10-07 06:28:19.303982757 +0000
+++ squeezelite-1.8/squeezelite.h	2015-10-16 11:41:41.844801323 +0000
@@ -517,7 +517,12 @@
 typedef enum { OUTPUT_OFF = -1, OUTPUT_STOPPED = 0, OUTPUT_BUFFER, OUTPUT_RUNNING, 
 			   OUTPUT_PAUSE_FRAMES, OUTPUT_SKIP_FRAMES, OUTPUT_START_AT } output_state;
 
+#if DSD
+typedef enum { PCM, DOP, DSD_U8, DSD_U16_LE, DSD_U32_LE, DSD_U16_BE, DSD_U32_BE } dsd_format;
+typedef enum { S32_LE, S24_LE, S24_3LE, S16_LE, U8, U16_LE, U16_BE, U32_LE, U32_BE } output_format;
+#else
 typedef enum { S32_LE, S24_LE, S24_3LE, S16_LE } output_format;
+#endif
 
 typedef enum { FADE_INACTIVE = 0, FADE_DUE, FADE_ACTIVE } fade_state;
 typedef enum { FADE_UP = 1, FADE_DOWN, FADE_CROSS } fade_dir;
@@ -575,10 +580,10 @@
 	u32_t stop_time;
 	u32_t idle_to;
 #if DSD
-	bool next_dop;             // set in decode thread
-	bool dop;
-	bool has_dop;              // set in dop_init - output device supports dop
-	unsigned dop_delay;        // set in dop_init - delay in ms switching to/from dop
+	dsd_format next_fmt;       // set in decode thread
+	dsd_format outfmt;
+	dsd_format dsdfmt;	       // set in dsd_init - output for DSD: DOP, DSD_U8, ...
+	unsigned dsd_delay;		   // set in dsd_init - delay in ms switching to/from dop
 #endif
 };
 
@@ -633,10 +638,11 @@
 
 // dop.c
 #if DSD
-bool is_flac_dop(u32_t *lptr, u32_t *rptr, frames_t frames);
+bool is_stream_dop(u8_t *lptr, u8_t *rptr, int step, frames_t frames);
 void update_dop(u32_t *ptr, frames_t frames, bool invert);
-void dop_silence_frames(u32_t *ptr, frames_t frames);
-void dop_init(bool enable, unsigned delay);
+void dsd_silence_frames(u32_t *ptr, frames_t frames);
+void dsd_invert(u32_t *ptr, frames_t frames);
+void dsd_init(dsd_format format, unsigned delay);
 #endif
 
 // codecs
diff -ruN '--exclude=.git' squeezelite-1.8.orig/vorbis.c squeezelite-1.8/vorbis.c
--- squeezelite-1.8.orig/vorbis.c	2015-10-07 06:28:19.303982757 +0000
+++ squeezelite-1.8/vorbis.c	2015-10-16 11:41:41.844801323 +0000
@@ -150,7 +150,7 @@
 		LOG_INFO("setting track_start");
 		LOCK_O_not_direct;
 		output.next_sample_rate = decode_newstream(info->rate, output.supported_rates); 
-		IF_DSD(	output.next_dop = false; )
+		IF_DSD(	output.next_fmt = PCM; )
 		output.track_start = outputbuf->writep;
 		if (output.fade_mode) _checkfade(true);
 		decode.new_stream = false;
