unsorted C snippets for small/fast static apps

For discussions about programming, programming questions/advice, and projects that don't really have anything to do with Puppy.
Message
Author
User avatar
technosaurus
Posts: 4853
Joined: Mon 19 May 2008, 01:24
Location: Blue Springs, MO
Contact:

unsorted C snippets for small/fast static apps

#1 Post by technosaurus »

Code: Select all

#ifndef _STRING_H
#include <string.h> /* for strlen */
#endif
#ifndef _UNISTD_H
#include <unistd.h> /* for write */
#endif
#ifndef _STDLIB_H
#include <stdlib.h> /* for exit */
#endif

/* use fast bit shift ops to convert a string of 4 chars to an int*/
#define C4I(a,b,c,d) ((((((((d))<<8)|(c))<<8)|(b))<<8)|(a))
#define S2I(s) C4I(s[0], s[1], s[2], s[3])

/* convert 8 chars to long long (you can't switch-case a string) */
#define C8LL(a,b,c,d,e,f,g,h) ((((((((((((((((long long)h)<<8)|\
	((long long)g))<<8)|((long long)f))<<8)|((long long)e))<<8)|\
	((long long)d))<<8)|((long long)c))<<8)|((long long)b))<<8)|((long long)a))
#define S2LL(s) C8LL(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7])

/* for lossy string compression - useful for switch comparison of strings */
#define C2I(c) switch(c){ \
case '\0' : (0); break; \
case 'a' : case 'A' : (1); break: \
case 'b' : case 'B' : (2); break: \
case 'c' : case 'C' : (3); break: \
case 'd' : case 'D' : (4); break: \
case 'e' : case 'E' : (5); break: \
case 'f' : case 'F' : (6); break: \
case 'g' : case 'G' : (7); break: \
case 'h' : case 'H' : (8); break: \
case 'i' : case 'I' : (9); break: \
case 'j' : case 'J' : (10); break: \
case 'k' : case 'K' : (11); break: \
case 'l' : case 'L' : (12); break: \
case 'm' : case 'M' : (13); break: \
case 'n' : case 'N' : (14); break: \
case 'o' : case 'O' : (15); break: \
case 'p' : case 'P' : (16); break: \
case 'q' : case 'Q' : (17); break: \
case 'r' : case 'R' : (18); break: \
case 's' : case 'S' : (19); break: \
case 't' : case 'T' : (20); break: \
case 'u' : case 'U' : (21); break: \
case 'v' : case 'V' : (22); break: \
case 'w' : case 'W' : (23); break: \
case 'x' : case 'X' : (24); break: \
case 'y' : case 'Y' : (25); break: \
case 'z' : case 'Z' : (26); break: \
case '0' : case '1' : case '2' : case '3' : case '4' : case '5' : case '6' : case '7' : case '8' : case '9' : case '#' : case '+' : (27); break: \
case '(' : case ')' : case '{' : case '}' : case '[' : case ']' : case '\"' : case '\'' : case '<' : case '>' : case '/' : case '\\' : (28); break: \
case ' ' : case '.' : case '-' : case '_' : case '~' : case ',' : (29); break: \
case ':' : case ';' : case '|' : case '!' : case '@' : case '$' : case '%' : case '&' : case '*' : (30); break: \
default : (31); break; }

#define ULL (unsigneed long long)
/* this one is case insensitive & bumps it up to 12 chars 
 * with room for 4 more bits, but it combines non-alpha characters */
#define S12LL(s) \
(	((s[0])==0)? 0 : ULL C2I(s[0]) | \
	((s[1])==0)? 0 : ULL C2I(s[1])<<5)) | \
	((s[2])==0)? 0 : ULL C2I(s[2])<<10)) | \
	((s[3])==0)? 0 : ULL C2I(s[3])<<15)) | \
	((s[4])==0)? 0 : ULL C2I(s[4])<<20)) | \
	((s[5])==0)? 0 : ULL C2I(s[5])<<25)) | \
	((s[6])==0)? 0 : ULL C2I(s[6])<<30)) | \
	((s[7])==0)? 0 : ULL C2I(s[7])<<35)) | \
	((s[8])==0)? 0 : ULL C2I(s[8])<<40)) | \
	((s[9])==0)? 0 : ULL C2I(s[9])<<45)) | \
	((s[10])==0)? 0 : ULL C2I(s[10])<<50)) | \
	((s[11])==0)? 0 : ULL C2I(s[11])<<55)) \
)	/* generate 2nd string in bash with "${word:12:99}" for longer strings */


/* this is an example of how to uses strings as sprites */
/* acceptable values in shape strings are 0123 - Set by the -'0'*/
#define shapeit444(s) ( ( s[0]-'0' )|( s[1]-'0'<<2 )|( s[2]-'0'<<4 )|\
( s[3]-'0'<<6 )|( s[4]-'0'<<8 )|( s[5]-'0'<<10 )|( s[6]-'0'<<12 )|\
( s[7]-'0'<<14 )|( s[8]-'0'<<16 )|( s[9]-'0'<<18 )|( s[10]-'0'<<20 )|\
( s[11]-'0'<<22 )|( s[12]-'0'<<24 )|( s[13]-'0'<<26 )|( s[14]-'0'<<28 )|\
( s[15]-'0'<<30 ) )

#define EXAMPLESHAPE444 shapeit( \
(   "1023" \
    "1022" \
    "1000" \
    "1111" ))

#define BIT(x) (1 << (x)) /*bit 3 would be 0000100 */
#define NUM_ELEMENTS(a) (sizeof(a)/sizeof(a[0])) /* argc is useless */
#define MAX(a, b)	((a < b) ? b : a)
#define MIN(a, b)	((a > b) ? b : a)
#define ABS(a)    	((a < 0) ? -a : a)
#define SWAP(a, b)  do { a ^= b; b ^= a; a ^= b; } while ( 0 )

/* write is much much smaller than printf */
#define W1(s) ((NULL != s) ? write(1,s, strlen(s)) : write(1,"", 1) )
#define W2(s) ((NULL != s) ? write(2,s, strlen(s)) : write(2,"", 1) )

/* only works with constant integers such as __LINE__ macro to convert to strings */
#define D2S(n) D2S_(n)
#define D2S_(n) #n
#define LINE_NO D2S(__LINE__)

/* GNU helper crap for verbose debugging, could probably be simplified */
#ifdef __STDC__
#define HAVE_STDC 1
#else
#define HAVE_STDC 0
#endif
#ifdef __STDC_VERSION__
#define HAVE_STDC_VERSION 1
#else
#define HAVE_STDC_VERSION 0
#endif
#ifdef __GNUC__
#define HAVE_GNUC 1
#else
#define HAVE_GNUC 0
#endif
#ifdef __OPTIMIZE__
#define HAVE_OPTIMIZE 1
#else
#define HAVE_OPTIMIZE 0
#endif
#ifdef __OPTIMIZE_SIZE__
#define HAVE_OPTIMIZE_SIZE 1
#else
#define HAVE_OPTIMIZE_SIZE 0
#endif
#ifdef __NO_INLINE__
#define HAVE_NO_INLINE 1
#else
#define HAVE_NO_INLINE 0
#endif
#ifdef __STDC_HOSTED__
#define HAVE_HOSTED 1
#else
#define HAVE_HOSTED 0
#endif
#ifdef __STRICT_ANSI__
#define HAVE_STRICT 1
#else
#define HAVE_STRICT 0
#endif

#ifdef DEBUG
#define D(s)	W2("START Debug message : \n");W2(s); \
				W2("\n\tIn function: ");W2(__func__); \
				W2(", from file: ");W2(__FILE__); \
				W2(", line:");W2(LINE_NO); \
				W2("\n\tInclude at a depth of: ");W2(D2S(__INCLUDE_LEVEL__)); \
				W2(", from base file:");W2(__BASE_FILE__); \
				W2("\n\tCompiled Date: ");W2(__DATE__); \
				W2(", at Time: ");W2(__TIME__); \
				W2("\n\tMax values are char: ");W2(D2S(__SCHAR_MAX__)); \
				W2(", short: ");W2(D2S(__SHRT_MAX__)); \
				W2(", int: ");W2(D2S(__INT_MAX__)); \
				W2(",\n\t long: ");W2(D2S(__LONG_MAX__)); \
				W2(", long long: ");W2(D2S(__LONG_LONG_MAX__)); \
				if (HAVE_STDC) W2("\n\tConforms to ISO Standard C"); \
				if (HAVE_STDC_VERSION){ W2("\n\tC standard version=");W2(D2S(__STDC_VERSION__)); } \
				if (HAVE_GNUC) { W2("\n\tgcc version ");W2(__VERSION__); } \
				if (HAVE_OPTIMIZE) {W2("\n\tOptimized build");}else{W2("\n\tUn-ptimized build");} \
				if (HAVE_OPTIMIZE_SIZE) W2("\n\tOptimized for size"); \
				if (HAVE_NO_INLINE) {W2("\n\tInlining not disabled");}else{W2("\n\tInlining disabled");} \
				if (HAVE_HOSTED) W2("\n\tHosted environment"); \
				if (HAVE_STRICT) W2("\n\tUsing strict ansi"); \
				W2("\nEND Debug message\n")
#else
#define D(s)	/* when -DNDEBUG is passed at build, no debugging messages */
#endif

#define DIE(s,i) D(s);exit(i)


{ /* for CGI strings */
#ifndef _STDLIB_H
#include <stdlib.h> /* for getenv */
#endif
#define QUERY_STRING 			(char *)getenv("QUERY_STRING")
#define GATEWAY_INTERFACE 		(char *)getenv("GATEWAY_INTERFACE")
#define REMOTE_ADDR				(char *)getenv("REMOTE_ADDR")
#define HTTP_USER_AGENT			(char *)getenv("HTTP_USER_AGENT")
#define REMOTE_PORT				(char *)getenv("REMOTE_PORT")
#define HTTP_ACCEPT				(char *)getenv("HTTP_ACCEPT")
#define SCRIPT_FILENAME			(char *)getenv("SCRIPT_FILENAME")
#define HTTP_HOST				(char *)getenv("HTTP_HOST")
#define REQUEST_URI				(char *)getenv("REQUEST_URI")
#define SERVER_SOFTWARE			(char *)getenv("SERVER_SOFTWARE")
#define HTTP_ACCEPT_LANGUAGE	(char *)getenv("HTTP_ACCEPT_LANGUAGE")
#define SERVER_PROTOCOL			(char *)getenv("SERVER_PROTOCOL")
#define PATH_INFO				(char *)getenv("PATH_INFO")
#define REQUEST_METHOD			(char *)getenv("REQUEST_METHOD")
#define PWD						(char *)getenv("PWD")
#define SCRIPT_NAME				(char *)getenv("SCRIPT_NAME")
} /* CGI strings */

#define CHAR(x) ((char)(x))
#define INT(x) ((int)(x))
#define LONGLONG(x) ((long long)(x))
#define FLOAT(x) ((float *)(x))
#define DOUBLE(x) ((double)(x))
#define CHAR_(x) ((char *)(x))

/* these could be macros defined to 0,1,2,3...*/
enum { up,down,left,right };
enum { UP,DOWN,LEFT,RIGHT };

enum { north,south,west,east };
enum { NORTH,SOUTH,WEST,EAST };

Last edited by technosaurus on Fri 13 Mar 2015, 08:17, edited 2 times in total.
Check out my [url=https://github.com/technosaurus]github repositories[/url]. I may eventually get around to updating my [url=http://bashismal.blogspot.com]blogspot[/url].

User avatar
technosaurus
Posts: 4853
Joined: Mon 19 May 2008, 01:24
Location: Blue Springs, MO
Contact:

#2 Post by technosaurus »

see later post
Last edited by technosaurus on Tue 02 Oct 2012, 08:37, edited 1 time in total.
Check out my [url=https://github.com/technosaurus]github repositories[/url]. I may eventually get around to updating my [url=http://bashismal.blogspot.com]blogspot[/url].

User avatar
technosaurus
Posts: 4853
Joined: Mon 19 May 2008, 01:24
Location: Blue Springs, MO
Contact:

SORT macro

#3 Post by technosaurus »

Edit: there is a small bug in the sorting that I need to fix if the list is presorted.
This may look short and sweet, but it does a double sided selection sort and uses only low level operations, and should work on C arrays of any type that can work with <,>,== operations (basically no weird structs)

Code: Select all

#define SORT(a,buf) do { \
size_t head=0,tail=((sizeof(a)/sizeof(a[0]))-1),min=0,max=tail,p=0; \
do{ while(++p<=tail){ \
		if (a[p]>a[max]){ max=p; \
		}else{ if (a[p]<a[min]) min=p; }  } \
	if (a[min]<a[head]){ buf=a[head];a[head]=a[min];a[min]=buf; \
		if (max==head) max=min; } \
	if (a[max]>a[tail]){ buf=a[tail];a[tail]=a[max];a[max]=buf;}  \
	p=++head; \
}while(head<--tail); \
}while(0)
Note: you can eliminate the need to pass a buffer (buf) of the same type if you replace the buffer swaps with xor swaps, which look something like:
a^=b;b^=a^=b;
or use memcpy on with a generic buffer like char[sizeof(array[0])]
or use assembly such as in MIT licensed musl libc:
static inline int a_swap(volatile int *x, int v)
{
__asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
return v;
}
I haven't played around enough with it to compare speeds but it should be a good balance of speed compared to memory usage and code size.
Check out my [url=https://github.com/technosaurus]github repositories[/url]. I may eventually get around to updating my [url=http://bashismal.blogspot.com]blogspot[/url].

User avatar
technosaurus
Posts: 4853
Joined: Mon 19 May 2008, 01:24
Location: Blue Springs, MO
Contact:

smaller printf functions

#4 Post by technosaurus »

alternative printf functions (<1kb) handles strings, int, float, hex and char (%s,%d,%f,%{X,x}%c)

Code: Select all

#include <string.h> /* for strlen */
#include <stdarg.h> /* for VA_ARGS */
#include <fcntl.h> /* for open */

# define NARGS(...) NARGS_(__VA_ARGS__, \
	99, 98, 97, 96, 95, 94, 93, 92, 91, 90, \
	89, 88, 87, 86, 85, 84, 83, 82, 81, 80, \
	79, 78, 77, 76, 75, 74, 73, 72, 71, 70, \
	69, 68, 67, 66, 65, 64, 63, 62, 61, 60, \
	59, 58, 57, 56, 55, 54, 53, 52, 51, 50, \
	49, 48, 47, 46, 45, 44, 43, 42, 41, 40, \
	39, 38, 37, 36, 35, 34, 33, 32, 31, 30, \
	29, 28, 27, 26, 25, 24, 23, 22, 21, 20, \
	19, 18, 17, 16, 15, 14, 13, 12, 11, 10, \
	9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
# define NARGS_(n1, n2, n3, n4, n5, n6, n7, n8, n9, \
	n10, n11, n12, n13, n14, n15, n16, n17, n18, n19, \
	n20, n21, n22, n23, n24, n25, n26, n27, n28, n29, \
	n30, n31, n32, n33, n34, n35, n36, n37, n38, n39, \
	n40, n41, n42, n43, n44, n45, n46, n47, n48, n49, \
	n50, n51, n52, n53, n54, n55, n56, n57, n58, n59, \
	n60, n61, n62, n63, n64, n65, n66, n67, n68, n69, \
	n70, n71, n72, n73, n74, n75, n76, n77, n78, n79, \
	n80, n81, n82, n83, n84, n85, n86, n87, n88, n89, \
	n90, n91, n92, n93, n94, n95, n96, n97, n98, n99, \
	n, ...) n

#define write1(arg) write(1,arg,strlen(arg));
#define swrite(i,...) swrite_(i,NARGS(__VA_ARGS__), __VA_ARGS__)
#define swrite1(...) swrite_(1,NARGS(__VA_ARGS__), __VA_ARGS__)
#define swrite2(...) swrite_(2,NARGS(__VA_ARGS__), __VA_ARGS__)

void swrite_(int fd, unsigned int NARGS, ...){
char *arg;
va_list ap;
va_start(ap, NARGS);
while (NARGS-->0) {
	arg = va_arg(ap, char*);
	write(fd,arg,strlen(arg));
}
va_end(ap);
}

char *dtox(int d, int c){
static char buf[80];
if (0==d)return "0x0";
int p=0;
c-=10;
if (0>d) { buf[p++]='-'; d=-d;}
buf[p++]='0';buf[p++]='x';
unsigned int i=1<<28;
while (i>d) i/=16;
while (0<i){
	((d/i)>9) ? (buf[p++]=(c+(d/i))) : (buf[p++]=('0'+(d/i)));
	if (i<=d) d=d%i;
	i/=16;
}
buf[p]=0;
return buf;
}

char *dtos(int d){
static char buf[80];
int p=0;
if (0>d) { buf[p++]='-'; d=-d;}
if (0==d) { buf[p++]='0'; buf[p]=0; return buf; }
int i=1000000000;
while (i > d) i/=10;
while (0<i){
	buf[p++]=('0'+(d/i));
	if (i<=d) d=d%i;
	i/=10;
}
buf[p]=0;
return buf;
}

//if we add support for # of decimal places, round by adding .5 and cast to int
char *ftos(double f){
static char *buf;
buf=dtos((int) f);
int p=strlen(buf);
	buf[p++]='.';
	if (f<0) f=-f;
for (f*=10; (p<16) && (((int)f%10) >=0); f*=10 ){
	buf[p++]=('0'+((int)f)%10);
}
	buf[p]=0;
return buf;
}

void Tfprintf_(int fd,int nargs, ...){
	va_list ap; va_start(ap, nargs); static char *s; const char *fmt;
	fmt=va_arg(ap, char *);
	int i=0,p;
	char buf[80];
while (fmt[i]){
	p=0;
	while (fmt[i]!='%' && fmt[i]!=0) buf[p++]=fmt[i++];
	if (p != 0) write(fd,buf,p);
	if (nargs-->0){ 
		switch (fmt[++i]) {
		case 's': s=va_arg(ap, char *); break;
		case 'd': s=dtos(va_arg(ap, int)); break;
		case 'x': s=dtox(va_arg(ap, int),'a'); break;
		case 'X': s=dtox(va_arg(ap, int),'A' ); break;
		case 'f': s=ftos(va_arg(ap, double)); break;
		case 'c': buf[0]=(va_arg(ap, int));buf[1]=0;s=buf;break;
		case '%': s="%"; break;
		default : s="";break;
		} write(fd,s,strlen(s));
	}
i++;
}
va_end(ap);
}


#define Tfprintf(fd, ...) Tfprintf_(fd,NARGS(__VA_ARGS__), __VA_ARGS__)
#define Tprintf(...) Tfprintf(1, __VA_ARGS__) /* to stdout */
#define Teprintf(...) Tfprintf(2, __VA_ARGS__) /* to stderr */

int main(int argc, char **argv){
swrite1( "hello ",argv[0], " world on stdout\n");
swrite2( "hello ",argv[0], " world on stderr\n");
int fd=open("test",O_CREAT|O_WRONLY|O_APPEND);
  swrite(fd, "hello ",argv[0], " world on stdout\n");
close(fd);
Tprintf("begin %s %d %x %X %f %c %% end\n", "hello world", 255, 0xff, 0xFF, -3.4, 'Q');
}
Check out my [url=https://github.com/technosaurus]github repositories[/url]. I may eventually get around to updating my [url=http://bashismal.blogspot.com]blogspot[/url].

User avatar
technosaurus
Posts: 4853
Joined: Mon 19 May 2008, 01:24
Location: Blue Springs, MO
Contact:

#5 Post by technosaurus »

playing with duff's device - not yet tested:

Code: Select all

POW (a,b,c) do{
c=1;
if (b==0) break;
register n = (b + 9) / 10;
	switch (b%10){
	case 0 : do { c*=a;
	case 9 : c*=a;
	case 8 : c*=a;
	case 7 : c*=a;
	case 6 : c*=a;
	case 5 : c*=a;
	case 4 : c*=a;
	case 3 : c*=a;
	case 2 : c*=a;
	case 1 : c*=a;
			} while(--n > 0);
	}
}while (b>0)
Check out my [url=https://github.com/technosaurus]github repositories[/url]. I may eventually get around to updating my [url=http://bashismal.blogspot.com]blogspot[/url].

User avatar
technosaurus
Posts: 4853
Joined: Mon 19 May 2008, 01:24
Location: Blue Springs, MO
Contact:

#6 Post by technosaurus »

Here is an example working with file extensions to get MIME-type (though you could rearrange/replace the case statements with anything you want) see libmagic for a content based solution - this just uses the extensions and doesn't open or even stat the file.

Code: Select all

/** in order to compare strings you can't directly use a switch like:
 * switch (string){ case "mystring" : ... }
 * because switch only works on int types (char, short, long, long long)
 * so we bit shift the strings into a compatible int type
 * 
 **/
 
/* first compare 8 then >>16 and compare 6,... */
#define LL (long long)
#define C2I(a,b) ((a)|((b)<<8))
#define C4I(a,b,c,d) ((a)|((b)<<8)|((c)<<16)|((d)<<24))
#define C6I(a,b,c,d,e,f) ((a)|((b)<<8)|((c)<<16)|((d)<<24)|(LL(e)<<32)|(LL(f)<<40))
#define C8I(a,b,c,d,e,f,g,h) \
	((a)|((b)<<8)|((c)<<16)|((d)<<24)|(LL(e)<<32)|(LL(f)<<40)|(LL(g)<<48)|(LL(h)<<56))

#define LOWER(c) ( 'A' <= c && c <= 'Z' ) ? (c | 32) : (c)

static long long intify(char *s){
char buf[80]="\0\0\0\0\0\0\0\0";
int i=0,j=9;
while (s[i++]){}
while (i-- && j--) {buf[j]=LOWER(s[i]);}
return(C8I(buf[0],buf[1],buf[2],buf[3],buf[4],buf[5],buf[6],buf[7]));
}

static char *MIME_TYPE(long long i){
switch (i) {
case C8I('m','a','k','e','f','i','l','e') : return("text/x-makefile");
//case C8I('.','t','a','r','.','b','z','2') : return("");
case C8I('.','t','e','x','i','n','f','o') : return("text/x-texinfo");
case C8I('.','x','c','f','.','b','z','2') : return("image/x-compressed-xcf");
}
switch (i>>16) {
case C6I('u','t','h','o','r','s') : case C6I('r','e','d','i','t','s') :
									return("text/x-authors");
case C6I('o','p','y','i','n','g') : case C6I('i','c','e','n','s','e') :
									return("text/x-copying");
case C6I('.','g','t','k','r','c') : return("text/x-gtkrc");
case C6I('n','s','t','a','l','l') : return("text/x-install");
case C6I('.','m','c','v','i','e') : return("video/x-sgi-movie");
case C6I('.','p','a','t','c','h') : case C6I('d','p','a','t','c','h') :
									return("text/x-diff");
case C6I('r','e','a','d','m','e') : return("text/x-readme");
//case C6I('t','a','r','.','g','z') : return("");
case C6I('x','c','f','.','g','z') : return("image/x-compressed-xcf");
case C6I('.','v','c','a','r','d') : return("text/vcard");
}
switch (i>>32) {
case C4I('.','3','d','s')	:	return("image/x-3ds");
case C4I('.','a','a','c')	:	return("audio/x-aac");
case C4I('.','a','b','w')	:	return("text/abiword");
case C4I('.','a','n','i')	:	case C4I('a','n','i','m') :
case C4I('n','i','m','0')	:	case C4I('n','i','m','6') :
case C4I('n','i','m','1')	:	case C4I('n','i','m','7') :
case C4I('n','i','m','2')	:	case C4I('n','i','m','8') :
case C4I('n','i','m','3')	:	case C4I('n','i','m','9') :
case C4I('n','i','m','4')	:	case C4I('n','i','m','j') :
case C4I('n','i','m','5')	:	return("video/x-anim");
case C4I('.','a','s','f')	:	case C4I('.','a','s','x') :
								return("video/x-ms-asf");
case C4I('a','t','o','m')	:	return("application/atom+xml");
case C4I('.','a','v','i')	:	return("video/x-msvideo");
case C4I('.','b','f','e')	:	return("application/x-bfe");
case C4I('.','b','i','b')	:	return("text/bib");
case C4I('.','b','m','p')	:	return("image/bmp");
case C4I('.','c','+','+')	:	case C4I('.','c','p','p') :
								return("text/x-c++");
case C4I('.','c','g','m')	:	return("image/cgm");
case C4I('.','c','s','h')	:	return("text/x-csh");
case C4I('.','c','s','s')	:	return("text/css");
case C4I('.','c','s','v')	:	return("text/x-comma-separated-values");
case C4I('.','d','c','l')	:	return("text/x-dcl");
case C4I('.','d','e','b')	:	return("application/x-deb");
case C4I('d','i','f','f')	:	return("text/x-diff");
case C4I('.','d','o','c')	:	return("application/msword");
case C4I('d','o','c','x')	:	case C4I('d','o','c','m') :
	return("application/vnd.openxmlformats-officedocument.wordprocessingml.document");
case C4I('.','d','s','l')	:	return("text/x-dsl");
case C4I('.','d','t','d')	:	return("text/x-dtd");
case C4I('.','d','v','i')	:	return("application/x-dvi");
case C4I('.','d','w','g')	:	return("image/vnd.dwg");
case C4I('.','d','x','f')	:	return("image/vnd.dxf");
case C4I('.','e','d','i')	:	return("application/EDI-X12");
case C4I('.','e','m','l')	:	return("fail/rfc822");
case C4I('.','e','t','x')	: 	return("text/setext");
case C4I('.','f','i','g')	:	return("image/x-xfig");
case C4I('f','l','a','c')	:	return("audio/x-flac");
case C4I('.','f','l','c')	:	return("video/x-flc");
case C4I('.','f','l','i')	:	return("video/x-fli");
case C4I('.','f','l','v')	:	return("video/x-flv");
case C4I('.','g','i','f')	:	return("image/gif");
case C4I('.','h','+','+')	:	case C4I('.','h','p','p') :
								return("text/x-c-h++");
case C4I('.','h','t','m')	:	case C4I('h','t','m','l') :
								return("text/html");
case C4I('.','i','c','o')	:	return("image/vnd.microsoft.icon");
case C4I('.','i','c','s')	:	return("text/calendar");
case C4I('.','i','d','l')	:	return("text/x-idl");
case C4I('.','i','e','f')	:	return("image/ief");
case C4I('.','i','f','f')	:	return("image/x-iff");
case C4I('i','l','b','m')	:	return("image/x-ilbm");
case C4I('.','i','s','o')	:	return("application/x-cd-image");
case C4I('j','a','v','a')	:	return("text/x-java");
case C4I('.','j','p','g')	:	case C4I('j','p','e','g') :
case C4I('.','j','p','e')	:	return("image/jpeg");
case C4I('j','s','o','n')	:	return("application/json");
case C4I('.','l','w','o')	:	case C4I('l','w','o','b') :
								return("image/x-lwo");
case C4I('.','l','w','s')	:	return("image/x-lws");
case C4I('.','i','y','x')	:	return("text/x-lyx");
case C4I('m','b','o','x')	:	return("application/mbox");
case C4I('.','m','h','t')	:	return("multipart/related");
case C4I('.','m','k','a')	:	return("audio/x-matroska");
case C4I('.','m','k','v')	:	case C4I('m','k','3','d') :
								return("video/x-matroska");
case C4I('.','m','m','l')	:	return("text/mathml");
case C4I('.','m','p','3')	:	return("audio/mp3");
case C4I('.','m','p','4')	:	return("video/mp4");
case C4I('.','m','o','v')	:	return("video/quicktime");
case C4I('.','m','p','g')	:	case C4I('m','p','e','g') :
case C4I('.','m','p','2')	:	case C4I('.','m','p','e') :
case C4I('.','d','a','t')	:	case C4I('.','v','o','b') :
								return("video/mpeg");
case C4I('.','o','d','b')	:	case C4I('f','o','d','b') :
	return("application/vnd.oasis.opendocument.database");
case C4I('.','o','d','c')	:	case C4I('f','o','d','c') :
	return("application/vnd.oasis.opendocument.chart");
case C4I('.','o','d','f')	:	case C4I('f','o','d','f') :
	return("application/vnd.oasis.opendocument.formula");
case C4I('.','o','d','g')	:	case C4I('f','o','d','g') :
	return("application/vnd.oasis.opendocument.graphics");
case C4I('.','o','d','i')	:	case C4I('f','o','d','i') :
	return("application/vnd.oasis.opendocument.image");
case C4I('.','o','d','m')	:	case C4I('f','o','d','m') :
	return("application/vnd.oasis.opendocument.text-master");
case C4I('.','o','d','p')	:	case C4I('f','o','d','p') :
	return("application/vnd.oasis.opendocument.presentation");
case C4I('.','o','d','s')	:	case C4I('f','o','d','s') :
	return("application/vnd.oasis.opendocument.spreadsheet");
case C4I('.','o','d','t')	:	case C4I('f','o','d','t') :
	return("application/vnd.oasis.opendocument.text");
case C4I('.','o','g','a')	:	return("audio/ogg");
case C4I('.','o','g','g')	:	return("application/ogg");
case C4I('.','o','g','v')	:	return("video/ogg");
case C4I('.','o','t','b')	:	return("application/vnd.oasis.opendocument.database-template");
case C4I('.','o','t','c')	:	return("application/vnd.oasis.opendocument.chart-template");
case C4I('.','o','t','f')	:	return("application/vnd.oasis.opendocument.formula-template");
case C4I('.','o','t','g')	:	return("application/vnd.oasis.opendocument.graphics-template");
case C4I('.','o','t','i')	:	return("application/vnd.oasis.opendocument.image-template");
case C4I('.','o','t','m')	:	return("application/vnd.oasis.opendocument.text-master-template");
case C4I('.','o','t','p')	:	return("application/vnd.oasis.opendocument.presentation-template");
case C4I('.','o','t','s')	:	return("application/vnd.oasis.opendocument.spreadsheet-template");
case C4I('.','o','t','t')	:	return("application/vnd.oasis.opendocument.text-template");
case C4I('.','p','b','m')	:	return("image/x-portable-bitmap");
case C4I('.','p','d','f')	:	return("application/pdf");
case C4I('p','e','r','l')	:	return("text/x-perl");
case C4I('.','p','g','m')	:	return("image/x-portable-graymap");
case C4I('.','p','n','g')	:	return("image/png");
case C4I('.','p','n','m')	:	return("image/x-portable-anymap");
case C4I('.','p','o','v')	:	return("text/x-povray");
case C4I('.','p','p','m')	:	return("image/x-portable-pixmap");
case C4I('.','p','p','t')	:	case C4I('.','p','p','s') :
								return("application/vnd.ms-powerpoint");
case C4I('p','p','s','x')	:	case C4I('p','p','t','x') :
case C4I('p','p','s','m')	:	case C4I('p','p','t','m') :
	return("application/vnd.openxmlformats-officedocument.presentationml.presentation");
case C4I('.','p','s','d')	:	return("image/x-psd");
case C4I('.','r','a','r')	:	return("application/x-rar-compressed");
case C4I('.','r','a','s')	:	return("image/x-cmu-raster");
case C4I('.','r','d','f')	:	return("application/rdf+xml");
case C4I('.','r','g','b')	:	return("image/x-rgb");
case C4I('r','o','f','f')	:	return("text/x-troff");
case C4I('.','r','s','s')	:	return("application/rss+xml");
case C4I('.','r','t','f')	:	case C4I('.','r','t','x') :
								return("text/richtext");
case C4I('.','s','c','m')	: 	return("text/x-scheme");
case C4I('.','s','g','m')	:	case C4I('s','g','m','l') :
								return("text/sgml");
case C4I('.','s','l','k')	:	case C4I('s','y','l','k') :
								return("text/spreadsheet");
case C4I('.','s','p','x')	:	return("audio/speex");
case C4I('.','s','q','l')	: 	return("text/x-sql");
case C4I('.','s','v','g')	:	return("image/svg+xml");
case C4I('.','s','w','f')	:	return("application/x-shockwave-flash");
case C4I('.','t','a','r')	:	return("application/x-tar");
case C4I('.','t','c','l')	: 	return("text/x-tcl");
case C4I('.','t','e','x')	:	return("application/x-latex");
case C4I('t','e','x','i')	:	return("text/x-texinfo");
case C4I('.','t','g','a')	:	return("image/x-tga");
case C4I('.','t','i','f')	:	case C4I('t','i','f','f') :
								return("image/tiff");
case C4I('.','t','s','v')	:	return("text/tab-separated-values");
case C4I('.','t','t','f')	:	return("application/x-font-ttf");
case C4I('.','t','x','t')	:	case C4I('.','a','s','c') :
								return("text/plain");
case C4I('.','u','r','i')	: 	case C4I('.','u','r','l') :
								return("text/x-uri");
case C4I('.','v','c','f')	: 	case C4I('g','c','r','d') :
								return("text/vcard");
case C4I('.','w','a','v')	:	return("audio/wav");
case C4I('w','e','b','m')	:	return("video/webm");
case C4I('.','w','m','f')	:	return("image/x-wmf");
case C4I('w','o','f','f')	:	return("application/font-woff");
case C4I('.','w','m','v')	:	return("video/x-ms-wmv");
case C4I('.','w','r','l')	:	return("model/vrml");
case C4I('.','x','b','m')	:	return("image/x-xbitmap");
case C4I('.','x','c','f')	:	return("image/x-xcf");
case C4I('.','x','h','t')	:	case C4I('x','h','t','m')	:	
								return("application/xhtml+xml");
case C4I('.','x','l','s')	:	return("application/vnd.ms-excel");
case C4I('x','l','s','x') : case C4I('x','l','s','m') : case C4I('x','l','s','b') :
	return("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
case C4I('.','x','m','l')	:	return("text/xml");
case C4I('.','x','p','m')	:	return("image/x-xpixmap");
case C4I('.','x','w','d')	:	return("image/x-xwindowdump");
case C4I('.','z','i','p')	:	return("application/zip");
}
switch (i>>48){						/* check shorter extensions */
case C2I('a','g')	:			return("image/x-applix-graphic");
case C2I('a','u')	:			return("audio/basic");
case C2I('.','c')	:			return("text/x-c");
case C2I('c','c')	:			return("text/x-c++");
case C2I('e','l')	:			return("text/x-emacs-lisp");
case C2I('e','s')	:			return("application/ecmascript");
case C2I('.','f')	:			return("text/x-fortran");
case C2I('g','z')	:			return("application/gzip");
case C2I('.','h')	:			return("text/x-c-h");
case C2I('h','p')	:			return("text/x-c-h++");
case C2I('j','s')	:			return("text/javascript");
case C2I('m','e')	:			return("text/x-troff-me");
case C2I('m','m')	:			return("text/x-troff-mm");
case C2I('m','s')	:			return("text/x-troff-ms");
case C2I('p','l')	:			return("text/x-perl");
case C2I('p','s')	:			return("application/postscript");
case C2I('p','y')	:			return("text/x-python");
case C2I('q','t')	:			return("video/quicktime");
case C2I('.','s')	:			return("text/plain");
case C2I('s','h')	:			return("text/x-shellscript");
case C2I('.','t') : case C2I('t','r') : return("text/x-troff");
}
return(((i>>56)=='/') ? "inode/directory" : "Unknown type"); 
}

int main(int argc, char **argv){
char *mime_t=MIME_TYPE(intify(argv[1]));
write(1,mime_t,strlen(mime_t));
}
Check out my [url=https://github.com/technosaurus]github repositories[/url]. I may eventually get around to updating my [url=http://bashismal.blogspot.com]blogspot[/url].

goingnuts
Posts: 932
Joined: Sun 07 Dec 2008, 13:33
Contact:

#7 Post by goingnuts »

Your "file"-code really is nice! Thanks. Its extremely fast. I have used hexd in combination with a shell script in pUPnGO for some time. Maybe a combination of the two could yield an even better replacement for file...and avoid calling the shell/other bins...attached the shell-script that is to be used with the hexd-binary.
Some timings:
Your file:

Code: Select all

real    0m0.000s
user    0m0.000s
sys     0m0.000s
hexd+script:

Code: Select all

real    0m0.010s
user    0m0.000s
sys     0m0.008s
"real" file-program:

Code: Select all

real    0m0.042s
user    0m0.036s
sys     0m0.008s
finding type of a mp3-file (your "file" and the "real"-file gives correct type whereas the hexd gives wrong: ASCII text or data).
Attachments
file.gz
fake gz file-script
(3.96 KiB) Downloaded 1235 times

User avatar
technosaurus
Posts: 4853
Joined: Mon 19 May 2008, 01:24
Location: Blue Springs, MO
Contact:

#8 Post by technosaurus »

the "file" implementation was just a simple test case. If that was its full purpose, I never would have bothered. Where it does matter is on a web server with thousands of clients in order to tell the browser what kind of file it is about to send (from all of the web servers I checked, that code was a bottleneck) ... in this case, the difference between 1/100th(+) of a second and 1/1000th(-) can make a _HUGE_ difference

Story:
I was thinking about writing my own web server and wanted it to be small, fast, AND functional ... nweb seemed to be the most basic starting point, but it only accepted a few file types and the code wasn't very efficient (sometimes space efficient code does note equate to efficient code) so I set out to optimize it as follows
but...
a series of if strcasecmp ... has a lot of underlying calls in each one:
recalling my switch case trick used for the first 4 or 8 characters only needed to do 1 check since the characters get compiled to an integer constant ... seemed like a good starting point

problems:
1. need the end of a variable length string
2. has to be 8 characters long
3. needs to be case insensitive

1 (this is actually multiple problems)
I could use strlen(s) and store it in an int ... and use it to work backwards, but after looking at how various strlen implementations worked, it was obvious that it would be better to set up my own counter variable to loop to the end of the string, meaning I could use that counter as my length variable and even reuse it as my counter.

2. I didn't want to overwrite anything in the original string so I needed a char *buffer to keep a lower case 8 character string. I already had the end of the string stored, so all I needed to do was work backward and copy the last 8 chars to the buffer. I also needed to pad zeroes to the beginning so that it worked for short files such as aa.gz

3. rather than setting up a second buffer and doing a tolower(), it seemed like a pretty good idea to lower case it as it was copied over ... the most obvious way is to check if the char falls in the A-Z range and add +('a'-'A') ... which would reduce to a constant and be sufficiently fast, but A and a are exactly one bit different (see ascii table) so I figured there was _some_ bit shift operation that would work (bit ops are typically faster than an add) thus the | 32 in
( 'A' <= c && c <= 'Z' ) ? (c | 32) : (c)

Note: initially I use a char* ret value to hold the type, but it is more efficient (code size and compiler wise both) to just return int as a char* function

feel free to use it in a file redo (that is basically what the main() function is) or webserver or in any other code that uses a large series of strcmp() calls.
Check out my [url=https://github.com/technosaurus]github repositories[/url]. I may eventually get around to updating my [url=http://bashismal.blogspot.com]blogspot[/url].

User avatar
technosaurus
Posts: 4853
Joined: Mon 19 May 2008, 01:24
Location: Blue Springs, MO
Contact:

#9 Post by technosaurus »

some printf functions don't have a %b to print binary representation of numbers - this will do all types from 1 to 64 bits

Code: Select all

#define putbin(d) do{ \
unsigned long long i=1ULL<<63; \
while (!((unsigned long long) d & (i/=2))); /* optional - remove leading zeroes */ \
do { ( (unsigned long long) d & i) ? write(1,"1",1) : write(1,"0",1); } while (i/=2); \
write(1,"\n",1); /* optional - add new line */ \
} while (0)
Check out my [url=https://github.com/technosaurus]github repositories[/url]. I may eventually get around to updating my [url=http://bashismal.blogspot.com]blogspot[/url].

User avatar
technosaurus
Posts: 4853
Joined: Mon 19 May 2008, 01:24
Location: Blue Springs, MO
Contact:

#10 Post by technosaurus »

here are a bunch of alternative string functions that I wrote (the ones I most commonly encounter), just remove the T-prefix and use them instead of including <string.h>

most are a level of magnitude smaller than their libc counterparts (including smaller libc implementations) relying on simple design and the compiler for speed improvements (some ended up being faster and none appeared to be significantly slower) They may show to be less efficient for large strings because I didn't do any casting tricks to compare 4 or 8 characters at a time, mainly do to the extra logic taking up nearly as many instructions as just leaving it simple and letting the compiler help.

i

Code: Select all

int Tstrcmp(const char *l, const char *r){
while ( *l && *r && (l++[0] == r++[0]) );
return *l - *r;
}

char *Tstrrchr(char *s, int c){
unsigned int i=0,j=0;
do{	if ( s[i]==c ) j=i;
}while (s[i++]);
return (j) ? s+=j : "";
}

char *Tstrchr(char *s, int c){
unsigned int i=0;
while ( (s[i]!=c) && (s[i]))++i;
return (s[i]==c) ? s+=i : "";
}

char *Tstrchrnul(char *s, int c){
unsigned int i=0;
while ( (s[i]!=c) && (s[i]))++i;
return s+=i;
}

unsigned int Tstrlen(char *s){
unsigned int i=0;
while (s[i++]);
return --i;
}

char *Tstrstr(char *haystack, char *needle){
if (!needle[0]) return haystack;
unsigned int i;
while (*haystack){
	while (haystack[0]!=needle[0])haystack++;
	i=1;
	while (haystack[i] && needle[i] && haystack[i]==needle[i++]);
	if (!needle[i]) return haystack;
	else haystack+=i;
}
return haystack;
}


#define TOLOWER(c) (( 'A' <= c && c <= 'Z') ? (c+('a'-'A')) : c)
char *Tstrcasestr(char *haystack, char *needle){
if (!needle[0]) return haystack;
unsigned int i=0;
do{	if ( 'A' <= needle[i] && needle[i] <='Z' ) needle[i]+=('A'-'a');
	}while (needle[++i]);
while (*haystack){
	while (TOLOWER(haystack[0])!=needle[0])haystack++;
	i=1;
	while (haystack[i] && needle[i] && TOLOWER(haystack[i])==needle[i++]);
	if (!needle[i]) return haystack;
	else haystack+=i;
}
return haystack;
}

Check out my [url=https://github.com/technosaurus]github repositories[/url]. I may eventually get around to updating my [url=http://bashismal.blogspot.com]blogspot[/url].

User avatar
technosaurus
Posts: 4853
Joined: Mon 19 May 2008, 01:24
Location: Blue Springs, MO
Contact:

#11 Post by technosaurus »

a couple more string functions ... anyone using musl libc, may want the strverscmp since musl's doesn't actually compare versions (musl's will break programs like ls because its just uses strcmp, so your listings will be 1 10 100 2 20 instead of 1 2 10 20 ...)

Code: Select all

int Tstrverscmp(const char *l, const char *r){
int ret=0,buf=0;
while ( *l && *r && l[0]==r[0] ){l++;r++;}
while ( '0' <= l[0] && l[0] <= '9') ret=(10 * ret)+l++[0]-'0';
while ( '0' <= r[0] && r[0] <= '9') buf=(10 * buf)+r++[0]-'0';
return ret - buf;
}


void swab(const void *src, void *dest, unsigned int n){
if (n & 1) --n;
char *d=dest;
const char *s=src;
while (n>0){
	d[n-2] = s[n-1];
	d[n-1] = s[n-=2];
}
}
Check out my [url=https://github.com/technosaurus]github repositories[/url]. I may eventually get around to updating my [url=http://bashismal.blogspot.com]blogspot[/url].

User avatar
technosaurus
Posts: 4853
Joined: Mon 19 May 2008, 01:24
Location: Blue Springs, MO
Contact:

#12 Post by technosaurus »

strerror is typically unnecessarily large, storing as much as 10kb of unused string constants in global, I condensed these down to a set of macros that only stores the necessary error strings, while I was at it, I threw in a set of enums for the error codes so you can use it without any system includes

Code: Select all

#if 1 /* change to 0 for included error codes, else use system ones */
#include <errno.h>
#else
enum{
 ENONE,
 EPERM,				/* Operation not permitted */
 ENOENT,			/* No such file or directory */
 ESRCH,				/* No such process */
 EINTR,				/* Interrupted system call */
 EIO,				/* I/O error */
 ENXIO,				/* No such device or address */
 E2BIG,				/* Argument list too long */
 ENOEXEC,			/* Exec format error */
 EBADF,				/* Bad file number */
 ECHILD,			/* No child processes */
 EAGAIN,			/* Try again */
 ENOMEM,			/* Out of memory */
 EACCES,			/* Permission denied */
 EFAULT,			/* Bad address */
 ENOTBLK,			/* Block device required */
 EBUSY,				/* Device or resource busy */
 EEXIST,			/* File exists */
 EXDEV,				/* Cross-device link */
 ENODEV,			/* No such device */
 ENOTDIR,			/* Not a directory */
 EISDIR,			/* Is a directory */
 EINVAL,			/* Invalid argument */
 ENFILE,			/* File table overflow */
 EMFILE,			/* Too many open files */
 ENOTTY,			/* Not a typewriter */
 ETXTBSY,			/* Text file busy */
 EFBIG,				/* File too large */
 ENOSPC,			/* No space left on device */
 ESPIPE,			/* Illegal seek */
 EROFS,				/* Read-only file system */
 EMLINK,			/* Too many links */
 EPIPE,				/* Broken pipe */
 EDOM,				/* Math argument out of domain of func */
 ERANGE,			/* Math result not representable */
 EDEADLK,			/* Resource deadlock would occur */
 ENAMETOOLONG,		/* File name too long */
 ENOLCK,			/* No record locks available */
 ENOSYS,			/* Function not implemented */
 ENOTEMPTY,			/* Directory not empty */
 ELOOP,				/* Too many symbolic links encountered */
 EWOULDBLOCK,		/* Operation would block */
 ENOMSG,			/* No message of desired type */
 EIDRM,				/* Identifier removed */
 ECHRNG,			/* Channel number out of range */
 EL2NSYNC,			/* Level 2 not synchronized */
 EL3HLT,			/* Level 3 halted */
 EL3RST,			/* Level 3 reset */
 ELNRNG,			/* Link number out of range */
 EUNATCH,			/* Protocol driver not attached */
 ENOCSI,			/* No CSI structure available */
 EL2HLT,			/* Level 2 halted */
 EBADE,				/* Invalid exchange */
 EBADR,				/* Invalid request descriptor */
 EXFULL,			/* Exchange full */
 ENOANO,			/* No anode */
 EBADRQC,			/* Invalid request code */
 EBADSLT,			/* Invalid slot */
 EDEADLOCK,			/* EDEADLK 	Resource deadlock would occur */
 EBFONT,			/* Bad font file format */
 ENOSTR,			/* Device not a stream */
 ENODATA,			/* No data available */
 ETIME,				/* Timer expired */
 ENOSR,				/* Out of streams resources */
 ENONET,			/* Machine is not on the network */
 ENOPKG,			/* Package not installed */
 EREMOTE,			/* Object is remote */
 ENOLINK,			/* Link has been severed */
 EADV,				/* Advertise error */
 ESRMNT,			/* Srmount error */
 ECOMM,				/* Communication error on send */
 EPROTO,			/* Protocol error */
 EMULTIHOP,			/* Multihop attempted */
 EDOTDOT,			/* RFS specific error */
 EBADMSG,			/* Not a data message */
 EOVERFLOW,			/* Value too large for defined data type */
 ENOTUNIQ,			/* Name not unique on network */
 EBADFD,			/* File descriptor in bad state */
 EREMCHG,			/* Remote address changed */
 ELIBACC,			/* Can not access a needed shared library */
 ELIBBAD,			/* Accessing a corrupted shared library */
 ELIBSCN,			/* .lib section in a.out corrupted */
 ELIBMAX,			/* Attempting to link in too many shared libraries */
 ELIBEXEC,			/* Cannot exec a shared library directly */
 EILSEQ,			/* Illegal byte sequence */
 ERESTART,			/* Interrupted system call should be restarted */
 ESTRPIPE,			/* Streams pipe error */
 EUSERS,			/* Too many users */
 ENOTSOCK,			/* Socket operation on non-socket */
 EDESTADDRREQ,		/* Destination address required */
 EMSGSIZE,			/* Message too long */
 EPROTOTYPE,		/* Protocol wrong type for socket */
 ENOPROTOOPT,		/* Protocol not available */
 EPROTONOSUPPORT,	/* Protocol not supported */
 ESOCKTNOSUPPORT,	/* Socket type not supported */
 EOPNOTSUPP,		/* Operation not supported on transport endpoint */
 EPFNOSUPPORT,		/* Protocol family not supported */
 EAFNOSUPPORT,		/* Address family not supported by protocol */
 EADDRINUSE,		/* Address already in use */
 EADDRNOTAVAIL,		/* Cannot assign requested address */
 ENETDOWN,			/* Network is down */
 ENETUNREACH,		/* Network is unreachable */
 ENETRESET,			/* Network dropped connection because of reset */
 ECONNABORTED,		/* Software caused connection abort */
 ECONNRESET,		/* Connection reset by peer */
 ENOBUFS,			/* No buffer space available */
 EISCONN,			/* Transport endpoint is already connected */
 ENOTCONN,			/* Transport endpoint is not connected */
 ESHUTDOWN,			/* Cannot send after transport endpoint shutdown */
 ETOOMANYREFS,		/* Too many references: cannot splice */
 ETIMEDOUT,			/* Connection timed out */
 ECONNREFUSED,		/* Connection refused */
 EHOSTDOWN,			/* Host is down */
 EHOSTUNREACH,		/* No route to host */
 EALREADY,			/* Operation already in progress */
 EINPROGRESS,		/* Operation now in progress */
 ESTALE,			/* Stale NFS file handle */
 EUCLEAN,			/* Structure needs cleaning */
 ENOTNAM,			/* Not a XENIX named type file */
 ENAVAIL,			/* No XENIX semaphores available */
 EISNAM,			/* Is a named type file */
 EREMOTEIO,			/* Remote I/O error */
 EDQUOT,			/* Quota exceeded */
 ENOMEDIUM,			/* No medium found */
 EMEDIUMTYPE,		/* Wrong medium type */
 ECANCELED,			/* Operation Canceled */
 ENOKEY,			/* Required key not available */
 EKEYEXPIRED,		/* Key has expired */
 EKEYREVOKED,		/* Key has been revoked */
 EKEYREJECTED,		/* Key was rejected by service */
 EOWNERDEAD,		/* Owner died */
 ENOTRECOVERABLE,	/* State not recoverable */
 ERFKILL,			/* Operation not possible due to RF-kill */
};
#endif

...deleted b/c it was wrong - todo - fix
Check out my [url=https://github.com/technosaurus]github repositories[/url]. I may eventually get around to updating my [url=http://bashismal.blogspot.com]blogspot[/url].

Ibidem
Posts: 549
Joined: Wed 26 May 2010, 03:31
Location: State of Jefferson

#13 Post by Ibidem »

technosaurus wrote:a couple more string functions ... anyone using musl libc, may want the strverscmp since musl's doesn't actually compare versions (musl's will break programs like ls because its just uses strcmp, so your listings will be 1 10 100 2 20 instead of 1 2 10 20 ...)

Code: Select all

int Tstrverscmp(const char *l, const char *r){
int ret=0,buf=0;
while ( *l && *r && l[0]==r[0] ){l++;r++;}
while ( '0' <= l[0] && l[0] <= '9') ret=(10 * ret)+l++[0]-'0';
while ( '0' <= r[0] && r[0] <= '9') buf=(10 * buf)+r++[0]-'0';
return ret - buf;
}

I see that the musl source has "FIXME" there (ie, patches welcome)...
...What is the license on this?
(if it's ok and compatibly licensed, I'd be willing to try working it into a patch).

User avatar
technosaurus
Posts: 4853
Joined: Mon 19 May 2008, 01:24
Location: Blue Springs, MO
Contact:

#14 Post by technosaurus »

Ibidem wrote:I see that the musl source has "FIXME" there (ie, patches welcome)...
...What is the license on this?
(if it's ok and compatibly licensed, I'd be willing to try working it into a patch)
That would be fine.

This work is released to Public Domain.
In locales that do not recognize public domain it is:
Copyright Brad Conroy 2012, permission is hereby granted to use this work in accordance with any license approved by the Open Source Initiative for any purpose without restriction in perpetuity.


I have a note here about switching a do while loop instead on the last 2 loops - (for non-numerical matches) ...

... "help", "hello" will return 0 otherwise

not at a compiler to test but, best guess is:

Code: Select all

int strverscmp(const char *l, const char *r){ 
int ret=0,buf=0; 
while ( *l && *r && l[0]==r[0] ){l++;r++;} 
do {ret=(10 * ret)+l++[0]-'0'; } while ( '0' <= l[0] && l[0] <= '9') ; 
do {buf=(10 * buf)+r++[0]-'0'; } while ( '0' <= r[0] && r[0] <= '9') 
return ret - buf; 
} 
btw, one thing I would be interested in seeing in musl is a platform independent version with no assembly (or just basic assembly common to nearly all platforms) not just for ease of porting, but this will be an absolute godsend for JIT compilation like llvm/clang especially once the opencl (using the gpu) stuff stabilizes because it would allow the same compiled bytecode to be run on any system using only a small bytecode->machine code compiler
Check out my [url=https://github.com/technosaurus]github repositories[/url]. I may eventually get around to updating my [url=http://bashismal.blogspot.com]blogspot[/url].

Ibidem
Posts: 549
Joined: Wed 26 May 2010, 03:31
Location: State of Jefferson

#15 Post by Ibidem »

technosaurus wrote:
Ibidem wrote:I see that the musl source has "FIXME" there (ie, patches welcome)...
...What is the license on this?
(if it's ok and compatibly licensed, I'd be willing to try working it into a patch)
That would be fine.

This work is released to Public Domain.
In locales that do not recognize public domain it is:
Copyright Brad Conroy 2012, permission is hereby granted to use this work in accordance with any license approved by the Open Source Initiative for any purpose without restriction in perpetuity.


I have a note here about switching a do while loop instead on the last 2 loops - (for non-numerical matches) ...

... "help", "hello" will return 0 otherwise

not at a compiler to test but, best guess is:

Code: Select all

int strverscmp(const char *l, const char *r){ 
int ret=0,buf=0; 
while ( *l && *r && l[0]==r[0] ){l++;r++;} 
do {ret=(10 * ret)+l++[0]-'0'; } while ( '0' <= l[0] && l[0] <= '9') ; 
do {buf=(10 * buf)+r++[0]-'0'; } while ( '0' <= r[0] && r[0] <= '9') 
return ret - buf; 
} 
btw, one thing I would be interested in seeing in musl is a platform independent version with no assembly (or just basic assembly common to nearly all platforms) not just for ease of porting, but this will be an absolute godsend for JIT compilation like llvm/clang especially once the opencl (using the gpu) stuff stabilizes because it would allow the same compiled bytecode to be run on any system using only a small bytecode->machine code compiler
I'm not sure that noasm is practical...

When I submitted the patch to Rich, he pointed out that there's an integer overflow risk. We discussed the method and came up with another method (probably faster thanks to the lack of multiplication):

Code: Select all

int strverscmp(const char *l, const char *r){
	int haszero=0;
	while (*l && *r && l[0]==r[0]){
		if (l[0]=='0'){
			haszero=0;
		} else if (!isdigit(l[0])) {
			haszero=1;
		}
		l++; r++;
	}
	if ((isdigit(l[0]) && isdigit(r[0]) ) && haszero) {
	//return the one with the longer substring of numbers
		int lenl=0, lenr=0, firstl=l[0], firstr=r[0];
		while (isdigit(l++[0]) ) {
			lenl++;
		}
		while (isdigit(r++[0]) ) {
			lenr++;
		}
		if (lenl==lenr) {
			return (firstl -  firstr);
		} else {
			return (lenl - lenr);
		}
	} else {
		return (l[0] -  r[0]);
	}
} 

isdigit() on musl is identical to the macro you used for testing for numbers.

User avatar
technosaurus
Posts: 4853
Joined: Mon 19 May 2008, 01:24
Location: Blue Springs, MO
Contact:

#16 Post by technosaurus »

just need to figure out how to deal with blah-0041.2 and blah-041.1
Check out my [url=https://github.com/technosaurus]github repositories[/url]. I may eventually get around to updating my [url=http://bashismal.blogspot.com]blogspot[/url].

Ibidem
Posts: 549
Joined: Wed 26 May 2010, 03:31
Location: State of Jefferson

#17 Post by Ibidem »

technosaurus wrote:just need to figure out how to deal with blah-0041.2 and blah-041.1
If you look in the manpage, that isn't ambiguous: numbers starting with 0 are considered to have an implied decimal point before the first 0, so the more leading zeros, the smaller.
Since it diverges at 00 vs 04, anything after the decimal point is ignored.

But there is another bug in the version I just posted:
compare 2004 204, and my code assumes it's dealing with a leading 0 :oops:
I have an idea for fixing it, but haven't done so yet.

User avatar
technosaurus
Posts: 4853
Joined: Mon 19 May 2008, 01:24
Location: Blue Springs, MO
Contact:

#18 Post by technosaurus »

simple qsort example, takes all arguments and prints sorted tab separated list

Code: Select all

#include <string.h>

static inline int cmp(const void *a, const void *b){ 
	return strcmp(*(const char **)a, *(const char **)b);
}

int main(int argc, char *argv[]){
    qsort(++argv, --argc, sizeof(char *), cmp);
    while (argc){
		write(1,argv[0],strlen(argv[0]));
		write(1,(--argc && argv++)?"\t":"\n",1);
	}
}
Check out my [url=https://github.com/technosaurus]github repositories[/url]. I may eventually get around to updating my [url=http://bashismal.blogspot.com]blogspot[/url].

User avatar
technosaurus
Posts: 4853
Joined: Mon 19 May 2008, 01:24
Location: Blue Springs, MO
Contact:

#19 Post by technosaurus »

a faster (but bigger) strlen -needs work (misses \0 on multiples of 8)

Code: Select all

size_t strlen(char *s){
size_t *v,ret;
v=(size_t  *)s;
while (!(~((((*v & 0x7F7F7F7F) + 0x7F7F7F7F) | *v) | 0x7F7F7F7F))) v++; /*64bit size_t?*/
ret=(char *)v-s;
while (s[++ret]);
return ret;
}
notes: need to use an unsigned int or 0 is 111111111111.....
maybe use:
((*v>>24)&&(*v&0xFF00FFFF))&&((*v&0xFFFF00FF)&&(*v<<24))
because the compiler can combine some of these operations and/or thread them
Check out my [url=https://github.com/technosaurus]github repositories[/url]. I may eventually get around to updating my [url=http://bashismal.blogspot.com]blogspot[/url].

User avatar
technosaurus
Posts: 4853
Joined: Mon 19 May 2008, 01:24
Location: Blue Springs, MO
Contact:

#20 Post by technosaurus »

here is a one-liner to generate macros for 300+ syscalls

Code: Select all

grep __NR_ unistd_32.h |awk '
{print "#define " substr($2,6,length($2)) "(...) syscall(" $2 ", __VA_ARGS__ )"}
' | sort > syscalls.h
and now you have a 0kb c "library"

Note: zero-arg functions like fork() may need to be modified on older compilers and all of the types (structs, etc...) will need to be defined and there is absolutely no type checking. If you would like some semblance of type checking, you can later modify the defines to be static inline functions like:

Code: Select all

static inline long mycall(int a, char *b, struct somestruct c){
return syscall(__NR_mycall, a, b, c);
}
There are quite a few other functions in the kernel that would be useful if you wanted to wrap them in a syscall for userspace (all of the crypto stuff and filesystem detection for instance)
Check out my [url=https://github.com/technosaurus]github repositories[/url]. I may eventually get around to updating my [url=http://bashismal.blogspot.com]blogspot[/url].

Post Reply