: To unbundle, sh this file echo file 'pi.1' >&2 cat >'pi.1' <<'End of pi.1' .LC @(#)pi.1 1.4 7/2/91 .TH PI 1E \*(eH .SH NAME pi \- compute theoretical isoleectric point .SH SYNOPSIS .B pi [ .B \-w ] [ sequence or filename ] .= pi .SH DESCRIPTION .I Pi computes a theoretical isoelectric point for a sequence using Sillero and Ribeiro's \*(lqabridged\*(rq method. If the command line is argument free (or the only argument is \*(lq-\*(rq), the sequence is read from the standard input; if the command line has one argument that's the name of a file, the sequence is read from the named file; otherwise the command line must have one argument which itself is used as the sequence. .PP This option is available: .TP .B \-w Use the weighted mean for pKNa and pKPa values. .SH SEE ALSO Sillero, A. and J. M. Ribeiro, \*(lqIsoelectric points of proteins: theoretical determination,\*(rq .IR "Anal. Biochem." , vol. 179, no. 2, pp. 319-25, 1989. The results produced without the .B \-w option agree exactly with those in Sillero and Ribeiro's paper; the results produced with the ./B \-w option differ (by up to 0.04 on the sequences given in the paper). A June 17, 1991 letter from Prof. Sillero indicates that the results produced by the program are correct. End of pi.1 echo file 'pi.c' >&2 cat >'pi.c' <<'End of pi.c' static char elsieid[] = "@(#)pi.c 2.2"; #include "stdio.h" #include "math.h" extern int optind; #ifndef alloc_size_t #define alloc_size_t unsigned #endif /* !defined alloc_size_t */ #define PKN1 3.2 #define PKN2 4.0 #define PKN3 4.5 #define PKNA 4.2 #define PKN4 9.0 #define PKN5 10.0 #define PKNB 9.5 #define PKP1 6.4 #define PKPB 6.4 #define PKP2 8.2 #define PKP3 10.4 #define PKP4 12.0 #define PKPA 11.2 static char * progname; typedef struct { double r; double i; } complex; static complex ritoc(r, i) double r, i; { complex c; c.r = r; c.i = i; return c; } #define rtoc(r) ritoc((r), 0.0) static complex ccadd(a, b) complex a, b; { return ritoc(a.r + b.r, a.i + b.i); } static complex ccsub(a, b) complex a, b; { return ccadd(a, ritoc(-b.r, -b.i)); } static complex ccmul(a, b) complex a, b; { return ritoc(a.r * b.r - a.i * b.i, a.r * b.i + a.i * b.r); } static complex ccdiv(a, b) complex a, b; { register double d; d = b.r * b.r + b.i * b.i; return ccmul(a, ritoc(b.r / d, -b.i / d)); } static complex crpow(c, d) complex c; double d; { register double r, theta; r = pow(hypot(c.r, c.i), d); if (r == 0) return rtoc(0.0); theta = atan2(c.i, c.r) * d; return ritoc(r * cos(theta), r * sin(theta)); } #define csqrt(c) crpow((c), 1.0 / 2.0) #define ccbrt(c) crpow((c), 1.0 / 3.0) static double cubic(p, q, r) double p, q, r; { register double a, b; complex A, B, left, right; a = (3 * q - p * p) / 3; b = (2 * p * p * p - 9 * p * q + 27 * r) / 27; left = rtoc(-b / 2); right = csqrt(rtoc(b * b / 4 + a * a * a / 27)); A = ccbrt(ccadd(left, right)); B = ccbrt(ccsub(left, right)); return A.r + B.r - p / 3; } static double quartic(a, b, c, d) double a, b, c, d; { register double p, q, r, y; complex R, D, left, right; p = -b; q = a * c - 4 * d; r = -(a * a * d) + 4 * b * d - c * c; y = cubic(p, q, r); R = csqrt(rtoc(a * a / 4 - b + y)); left = rtoc(3 * a * a / 4 - 2 * b); if (R.r != 0 || R.i != 0) { left = ccsub(left, ccmul(R, R)); right = ccdiv(rtoc((4 * a * b - 8 * c - a * a * a) / 4), R); } else right = ccmul(rtoc(2.0), csqrt(rtoc(y * y - 4 * d))); D = csqrt(ccadd(left, right)); return -a / 4 + R.r / 2 + D.r / 2; } static void wild2exit(str1, str2) char * str1; char * str2; { (void) fprintf(stderr, "%s: wild", progname); if (str1 != NULL && *str1 != '\0') (void) fprintf(stderr, " %s", str1); if (str2 != NULL && *str2 != '\0') (void) fprintf(stderr, " %s", str2); (void) fprintf(stderr, "\n"); exit(1); } #define wildexit(str) wild2exit((str), (char *) NULL) static char * memcheck(cp) char * cp; { if (cp == NULL) wildexit("result from allocating memory"); return cp; } static char * emalloc(size) int size; { return memcheck((char *) malloc((alloc_size_t) size)); } static char * erealloc(pointer, size) char * pointer; int size; { if (pointer == NULL) return memcheck(emalloc(size)); else return memcheck((char *) realloc(pointer, (alloc_size_t) size)); } static char * getseq(string) char * string; { register char * filename; register FILE * fp; register char * sequence; register int c; register int i; register int length; if (strcmp(string, "-") == 0) { static char stdname[] = "standard input"; filename = stdname; fp = stdin; } else { filename = string; fp = fopen(filename, "r"); } if (fp == NULL) sequence = filename; else { sequence = emalloc(1); i = 0; for ( ; ; ) { c = getc(fp); if (c == EOF) break; if (c == '\0') wildexit("ASCII nul in sequence"); sequence = erealloc(sequence, (int) ((i + 2) * sizeof *sequence)); sequence[i++] = c; } sequence[i] = '\0'; if (ferror(fp) || !feof(fp)) wildexit("result reading sequence"); if (fp != stdin) if (fclose(fp)) wild2exit("result closing", filename); } length = 0; for (i = 0; (c = sequence[i]) != '\0'; ++i) { if (!isascii(c)) wildexit("non-ASCII input"); if (!isalpha(c)) continue; if (islower(c)) c = toupper(c); sequence[length++] = c; } sequence[length] = '\0'; return sequence; } static int count(sequence, letter) char * sequence; int letter; { register int result; result = 0; while (*sequence != '\0') if (*sequence++ == letter) ++result; return result; } static void usage() { (void) fprintf(stderr, "%s: usage is %s [ -w ] [sequence | filename]\n", progname, progname); exit(1); } int main(argc, argv) int argc; char * argv[]; { register int ch; register int n1, n2, n3, n4, n5, p1, p2, p3, p4, na, nb, pa, pb; register double pkna, pknb, pkpa, pkpb; register double kna, knb, kpa, kpb; register double a, b, c, d; register double t1, t2, t3, t4, t5, t6, t7, t8; register int wflag; register char * sequence; progname = argv[0]; wflag = 0; while ((ch = getopt(argc, argv, "w")) != EOF) if (ch == 'w') wflag = 1; else usage(); if (optind != argc && optind != argc - 1) usage(); if (optind == argc - 1 && strcmp(argv[optind], "=") == 0) usage(); /* by request */ sequence = getseq((optind == argc) ? "-" : argv[optind]); n1 = 1; /* terminal carboxyl */ n2 = count(sequence, 'D'); /* asp */ n3 = count(sequence, 'E'); /* glu */ n4 = count(sequence, 'C'); /* cys */ n5 = count(sequence, 'Y'); /* tyr */ p1 = count(sequence, 'H'); /* his */ p2 = 1; /* terminal ammonium */ p3 = count(sequence, 'K'); /* lys */ p4 = count(sequence, 'R'); /* arg */ na = n1 + n2 + n3; nb = n4 + n5; pa = p2 + p3 + p4; pb = p1; if (wflag) { pkna = (n1 * PKN1 + n2 * PKN2 + n3 * PKN3) / na; pkpa = (p2 * PKP2 + p3 * PKP3 + p4 * PKP4) / pa; /* ** Why not ** if (nb == 0) ** knb = PKNB; ** else knb = (n4 * PKN4 + n5 * PKN5) / nb; ** ? */ pknb = PKNB; pkpb = PKPB; } else { pkna = PKNA; pkpa = PKPA; pknb = PKNB; pkpb = PKPB; } kna = exp10(-pkna); knb = exp10(-pknb); kpa = exp10(-pkpa); kpb = exp10(-pkpb); t1 = pa + pb; t2 = na + nb; t3 = kpa + kpb; t4 = kna + knb; t5 = kpa * kpb; t6 = kna * knb; t7 = pa * kpb + pb * kpa; t8 = na * kna + nb * knb; a = t1 * t4 + t7 - t8; b = t1 * t6 + t4 * t7 - t2 * t6 - t3 * t8; c = t6 * t7 - t2 * t3 * t6 - t5 * t8; d = -t2 * t5 * t6; a /= t1; b /= t1; c /= t1; d /= t1; (void) printf("%s %s # %s\n", progname, sequence, elsieid); (void) printf("%.2f\n", -log10(quartic(a, b, c, d))); if (fflush(stdout) || ferror(stdout)) wildexit("result from writing"); return 0; } End of pi.c echo file 'makefile' >&2 cat >'makefile' <<'End of makefile' pi: pi.c cc pi.c -lm -o pi End of makefile exit