[Golang] Get Url of All Posts of Instagram User


[Update]: Due to a change in the Instagram API, the method described in this post no longer works. However, the same idea can still be applied to get all post URLs from the new GraphQL API. See my instago repo for more details.

An interesting small program that gets the URLs of all posts of a specific Instagram user.

This program uses only the Go standard library; no third-party packages are needed.

To access the Instagram API from a local Go program, you need to log in to Instagram and get the following cookie values from your browser:

  • ds_user_id
  • sessionid
  • csrftoken
ds_user_id sessionid csrftoken

Please see this Stack Overflow answer for how to get the above values in the Chrome browser.

For each HTTP request, the Instagram API returns only one page (12 posts), together with a token (end_cursor in this case) for getting the next page. The token is passed in the query string of the next HTTP request as the max_id parameter. Please read the code for more details.

userinfo.go | repository | view raw
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
package igpost

import (
	"encoding/json"
	"errors"
	"fmt"
	"io/ioutil"
	"net/http"
	"strconv"
	"strings"
	"time"
)

const (
	// urlUserInfo is the URL template of a user's profile JSON endpoint;
	// {{USERNAME}} is a placeholder for the account name. The endpoint can be
	// read without logging in or sending cookies, but a logged-in session that
	// is allowed to view a private account also receives its private data.
	urlUserInfo = `https://www.instagram.com/{{USERNAME}}/?__a=1`

	// userAgent is the User-Agent header value sent by getHTTPResponse.
	userAgent = "Instagram 10.3.2 (iPhone7,2; iPhone OS 9_3_3; en_US; en-US; scale=2.00; 750x1334) AppleWebKit/420+"
)

// RawUserResp is the top-level object of the user-info JSON response. It is
// used to decode the JSON data.
type RawUserResp struct {
	// User is decoded from the "user" key of the response.
	User UserInfo `json:"user"`
}

// UserInfo holds the profile fields decoded from the "user" object of the
// response, together with one page of the user's media.
//
// You can add more fields in the struct to get more information.
// See response/types.go in github.com/ahmdrz/goinsta
type UserInfo struct {
	Biography       string `json:"biography"`
	ExternalUrl     string `json:"external_url"`
	FullName        string `json:"full_name"`
	Id              string `json:"id"`
	IsPrivate       bool   `json:"is_private"`
	ProfilePicUrlHd string `json:"profile_pic_url_hd"`
	Username        string `json:"username"`
	// Media is decoded from the "media" object: the posts on the current page
	// plus the pagination state.
	Media struct {
		Nodes    []MediaNode `json:"nodes"`
		Count    int64       `json:"count"`
		// PageInfo tells whether another page exists and carries the cursor
		// that GetAllPostCode passes as max_id to request it.
		PageInfo struct {
			HasNextPage bool   `json:"has_next_page"`
			EndCursor   string `json:"end_cursor"`
		} `json:"page_info"`
	} `json:"media"`
}

// MediaNode is one post entry in the "nodes" array of a user's media.
type MediaNode struct {
	Code    string `json:"code"` // shortcode of the post; the post URL is https://www.instagram.com/p/<code>/
	Date    int64  `json:"date"` // NOTE(review): presumably a Unix timestamp — confirm
	Caption string `json:"caption"`
}

// requestTimeout bounds a whole HTTP exchange (connect, headers and body).
const requestTimeout = 30 * time.Second

// getHTTPResponse sends a GET request to url on behalf of a specific
// Instagram user and returns the raw response body.
//
// ds_user_id, sessionid and csrftoken are sent as cookies of the same names.
// After logging in to Instagram, their values can be read in Chrome Developer
// Tools. See https://stackoverflow.com/a/44773079
// or
// https://github.com/hoschiCZ/instastories-backup#obtain-cookies
//
// An error is returned when the request cannot be built or sent, when the
// server answers with a status other than 200, when the exchange exceeds
// requestTimeout, or when reading the body fails.
func getHTTPResponse(url, ds_user_id, sessionid, csrftoken string) (b []byte, err error) {
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}

	req.AddCookie(&http.Cookie{Name: "ds_user_id", Value: ds_user_id})
	req.AddCookie(&http.Cookie{Name: "sessionid", Value: sessionid})
	req.AddCookie(&http.Cookie{Name: "csrftoken", Value: csrftoken})

	req.Header.Set("User-Agent", userAgent)

	// A zero-value http.Client never times out, so a stalled connection would
	// block the caller forever; always bound the request.
	client := &http.Client{Timeout: requestTimeout}
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, errors.New(
			"resp.StatusCode: " +
				strconv.Itoa(resp.StatusCode))
	}

	return ioutil.ReadAll(resp.Body)
}

// Given user name, return codes of all posts of the user.
func GetAllPostCode(username, ds_user_id, sessionid, csrftoken string) (codes []string, err error) {
	r := RawUserResp{}
	r.User.Media.PageInfo.HasNextPage = true
	for r.User.Media.PageInfo.HasNextPage == true {
		url := strings.Replace(urlUserInfo, "{{USERNAME}}", username, 1)
		if len(codes) != 0 {
			url = url + "&max_id=" + r.User.Media.PageInfo.EndCursor
		}

		b, err := getHTTPResponse(url, ds_user_id, sessionid, csrftoken)
		if err != nil {
			return codes, err
		}

		if err = json.Unmarshal(b, &r); err != nil {
			return codes, err
		}

		for _, node := range r.User.Media.Nodes {
			codes = append(codes, node.Code)
		}
		fmt.Printf("Getting %d from %s ...\n", len(codes), url)
	}
	return
}

Example:

userinfo_test.go | repository | view raw
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
package igpost

import (
	"fmt"
	"os"
	"testing"
)

// TestGetAllPostCode fetches all post codes of the account named by the
// IG_TEST_USERNAME environment variable and prints the URL of every post.
// The cookie values are read from IG_DS_USER_ID, IG_SESSIONID and
// IG_CSRFTOKEN.
//
// It is named Test... rather than Example... because it takes a *testing.T:
// an Example function must have no parameters, so with the Example prefix
// this function is never run by "go test".
func TestGetAllPostCode(t *testing.T) {
	username := os.Getenv("IG_TEST_USERNAME")
	if username == "" {
		// Without an account name there is nothing to query.
		t.Skip("IG_TEST_USERNAME is not set")
	}

	codes, err := GetAllPostCode(
		username,
		os.Getenv("IG_DS_USER_ID"),
		os.Getenv("IG_SESSIONID"),
		os.Getenv("IG_CSRFTOKEN"))
	if err != nil {
		t.Fatal(err)
	}
	for _, code := range codes {
		fmt.Printf("URL: https://www.instagram.com/p/%s/\n", code)
	}
}

The full code is also available on my GitHub repo [1].


Tested on: Ubuntu Linux 17.10, Go 1.9.4.


References:

[1]GitHub - siongui/goigmedia: Get links of Instagram user media (photos and videos) in Go.